I'm iterating through a list of strings, and I want to return the contents of a string if the beginning of it matches the provided string.
e.g.
strings = [ "GITHUB:github.com", "STACKOVERFLOW:stackoverflow.com" ]
IO.puts fn(strings, "GITHUB") // => "github.com"
This is what I'm thinking so far:
# Question's attempt: walk the list and return the value of the first entry whose
# prefix equals `tag_name`; returns "" once the list is exhausted.
# NOTE(review): the question reports a CompileError for the first clause (a binary
# field without size is only allowed at the end of a binary pattern). The pattern also
# expects ": " (colon plus space) while the sample data uses a bare ":" — confirm.
def get_tag_value([ << tag_name, ": ", tag_value::binary >> | rest ], tag_name), do: tag_value
def get_tag_value([ _ | rest], tag_name), do: get_tag_value(rest, tag_name)
def get_tag_value([], tag_name), do: ""
But I get this:
** (CompileError) lib/file.ex:31: a binary field without size is only allowed at the end of a binary pattern and never allowed in binary generators
Which makes sense, but then I'm not quite sure how to go about doing this. How would I match a substring to a different variable provided as an argument?
Here's how I'd do this making most use of pattern matching and no call to String.starts_with? or String.split:
defmodule A do
  @doc """
  Returns the text after `string <> ":"` for the first element of `strings`
  that begins with exactly that prefix, or `nil` when no element does.
  """
  def find(strings, string) do
    prefix_len = byte_size(string)

    Enum.find_value(strings, fn candidate ->
      case candidate do
        # The pin forces the first `prefix_len` bytes to equal `string`.
        <<^string::binary-size(prefix_len), ":", value::binary>> -> value
        _other -> nil
      end
    end)
  end
end
strings = ["GITHUB:github.com", "STACKOVERFLOW:stackoverflow.com"]
IO.inspect A.find(strings, "GITHUB")
IO.inspect A.find(strings, "STACKOVERFLOW")
IO.inspect A.find(strings, "GIT")
IO.inspect A.find(strings, "FOO")
Output:
"github.com"
"stackoverflow.com"
nil
nil
There are many ways to skin this cat.
For example:
# Returns the text after the first ":" of the first entry in `strings` that starts
# with `tag <> ":"`, or "" when no entry matches.
def get_tag_value(tag, strings) do
  # "" is the fallback entry, so the split below always receives a binary.
  matching_entry =
    Enum.find(strings, "", fn entry -> String.starts_with?(entry, tag <> ":") end)

  parts = String.split(matching_entry, ":", parts: 2)
  Enum.at(parts, 1, "")
end
or if you still wanted to explicitly use recursion:
# Recursive lookup: returns the text after the first ":" of the first entry that
# starts with `tag <> ":"`, or "" once the list is exhausted.
def get_tag_value(_tag, []), do: ""

def get_tag_value(tag, [entry | remaining]) do
  case String.starts_with?(entry, tag <> ":") do
    true ->
      parts = String.split(entry, ":", parts: 2)
      Enum.at(parts, 1, "")

    false ->
      get_tag_value(tag, remaining)
  end
end
Are just two of many possible ways.
However, you won't be able to pattern match the string in the function head without knowing it (or at least the length) beforehand.
iex(1)> strings = [ "GITHUB:github.com", "STACKOVERFLOW:stackoverflow.com" ]
iex(2)> Enum.filter(strings, fn(s) -> String.starts_with?(s, "GITHUB") end)
iex(3)> |> Enum.map(fn(s) -> [_, part_2] = String.split(s, ":"); part_2 end)
# => ["github.com"]
In Enum.filter/2 I select all strings that start with "GITHUB", which gives a new list. Enum.map/2 then iterates through that list and splits each string at the colon, returning only the second part. The result is a list of the parts after the colon for every original string that starts with "GITHUB".
Be aware that if there's an item like "GITHUBgithub.com" without a colon, you get a MatchError. To avoid this, either use String.starts_with?(s, "GITHUB:") to filter the right strings, or avoid the pattern matching I used in Enum.map/2, or use pattern matching for an empty list like @ryanwinchester did.
You can use a combination of Enum.map and Enum.filter to get the matching pairs you're looking for:
# Returns the last `[tag_name, tag_value]` pair in `tags` whose name equals `tag_name`,
# or nil when no entry matches (List.last/1 returns nil for an empty list).
def get_tag_value(tag_name, tags) do
  tags
  # Split on the first ":" only, so values that themselves contain ":" (e.g. URLs)
  # still produce a two-element [tag_name, tag_value] list.
  |> Enum.map(&String.split(&1, ":", parts: 2))
  # Keep well-formed pairs for the requested tag; entries without ":" are skipped
  # instead of crashing the filter with a FunctionClauseError.
  |> Enum.filter(&match?([^tag_name, _tag_value], &1))
  |> List.last()
end
Note that List.last/1 returns nil (not an empty list) when nothing matched, so check for nil first; otherwise you can call List.last/1 again on the returned pair to get the tag value.
Alternatively you can use a case statement to return a different kind of result, like a :nomatch atom:
# Returns the value of the last entry in `tags` named `tag_name`, or :nomatch when
# no entry matches.
def get_tag_value(tag_name, tags) do
  matches =
    tags
    # Split on the first ":" only, so values containing ":" (e.g. URLs) stay intact.
    |> Enum.map(&String.split(&1, ":", parts: 2))
    # Keep well-formed pairs for the requested tag; entries without ":" are skipped.
    |> Enum.filter(&match?([^tag_name, _tag_value], &1))
    |> List.last()

  case matches do
    # List.last/1 returns nil (not []) for an empty list, so nil is the no-match case.
    nil -> :nomatch
    [_, tag_value] -> tag_value
  end
end
This would be my take in Erlang:
%% Returns the value part of the first <<"Tag:Value">> binary in Strings whose tag
%% is exactly Tag. Fails with badmatch when no element matches.
get_tag_value(Tag, Strings) ->
    L = byte_size(Tag),
    %% Variables in a generator pattern are fresh (they shadow outer bindings), so
    %% the prefix is bound to T and compared with Tag in an explicit filter.
    [First | _] = [Val || <<T:L/binary, $:, Val/binary>> <- Strings, T =:= Tag],
    First.
The same in Elixir (there are probably more idiomatic ways of writing it, tho):
# Returns the value of the first "tag:value" entry in `strings` whose tag equals `tag`.
# Raises MatchError when no entry matches.
def gtv(tag, strings) do
  tag_len = :erlang.size(tag)

  values =
    for <<prefix::binary-size(tag_len), ":", value::binary>> <- strings, prefix == tag do
      value
    end

  [first | _] = values
  first
end
Related
I have a list like List = [{0,12},{0,12},{-1,0},{0,12},{0,4},{1,2}] and a string Str = "https://www.youtube.com/watch?v=WQfdwsPao9E", now I've to find all the substrings using start and end point from list.
I want substrings to be returned in a List like ["https://www","https://www",..]
I tried using this:
C=lists:map(fun({X,Y}) -> string:sub_string(Str,X,Y) end,List)
1> List = [{0,12},{0,12},{-1,0},{0,12},{0,4},{1,2}].
[{0,12},{0,12},{-1,0},{0,12},{0,4},{1,2}]
2> Str = "https://www.youtube.com/watch?v=WQfdwsPao9E".
"https://www.youtube.com/watch?v=WQfdwsPao9E"
3> Len = length(Str).
43
4> [string:sub_string(Str,max(1,X),min(Len,Y)) || {X,Y} <- List].
["https://www.","https://www.",[],"https://www.","http",
"ht"]
5>
you may have to adjust the indexes in the string to fit exactly to your need.
[edit] It looks like I didn't correctly interpret the meaning of the tuple. I think it is {First_Char_Index, Char_Number}, or {-1,0} if no match is found. So you should use:
[string:sub_string(Str,X+1,X+Y) || {X,Y} <- List, {X,Y} =/= {-1,0}].
I have the following functions:
%% Looks Word up in an index built from the files in DirName.
%% Numbers the files, builds a mapper that emits only occurrences of Word, runs
%% mapreduce/3 with remove_duplicates/3 as the reducer, and returns
%% dict:find(Word, Index).
%% NOTE(review): mapreduce/3 is not shown here; it presumably returns a dict — confirm.
search(DirName, Word) ->
NumberedFiles = list_numbered_files(DirName),
%% Despite its name, Words holds the mapper fun returned by make_filter_mapper/1.
Words = make_filter_mapper(Word),
Index = mapreduce(NumberedFiles, Words, fun remove_duplicates/3),
dict:find(Word, Index).
%% Lists the entries of DirName and pairs each joined path with a 1-based index,
%% returning [{Index, FileName}]. Fails with badmatch if the directory cannot be listed.
list_numbered_files(DirName) ->
    {ok, Names} = file:list_dir(DirName),
    Paths = lists:map(fun(Name) -> filename:join(DirName, Name) end, Names),
    lists:zip(lists:seq(1, length(Names)), Paths).
%% Builds a mapper fun (_Index, FileName, Emit) that reads FileName with
%% file:consult/1 and calls Emit(Word, FileName) for every word equal to MatchWord.
make_filter_mapper(MatchWord) ->
fun (_Index, FileName, Emit) ->
%% Asserts that the file consults to exactly one term (bound to Words); any other
%% result, including {error, _}, raises badmatch on this line.
{ok, [Words]} = file:consult(FileName), %% <---- Line 20
lists:foreach(fun (Word) ->
case MatchWord == Word of
true -> Emit(Word, FileName);
false -> false
end
end, Words)
end.
%% Calls Emit(Word, FileName) once for each distinct file name in FileNames.
%% Returns ok.
remove_duplicates(Word, FileNames, Emit) ->
    Distinct = sets:to_list(sets:from_list(FileNames)),
    _ = [Emit(Word, Name) || Name <- Distinct],
    ok.
However, when i call search(Path_to_Dir, Word) I get:
Error in process <0.185.0> with exit value:
{{badmatch,{error,{1,erl_parse,["syntax error before: ","wordinfile"]}}},
[{test,'-make_filter_mapper/1-fun-1-',4,[{file,"test.erl"},{line,20}]}]}
And I do not understand why. Any ideas?
The Words variable will match to content of the list, which might not be only one tuple, but many of them. Try to match {ok, Words} instead of {ok, [Words]}.
Beside the fact that the function file:consult/1 may return a list of several elements so you should replace {ok,[Words]} (expecting a list of one element = Words) by {ok,Words}, it actually returns a syntax error meaning that in the file you are reading, there is a syntax error.
Remember that the file should contain only valid erlang terms, each of them terminated by a dot. The most common error is to forget a dot or replace it by a comma.
I am having Data like the below:
Data = [{<<"status">>,<<"success">>},
{<<"META">>,
{struct,[{<<"createdat">>,1406895903.0},
{<<"user_email">>,<<"gopikrishnajonnada#gmail.com">>},
{<<"campaign">>,<<"5IVUPHE42HP1NEYvKb7qSvpX2Cm">>}]}},
{<<"mode">>,1}]
And Now i am having a
FieldList = ['<<"5IVUPHE42HP1NEYvKb7qSvpX2Cm">>']
Now:
I am trying like the below but i am getting empty instead of the value
90> [L || L <- FieldList,proplists:get_value(<<"campaign">>,element(2,proplists:get_value(<<"META">>,Data,{[],[]}))) == L].
[]
so how to get the both values are equal and get the final value.
You can parse the atom as if it were an Erlang term:
%% Treats the text of Atom as the source of an Erlang term and returns the parsed
%% term, e.g. the atom '<<"abc">>' becomes the binary <<"abc">>.
%% Fails with badmatch if the text does not scan or parse as a single term.
atom_to_binary(Atom) ->
    %% A term must be terminated by a dot before it can be parsed.
    Source = atom_to_list(Atom) ++ ".",
    {ok, Tokens, _EndLocation} = erl_scan:string(Source),
    {ok, Term} = erl_parse:parse_term(Tokens),
    Term.
You can then do
[L ||
L <- FieldList,
proplists:get_value(<<"campaign">>,
element(2,
proplists:get_value(<<"META">>,Data,{[],[]})))
== atom_to_binary(L)
].
You can also do it the other way round, (trying to) convert the binary to an atom using this function:
%% Formats Binary with ~p and returns the already-existing atom with that text,
%% or undefined when no such atom exists.
binary_literal_to_atom(Binary) ->
    Printed = io_lib:format("~p", [Binary]),
    Text = lists:flatten(Printed),
    try list_to_existing_atom(Text) of
        Atom -> Atom
    catch
        error:badarg -> undefined
    end.
This function will return undefined if the atom is not known yet (s. Erlang: binary_to_atom filling up atom table space security issue for more information on this). This is fine here, since the match can only work if the atom was known before, in this case by being defined in the FieldList variable.
How did you get those values in the first place?
Data = [{<<"status">>,<<"success">>},
{<<"META">>,
{struct,[{<<"createdat">>,1406895903.0},
{<<"user_email">>,<<"gopikrishnajonnada#gmail.com">>},
{<<"campaign">>,<<"5IVUPHE42HP1NEYvKb7qSvpX2Cm">>}]
}
},
{<<"mode">>,1}].
[_,{_,{struct,InData}}|_] = Data.
[X || {<<"campaign">>,X} <- InData].
it gives you the result in the form : [<<"5IVUPHE42HP1NEYvKb7qSvpX2Cm">>]
of course you can use the same kind of code if the tuple {struct,InData} may be in a different place in the Data variable.
-module(wy).
-compile(export_all).
%% Folds over the sample Data and collects the <<"campaign">> value found inside
%% the {<<"META">>, {struct, _}} entry; every other entry is ignored.
main() ->
    Meta = {struct, [{<<"createdat">>, 1406895903.0},
                     {<<"user_email">>, <<"gopikrishnajonnada#gmail.com">>},
                     {<<"campaign">>, <<"5IVUPHE42HP1NEYvKb7qSvpX2Cm">>}]},
    Data = [{<<"status">>, <<"success">>},
            {<<"META">>, Meta},
            {<<"mode">>, 1}],
    Collect =
        fun(Entry, Acc) ->
            case Entry of
                {<<"META">>, {struct, Props}} ->
                    [proplists:get_value(<<"campaign">>, Props, []) | Acc];
                _Other ->
                    Acc
            end
        end,
    lists:foldl(Collect, [], Data).
I think you can use this code.
Getting started with Scala parser combinators; before moving on I need to grasp failure/error handling better (note: I'm still getting into Scala as well).
Want to parse strings like "a = b, c = d" into a list of tuples but flag the user when dangling commas are found.
Thought about matching off failure ("a = b, ") when matching comma separated property assignments:
// Parses one or more `inner` items separated by ","; the second alternative is
// meant to report "Dangling comma" when a trailing comma is present.
// NOTE(review): per the question, input such as "name = John , " produces the
// generic "end of source found" failure instead of the dangling-comma message.
def commaList[T](inner: Parser[T]): Parser[List[T]] =
rep1sep(inner, ",") | rep1sep(inner, ",") ~> opt(",") ~> failure("Dangling comma")
// Parses `ident = ident` into a (name, value) tuple.
def propertyAssignment: Parser[(String, String)] = ident ~ "=" ~ ident ^^ {
case id ~ "=" ~ prop => (id, prop)
}
And call the parser with:
p.parseAll(p.commaList(p.propertyAssignment), "name = John , ")
which results in a Failure, no surprise but with:
string matching regex `\p{javaJavaIdentifierStart}\p{javaJavaIdentifierPart}*' expected but end of source found
The commaList function succeeds on the first property assignment and starts repeating given the comma, but the next "ident" fails because the next character is the end of the source data. I thought the second alternative in commaList would catch that:
rep1sep(inner, ",") ~> opt(",") ~> failure("Dangling comma")
Nix. Ideas?
Scalaz to the rescue :-)
When you are working with warnings, it is not a good idea to exit your parser with a failure. You can easily combine the parser with the Scalaz writer monad. With this monad you can add messages to the partial result during the parser run. These messages could be infos, warnings or errors. After the parser finishes, you can then validate the result to see whether it can be used or whether it contains critical problems. With such a separate validator step you usually get much better error messages. For example you could accept arbitrary characters at the end of the string, but issue an error when they are found (e.g. "Garbage found after last statement"). The error message can be much more helpful for the user than the cryptic default one you get in the example below ("string matching regex `\z' expected [...]").
Here is an example based on the code in your question:
scala> :paste
// Entering paste mode (ctrl-D to finish)
import util.parsing.combinator.RegexParsers
import scalaz._, Scalaz._
/** Parses comma-separated `name=value` equations and records problems as warnings
  * in a Scalaz `Writer` instead of failing the parse.
  *
  * `apply` returns the parsed equations together with the collected warnings; on a
  * parse failure it returns an empty list with the failure message as the only warning.
  */
object DemoParser extends RegexParsers {
  type Warning = String
  case class Equation(left : String, right : String)
  type PWriter = Writer[Vector[Warning], List[Equation]]
  val emptyList : List[Equation] = Nil

  // One or more `p` separated by `q`; the separators' results are discarded.
  def rep1sep2[T](p : => Parser[T], q : => Parser[Any]): Parser[List[T]] =
    p ~ rep(q ~> p) ^^ {case x~y => x::y}

  def name : Parser[String] = """\w+""".r
  def equation : Parser[Equation] = name ~ "=" ~ name ^^ { case n ~ _ ~ v => Equation(n,v) }

  // A non-empty comma-separated list of equations, with no warnings attached.
  def commaList : Parser[PWriter] = rep1sep(equation, ",") ^^ (_.set(Vector()))

  // An optional trailing ",": it contributes no equations, only a warning when present.
  // The no-comma branch carries an explicitly typed empty warning vector so both
  // branches have the type PWriter.
  def danglingComma : Parser[PWriter] = opt(",") ^^ (
    _ map (_ => emptyList.set(Vector("Warning: Dangling comma")))
    getOrElse(emptyList.set(Vector.empty[Warning])))

  // Equations followed by an optional dangling comma; results and warnings are concatenated.
  def danglingList : Parser[PWriter] = commaList ~ danglingComma ^^ {
    case l1 ~ l2 => (l1.over ++ l2.over).set(l1.written ++ l2.written) }

  def apply(input: String): PWriter = parseAll(danglingList, input) match {
    case Success(result, _) => result
    case failure : NoSuccess => emptyList.set(Vector(failure.msg))
  }
}
// Exiting paste mode, now interpreting.
import util.parsing.combinator.RegexParsers
import scalaz._
import Scalaz._
defined module DemoParser
scala> DemoParser("a=1, b=2")
res2: DemoParser.PWriter = (Vector(),List(Equation(a,1), Equation(b,2)))
scala> DemoParser("a=1, b=2,")
res3: DemoParser.PWriter = (Vector(Warning: Dangling comma),List(Equation(a,1), Equation(b,2)))
scala> DemoParser("a=1, b=2, ")
res4: DemoParser.PWriter = (Vector(Warning: Dangling comma),List(Equation(a,1), Equation(b,2)))
scala> DemoParser("a=1, b=2, ;")
res5: DemoParser.PWriter = (Vector(string matching regex `\z' expected but `;' found),List())
scala>
As you can see, it handles the error cases fine. If you want to extend the example, add case classes for different kinds of errors and include the current parser positions in the messages.
Btw. the problem with the white spaces is handled by the RegexParsers class. If you want to change the handling of white spaces, just override the field whiteSpace.
Your parser isn't expecting the trailing whitespace at the end of "name = John , ".
You could use a regex to optionally parse "," followed by any amount of whitespace:
// Parses one or more `inner` items separated by ","; a trailing "," followed by any
// amount of whitespace is routed into failure("Dangling comma").
// NOTE(review): `opt` normally turns a plain failure of its argument into None, so
// confirm the "Dangling comma" message surfaces as intended (`err` may be needed
// for a non-backtracking error).
def commaList[T](inner: Parser[T]): Parser[List[T]] =
rep1sep(inner, ",") <~ opt(",\\s*".r ~> failure("Dangling comma"))
Note that you can avoid using alternatives (|) here, by making the failure part of the optional parser. If the optional part consumes some input and then fails, then the whole parser fails.
I'm trying to write some Erlang that would filter an array in the form:
[{dakota, "cold and snowy"}, {california, "perfect weather"}] % and so on
Here is what I've got - I get a syntax error when I try to make a .beam from werl.
-module(matcher).
-export([findkeywords/2]).
%% Intended to return the {Key, Desc} entries of Arr whose description contains
%% Word as a space-separated token.
%% NOTE: as written this does not compile, because the outer fun bound to IsMatch
%% is never closed with `end`.
findkeywords(Word, Arr) ->
IsMatch = fun({Key, Desc}) ->
lists:any(fun(X) -> X==Word end, string:tokens(Desc, " ")),
lists:filter(IsMatch, [{K, V} || {K, V} <- Arr]).
Can anyone spot where my syntax is off?
I saw your call to arms on twitter and just had to come take a look. :D
If you want this to compile, you're just missing an end on your fun on line 6. Add it in and it compiles without complaint.
-module(matcher).
-export([findkeywords/2]).
%% Returns the {Key, Desc} pairs of Arr whose description contains Word as a
%% space-separated token. Elements of Arr that are not 2-tuples are dropped.
findkeywords(Word, Arr) ->
    Pairs = [{K, V} || {K, V} <- Arr],
    HasWord = fun({_Key, Desc}) ->
        Tokens = string:tokens(Desc, " "),
        lists:any(fun(Token) -> Token == Word end, Tokens)
    end, % this `end` closes the outer fun; it is the one missing in the question
    lists:filter(HasWord, Pairs).
You can clean this up a bit too, unless this is an exercise in string matching for yourself. The string module has str(String, SubString) -> Index and rstr(String, SubString) -> Index that are described as such in the Erlang Manual:
Returns the position where the first/last occurrence of SubString begins in String. 0 is returned if SubString does not exist in String. For example:
> string:str(" Hello Hello World World ", "Hello World").
8
Using this tidies it up a bit, and you could even shorten the whole thing into a one liner. The list comprehension is unnecessary as the data is already in the format that you're trying to feed it in.
-module(matcher).
-export([findkeywords/2]).
%% Returns the {Key, Desc} pairs of Arr whose description contains Word as a substring.
findkeywords(Word, Arr) ->
    %% string:str/2 returns 0 when Word does not occur in Desc.
    ContainsWord = fun({_Key, Desc}) -> string:str(Desc, Word) =/= 0 end,
    lists:filter(ContainsWord, Arr).
You miss one "end" from the two functions. Also, it looks like the list comprehension in this example used is not needed.
%% Returns the {Key, Desc} pairs of Arr whose description contains Word as a
%% space-separated token. Elements of Arr that are not 2-tuples are dropped.
findkeywords(Word, Arr) ->
    Candidates = [{K, V} || {K, V} <- Arr],
    lists:filter(
        fun({_, Desc}) ->
            lists:any(fun(Token) -> Token == Word end, string:tokens(Desc, " "))
        end,
        Candidates).
You are missing the end keyword for one of the funs. However, it looks like you are searching within strings. This is what is normally used:
-define(DATA, [{dakota, "cold and snowy"}, {california, "perfect weather"}]).

%% True when Small occurs somewhere in Big (string:rstr/2 returns 0 when it does not).
string_contains(Big, Small) ->
    string:rstr(Big, Small) =/= 0.

%% Returns the ?DATA entries whose second element contains Word as a substring.
findkeywords(Word) ->
    lists:filter(fun(Entry) -> string_contains(element(2, Entry), Word) end, ?DATA).
Anyway, one of your funs was not ended well. that's all.