Recursive Descent Parser in Erlang - parsing

I'm trying to construct a recursive descent parser (RDP) in Erlang. So far I have read and processed a file of tokens, which I will pass to the function as e.g. [2,6,3,7,3,2,4,6,3,2,4,4,99] (sample input that should work), and I need to ensure that every token (or group of tokens) can be derived by transforming the default rule [bexp0] into some matching list of terminals.
%% All terminal symbols of the grammar: the token codes 1..12 plus the
%% end-of-input marker 99.
get_terminal_list() ->
    lists:seq(1, 12) ++ [99].
%% The non-terminal (production) symbols of the grammar, with the
%% default start symbol bexp0 first.
get_prod_list() ->
    [bexp0, bexp, bexp1, bterm].
%% Production table: one clause per non-terminal, returning the list of
%% alternative right-hand sides (integers are terminals, atoms are
%% non-terminals). Clause-per-value dispatch is the idiomatic Erlang
%% replacement for a case on the sole argument; for valid symbols the
%% results are identical (an unknown symbol now raises function_clause
%% instead of case_clause).
get_sym_list(bexp0) -> [[bexp,99]];
get_sym_list(bexp) -> [[bterm,bexp1]];
get_sym_list(bexp1) -> [[5,bexp,bexp1],[6,bexp,bexp1]];
get_sym_list(bterm) -> [[3,bexp,4],[2],[8],[9],[2,10,1],[2,12,1],[2,11,1],[7,bterm]].
get_sym_list shows the grammar in use - where each int stands for a terminal character and each sublist is a set, i.e. bterm-> [[7,bterm]] means that bterm can turn into terminal '7' followed by non-terminal 'bterm'.
Right now I'm working on somehow realizing this:
Check if first set of rule has some terminal
if so, check which side, reduce list of tokens from same side until first occurrence of this token,
parse this new list (w/o token) with rest of the set of this rule (also without the matched terminal).
return {success|failure, list_of_tokens, list_of_rules}
if success -> check with new list_of_tokens, default_rule
if failure, check with old list_of_tokens, and new list_of_rules.
I assume the end states will be reached if the list of rules is empty - hence we have exhausted every possible production, hence not valid, or
list of tokens is empty, hence we have matched every token/set of tokens to some rule

Probably this will do what you want:
-module(parse).
-export([parse1/0, parse1/1, parse2/0, parse2/1]).
%% Convenience entry points. The zero-arity versions run the sample
%% input through the corresponding grammar; delegating to the one-arity
%% versions removes the duplicated call to parse/3, so the grammar fun
%% and start symbol are each written in exactly one place.
parse1() ->
parse1([2,6,3,7,3,2,4,6,3,2,4,4,99]).
parse1(Input) ->
parse([bexp], Input, fun get_sym_list1/1).
parse2() ->
parse2([2,6,3,7,3,2,4,6,3,2,4,4,99]).
parse2(Input) ->
parse([bexp0], Input, fun get_sym_list2/1).
%% parse(Symbols, Input, Grammar) -> true | false
%% Depth-first, backtracking recursive-descent parse. Symbols is the
%% stack of grammar symbols still to match (terminals are integers,
%% non-terminals atoms); Input is the remaining token list; Grammar is
%% a fun mapping a non-terminal to its list of alternatives.
%% Stack and input both empty: every token was derived -> success.
parse([], [], _) ->
true;
%% Stack empty but input remains -> this derivation fails.
parse([], _, _) ->
false;
%% Head of stack equals head of input: consume both. NOTE(review):
%% because this is a plain match, a non-terminal atom appearing
%% literally in the input also "matches" itself, as the transcript
%% with bterm in the input shows.
parse([X | TX], [X | TY], Fun) ->
io:format("+ Current:~w\tTokens:~w Input:~w~n", [X, TX, TY]),
parse(TX, TY, Fun);
%% No direct match: a terminal on top of the stack means the branch
%% fails; a non-terminal is expanded by trying each alternative pushed
%% onto the stack, succeeding if any alternative parses.
parse([X | TX], Input, Fun) ->
io:format(" Current:~w\tTokens:~w Input:~w~n", [X, TX, Input]),
case lists:member(X, get_terminal_list()) of
true -> false;
false -> lists:any(fun(T) -> parse(T ++ TX, Input, Fun) end, Fun(X))
end.
%% Terminal symbols: the token codes 1..12 plus the end marker 99.
get_terminal_list() ->
    lists:seq(1, 12) ++ [99].
%% Grammar 1 production table: one clause per non-terminal, returning
%% its alternative right-hand sides (integers = terminals, atoms =
%% non-terminals). Clause dispatch replaces the case on the argument;
%% behaviour is identical for every defined symbol (unknown symbols
%% raise function_clause instead of case_clause). Referenced as
%% fun get_sym_list1/1, so name and arity are unchanged.
get_sym_list1(bexp) -> [[bexp1],[bterm],[bterm,bexp2]];
get_sym_list1(bexp1) -> [[99]];
get_sym_list1(bexp2) -> [[5,bterm,bexp2],[6,bterm,bexp2]];
get_sym_list1(bterm) -> [[bfactor],[7,bterm]];
get_sym_list1(bfactor) -> [[3,bexp,4],[bconst],[2,10,1],[2,12,1],[2,11,1],[2]];
get_sym_list1(bconst) -> [[8],[9]].
%% Grammar 2 production table (the left-factored variant with the u1
%% helper non-terminal): one clause per non-terminal instead of a case
%% on the argument. Identical results for every defined symbol;
%% referenced as fun get_sym_list2/1, so name and arity are unchanged.
get_sym_list2(bexp0) -> [[bterm,bexp1]];
get_sym_list2(bexp) -> [[bterm,bexp1]];
get_sym_list2(bexp1) -> [[5,u1],[6,bexp,bexp1],[99]];
get_sym_list2(bterm) -> [[u1,4],[2],[8],[9],[2,10,1],[2,12,1],[2,11,1],[7,bterm]];
get_sym_list2(u1) -> [[3,bexp]].
However, it looks like either the grammar or the input list is incorrect, because as far as I can see neither the old nor the new grammar parses that input. The parser itself seems to be working fine, though, because it will parse an input like this one:
41> parse:parse2([2,6,8,5,3,bterm,5,3,9,99,99]).
Current:bexp0 Tokens:[] Input:[2,6,8,5,3,bterm,5,3,9,99,99]
Current:bterm Tokens:[bexp1] Input:[2,6,8,5,3,bterm,5,3,9,99,99]
Current:u1 Tokens:[4,bexp1] Input:[2,6,8,5,3,bterm,5,3,9,99,99]
Current:3 Tokens:[bexp,4,bexp1] Input:[2,6,8,5,3,bterm,5,3,9,99,99]
+ Current:2 Tokens:[bexp1] Input:[6,8,5,3,bterm,5,3,9,99,99]
Current:bexp1 Tokens:[] Input:[6,8,5,3,bterm,5,3,9,99,99]
Current:5 Tokens:[u1] Input:[6,8,5,3,bterm,5,3,9,99,99]
+ Current:6 Tokens:[bexp,bexp1] Input:[8,5,3,bterm,5,3,9,99,99]
Current:bexp Tokens:[bexp1] Input:[8,5,3,bterm,5,3,9,99,99]
Current:bterm Tokens:[bexp1,bexp1] Input:[8,5,3,bterm,5,3,9,99,99]
Current:u1 Tokens:[4,bexp1,bexp1] Input:[8,5,3,bterm,5,3,9,99,99]
Current:3 Tokens:[bexp,4,bexp1,bexp1] Input:[8,5,3,bterm,5,3,9,99,99]
Current:2 Tokens:[bexp1,bexp1] Input:[8,5,3,bterm,5,3,9,99,99]
+ Current:8 Tokens:[bexp1,bexp1] Input:[5,3,bterm,5,3,9,99,99]
Current:bexp1 Tokens:[bexp1] Input:[5,3,bterm,5,3,9,99,99]
+ Current:5 Tokens:[u1,bexp1] Input:[3,bterm,5,3,9,99,99]
Current:u1 Tokens:[bexp1] Input:[3,bterm,5,3,9,99,99]
+ Current:3 Tokens:[bexp,bexp1] Input:[bterm,5,3,9,99,99]
Current:bexp Tokens:[bexp1] Input:[bterm,5,3,9,99,99]
+ Current:bterm Tokens:[bexp1,bexp1] Input:[5,3,9,99,99]
Current:bexp1 Tokens:[bexp1] Input:[5,3,9,99,99]
+ Current:5 Tokens:[u1,bexp1] Input:[3,9,99,99]
Current:u1 Tokens:[bexp1] Input:[3,9,99,99]
+ Current:3 Tokens:[bexp,bexp1] Input:[9,99,99]
Current:bexp Tokens:[bexp1] Input:[9,99,99]
Current:bterm Tokens:[bexp1,bexp1] Input:[9,99,99]
Current:u1 Tokens:[4,bexp1,bexp1] Input:[9,99,99]
Current:3 Tokens:[bexp,4,bexp1,bexp1] Input:[9,99,99]
Current:2 Tokens:[bexp1,bexp1] Input:[9,99,99]
Current:8 Tokens:[bexp1,bexp1] Input:[9,99,99]
+ Current:9 Tokens:[bexp1,bexp1] Input:[99,99]
Current:bexp1 Tokens:[bexp1] Input:[99,99]
Current:5 Tokens:[u1,bexp1] Input:[99,99]
Current:6 Tokens:[bexp,bexp1,bexp1] Input:[99,99]
+ Current:99 Tokens:[bexp1] Input:[99]
Current:bexp1 Tokens:[] Input:[99]
Current:5 Tokens:[u1] Input:[99]
Current:6 Tokens:[bexp,bexp1] Input:[99]
+ Current:99 Tokens:[] Input:[]
true
BTW true means the input has been parsed and false that it has not.

Related

Erlang inference

The following source doesn't compile because Stopover is unbound.
I'm new to Erlang, how can I rewrite it?
-module(distances).
-export([ path/2 ]).
%% Known direct distances; each clause returns a {km, N} tagged tuple.
path( madrid, paris ) ->
{ km, 1049 };
path( paris, moscou ) ->
{ km, 2482 };
path( moscou, berlin ) ->
{ km, 1603 };
%% BUG (the subject of the question): Stopover is never bound -- it is
%% neither a parameter of this clause nor assigned anywhere, so the
%% module does not compile. Also, even if it did, {km,A} + {km,B} is a
%% badarith: tuples cannot be added, as the answer below demonstrates.
path( From, To ) ->
path( From, Stopover ) + path( Stopover, To ).
The usage of this module maybe:
path( madrid, moscou ).
And the expected answer should be { km, 3531}.
The following source doesn't compile because Stopover is unbound.
I'm new to Erlang, how can I rewrite it?
Look at this code:
-module(a).
-compile(export_all).
do_stuff() ->
Stopover.
Here's what happens when I try to compile it:
a.erl:5: variable 'Stopover' is unbound
The variable Stopover was never assigned a value, so erlang has no idea what should be returned by the function do_stuff(). You are doing something similar here:
path( From, Stopover ) + path( Stopover, To ).
The variables From and To are parameter variables for the function path(), and when path() is called, e.g. path(madrid, moscow), then madrid will be assigned to the variable From, and moscow will be assigned to the variable To. Note, however, that nowhere do you assign any value to the variable Stopover.
You need to redefine path() to look like this:
path(From, To, Stopover) ->
Next, you should try to see if adding tuples actually works:
2> {km, 5} + {km, 3}.
** exception error: an error occurred when evaluating an arithmetic expression
in operator +/2
called as {km,5} + {km,3}
3>
Nope!
What you need to do is use pattern matching to extract the distance, an integer, from each tuple, then add the two integers:
{km, Distance1} = path( From, Stopover ),
... = path(Stopover, To),
{km, Distance1 + Distance2}.
This question is already answered by #7stud, and I was wondering how to implement such a path search in erlang. Here is a possible solution:
-module(distances).
-export([ path/2,getTowns/0,start/1, stop/0 ]).
%% Client call: fetch the full map of known paths from the server, then
%% either return the stored distance or fall through to a search
%% (path/4 dispatches on the maps:get/3 result, not_found as default).
path(From,To) ->
Paths = getPath(),
path(From,To,maps:get(orderedTuple(From,To), Paths, not_found),Paths).
% distance server: keeps the map of known distances
% server interfaces
%% Read {TownA,TownB,Distance} terms from the file Towns and start the
%% registered distance server holding the resulting map (keys are
%% normalised via orderedTuple/2 so direction does not matter).
start(Towns) ->
{ok,List} = file:consult(Towns),
Paths = lists:foldl(fun({A,B,D},Acc) -> maps:put(orderedTuple(A,B), D, Acc) end,#{},List),
start(Paths,distance_server).
%% Ask the registered server to terminate.
stop() ->
distance_server ! stop.
%% Print the sorted, de-duplicated list of towns appearing in any key.
getTowns() ->
K = maps:keys(getPath()),
L = lists:usort(lists:flatten([[A,B] || {A,B} <- K])),
io:format("list of towns :~n~p~n~n",[L]).
%% Synchronous request for the server's path map.
%% NOTE(review): the receive has no 'after' clause, so this blocks
%% forever if the server is not running.
getPath() ->
distance_server ! {getPath,self()},
receive
Path -> Path
end.
% server functions
%% Spawn the server loop holding Paths and register it under Server.
%% NOTE(review): bare spawn (not spawn_link/spawn_monitor), so a crash
%% of the server goes unnoticed by the starter.
start(Paths,Server) ->
Pid = spawn(fun() -> distanceServer(Paths) end),
register(Server, Pid).
%% Server loop: replies to {getPath,From} with the whole map;
%% 'stop' ends the loop (and thus the process).
distanceServer(Path) ->
receive
stop -> stop;
{getPath,From} ->
From ! Path,
distanceServer(Path)
end.
% Searching path
%% path/4: third argument is the result of the direct-edge lookup.
path(From,To,not_found,Paths) -> % not a known direct edge: search for the shortest path
{KM,P} = searchBestPath({0,[From]},To,maps:keys(Paths),{no_dist,no_path}),
case P of
no_path -> not_found;
_ -> {lists:reverse(P),KM}
end;
path(From,To,KM,_) -> % else give the result. Assumption: the known path contains always the best one.
{[From,To],KM}.
%% searchBestPath({DistSoFar, ReversedPath}, To, RemainingEdges, Best):
%% depth-first branch-and-bound over the list of known edges.
%% Reached To with a strictly shorter total distance: new best. The
%% initial best distance is the atom no_dist, which in Erlang term
%% order compares greater than any number, so the first complete path
%% always becomes the best.
searchBestPath({N,[To|_]}=Path,To,_,{BestD,_}) when N < BestD -> Path; % keep the new path if it is better
searchBestPath({N,_},_,_,{BestD,_}=Best) when N >= BestD -> Best; % cancel search if the path so far is longer or equal to the best found
searchBestPath({D,[H|_]=PathSoFar},To,Remaining,Best) ->
Next = [remove(H,{A,B}) || {A,B} <- Remaining, (A =:= H) orelse (B =:= H)], % list of all possible next steps
case Next of
[] -> Best;
Next -> lists:foldl(
fun(X,Acc) ->
{_,ND} = path(H,X), % will always match
R = Remaining -- [orderedTuple(H,X)], % necessary to avoid possible infinite loop in the first search
searchBestPath({D+ND,[X|PathSoFar]},To,R,Acc) % evaluate path for all possible next steps
end,
Best,Next)
end.
% helpers
%% Normalise a pair of towns to a canonical {Smaller, Larger} tuple so
%% that {a,b} and {b,a} produce the same map key.
orderedTuple(A, B) when A < B -> {A, B};
orderedTuple(A, B) -> {B, A}.
%% Given a town and an edge tuple containing it, return the other
%% endpoint of the edge.
remove(Town, {Town, Other}) -> Other;
remove(Town, {Other, Town}) -> Other.
it uses an external file to define the "known distances", I have used this one for test:
{paris,lyon,465}.
{lyon,marseille,314}.
{marseille,nice,198}.
{marseille,toulouse,404}.
{toulouse,bordeaux,244}.
{bordeaux,paris,568}.
{bordeaux,nantes,347}.
{nantes,paris,385}.
{paris,lille,225}.
{paris,strasbourg,491}.
{lille,strasbourg,525}.
{lille,bruxelles,120}.
{rennes,brest,244}.
{rennes,paris,351}.
{rennes,nantes,113}.
and the result in the shell:
1> c(distances).
{ok,distances}
2> distances:start("distances.txt").
true
3> distances:getTowns().
list of towns :
[bordeaux,brest,bruxelles,lille,lyon,marseille,nantes,nice,paris,rennes,
strasbourg,toulouse]
ok
4> distances:path(bordeaux,bruxelles).
{[bordeaux,paris,lille,bruxelles],913}
5> distances:path(nice,bruxelles).
{[nice,marseille,lyon,paris,lille,bruxelles],1322}
6> distances:path(moscou,paris).
not_found
7> distances:stop().
stop
8>
next step could be to increase the list of known distances each time a new request is done.

Calculate input with atoms, erlang

I am trying to calculate this input:
evaluate({mul,{plus,{num,2},{num,3}},{num,4}}).
and get this as an answer:
20
But I cant get it working, here is my code:
%% The asker's (broken) attempt: folds a stack evaluator over the
%% top-level tuple converted to a list, expecting a single result left
%% on the stack.
evaluate(List) ->
[Res] = lists:foldl(fun evaluate/2, [], tuple_to_list(List)),
Res.
%% Push a number onto the stack.
evaluate({num,X},Stack) -> [X|Stack];
%% Pop two operands, push the combined result.
evaluate(plus,[N1,N2|Stack])->[N1+N2|Stack];
evaluate(mul,[N1,N2|Stack])->[N1*N2|Stack];
%% NOTE(review): tuple_to_list/1 only flattens one level, so the fold
%% passes nested operand tuples like {plus,...} directly to evaluate/2,
%% where no clause matches them. This last clause expects a one-element
%% LIST containing a tuple (the fold never passes that), and it also
%% discards the results of the first two recursive calls.
evaluate([{Optr, Num1, Num2}],Stack) ->
evaluate(Num1,Stack),evaluate(Num2,Stack),evaluate(Optr,Stack).
Can you point out my mistakes and correct me, thank you.
First, define your syntax:
{num,Value} for a number,
{plus,Term1,Term2} for an addition,
{mul,Term1,Term2} for a multiplication,
and so on.
Second, solve individual cases:
%% Recursively evaluate an arithmetic term: a number is its own value;
%% plus/mul evaluate both operands and combine the results.
evaluate({num, N}) -> N;
evaluate({plus, Left, Right}) -> evaluate(Left) + evaluate(Right);
evaluate({mul, Left, Right}) -> evaluate(Left) * evaluate(Right).
in the shell:
1> E = fun E({num,Value}) -> Value;
1> E({plus,Term1,Term2}) -> E(Term1) + E(Term2);
1> E({mul,Term1,Term2}) -> E(Term1) * E(Term2)
1> end.
#Fun<erl_eval.30.90072148>
2> E({mul,{plus,{num,2},{num,3}},{num,4}}).
20
3>
Although I have used reverse Polish notation a lot, it does not apply to your example as written. The input would have to be (for your example) [2, 3, plus, 4, mul], and it can be solved like this:
%% Stack-machine step for one reverse-polish token.
evaluate(plus,[A,B|Rest]) -> [A+B|Rest];
% pop operands,perform the operation and push the result
evaluate(mul,[A,B|Rest]) -> [A*B|Rest];
evaluate(A,Acc) when is_number(A) -> [A|Acc]. % push operand on the stack
%% Fold the step function over the expression; the stack must end with
%% exactly one result. FIX: the original passed `evaluate/2` to foldl,
%% which parses as the arithmetic expression `evaluate / 2` and fails
%% at runtime -- the fun reference `fun evaluate/2` is required.
evaluate(Exp) -> [Res] = lists:foldl(fun evaluate/2,[],Exp), Res.
in the shell
22> E = fun E(plus,[A,B|Rest]) -> [A+B|Rest];
22> E(mul,[A,B|Rest]) -> [A*B|Rest];
22> E(A,Acc) -> [A|Acc]
22> end.
#Fun<erl_eval.36.90072148>
23> Eval = fun(Exp) -> [Res] = lists:foldl(E,[],Exp), Res end.
#Fun<erl_eval.6.90072148>
24> Eval([2,3,plus,4,mul]).
20

Erlang repetition string in string

I have a string:
"abc abc abc abc"
How do I calculate the number of "abc" repetitions?
If you are looking for a practical and efficient implementation which will scale well even for longer substrings, you can use binary:matches/2,3, which uses the Boyer–Moore string search algorithm (and Aho–Corasick for multiple substrings). It obviously works only for ASCII or Latin1 strings.
%% Count non-overlapping occurrences of the substring S in L by
%% converting both to binaries and counting binary:matches/2 results.
repeats(Haystack, Needle) ->
    Matches = binary:matches(list_to_binary(Haystack), list_to_binary(Needle)),
    length(Matches).
If it is for education purposes, you can write your own, less efficient version for lists of any kind. If you know the substring at compile time, you can use a very simple version whose performance is not too bad:
%% The substring is fixed at compile time, so it can be matched
%% directly in a function head via the ?SUBSTR ++ Tail pattern.
-define(SUBSTR, "abc").

%% Count non-overlapping occurrences of ?SUBSTR in the given list.
repeats(Str) -> repeats(Str, 0).

repeats(?SUBSTR ++ Rest, Count) -> repeats(Rest, Count + 1);
repeats([_ | Rest], Count) -> repeats(Rest, Count);
repeats([], Count) -> Count.
If you don't know the substring in advance, you can write a slightly more complicated and less efficient version:
%% Count non-overlapping occurrences of Sub in List: on a prefix match
%% the scan continues AFTER the matched substring, otherwise it
%% advances one element.
repeats(List, Sub) -> repeats(List, Sub, 0).

repeats([], _Sub, Count) -> Count;
repeats(List, Sub, Count) ->
    case prefix(List, Sub) of
        {found, Rest} -> repeats(Rest, Sub, Count + 1);
        nope -> repeats(tl(List), Sub, Count)
    end.

%% prefix(L, S): if S is a prefix of L, return {found, remainder of L};
%% otherwise nope.
prefix([C | T], [C | S]) -> prefix(T, S);
prefix(L, []) -> {found, L};
prefix(_, _) -> nope.
And you, of course, can try write some more sophisticated variant as simplified Boyer–Moore for lists.
1> F = fun
F([],_,_,N) -> N;
F(L,P,S,N) ->
case string:sub_string(L,1,S) == P of
true -> F(tl(string:sub_string(L,S,length(L))),P,S,N+1);
_ -> F(tl(L),P,S,N)
end
end.
#Fun<erl_eval.28.106461118>
2> Find = fun(L,P) -> F(L,P,length(P),0) end.
#Fun<erl_eval.12.106461118>
3> Find("abc abc abc abc","abc").
4
4>
this works if defined in a module, or in the shell (but only from R17 on, when the shell gained named funs).
length(lists:filter(fun(X) -> X=="abc" end, string:tokens("abc abc abc abc", " "))).

Code review of CYK algorithm implementation in Erlang

I am beginning Erlang and as an exercise I tried to implement the CYK algorithm.
Main code(cyk.erl):
%%% An attempt for a CYK parser in Erlang
-module(cyk).
-export([
init_storage/0,
import_grammar_file/1,
add_grammar_rule/1,
analyze/1,
test_analyze/0
]).
%% Initialize the ets storage for grammar
%% A named 'bag' table keyed by category: one category may have several
%% rule bodies, so duplicate keys must be allowed.
init_storage() ->
ets:new(?MODULE, [bag, named_table]).
%%------------------------------------------
%%
%% Grammar
%%
%%------------------------------------------
%% Import a grammar file
%% Open File for reading and stream its lines into the rules table.
import_grammar_file(File) ->
{ok, Device} = file:open(File, read),
import_file_rules(Device).
%% Import all the rules in the file
%% Read line by line until eof; each line is one grammar rule. The
%% device is closed only on eof.
import_file_rules(Device) ->
case io:get_line(Device, "") of
eof ->
io:format("Grammar file imported~n"),
file:close(Device);
Line ->
add_grammar_rule(Line),
import_file_rules(Device)
end.
%% Add a grammar rule
%% Parse a line of the form "A -> B" with a regex and store {A, B}
%% (both binaries) in the ?MODULE ets table. Unparseable lines are
%% reported but not fatal.
add_grammar_rule(Rule) ->
case re:run(Rule, "^([^\s]+)\s?->\s?([^\n]+)$", [{capture, all_but_first, binary}]) of
{match, [A, B]} ->
ets:insert(?MODULE, {A, B}),
io:format("parsing ~p -> ~p~n", [A, B]);
nomatch ->
io:format("cannot parse ~p~n", [Rule])
end.
%%------------------------------------------
%%
%% Main logic
%%
%%------------------------------------------
%% Analyze a sentence
%% Split Sentence into words, map each word to the list of grammar
%% categories that produce it, then run the CYK table construction on
%% that bottom row and print every intermediate value.
analyze(Sentence) ->
io:format("analysing: ~p~n", [Sentence]),
WordList = re:split(Sentence, " "),
io:format("wordlist: ~p~n", [WordList]),
Representation = lists:map( fun(Word) -> associate(Word) end, WordList),
io:format("representation: ~p~n", [Representation]),
Result = process([Representation]),
io:format("result: ~p~n", [Result]).
% associate sentence words with grammar terms
%% Return the flat list of categories that can directly produce Word.
%% ets:match/2 yields a list of [Category] binding lists; flattening
%% gives the flat category list, and lists:flatten([]) is [], so the
%% original case distinction between [H|T] and [] was redundant.
associate(Word) ->
lists:flatten(ets:match(cyk, {'$1', Word})).
% process sentence representation
%% Build the CYK pyramid bottom-up. The representation is a list of
%% rows with the newest (shortest) row first; the last row is the
%% per-word row, so its length is the sentence length.
process(Representation) ->
Limit = length(lists:last(Representation)),
process(Representation, Limit).
%% While the newest row has more than one cell, compute the next row
%% (one cell shorter) and prepend it; stop when the apex is reached.
process(Representation, Limit) when Limit > 1 ->
NextStep = process(Representation, 1, Limit-1, []),
process([NextStep|Representation], Limit-1);
process(Representation, _Limit) ->
Representation.
%% Compute cells Index..Limit of the next row: each cell is derived
%% from the subtree of already-computed cells at that position
%% (representation is reversed so the per-word row comes first).
process(Representation, Index, Limit, Acc) when Index =< Limit ->
Subtree = extract_subtree(lists:reverse(Representation), Index),
Result = process_subtree(Subtree),
process(Representation, Index+1, Limit, [Result|Acc]);
process(_Representation, _Index, _Limit, Acc) ->
lists:reverse(Acc).
%%------------------------------------------
%%
%% Subtree
%%
%%------------------------------------------
%% For each way of splitting the span covered by Subtree, pair the
%% categories of the first part with those of the complementary part,
%% look up each "A B" concatenation in the grammar table and collect
%% every producing category.
process_subtree(Subtree) ->
process_subtree(Subtree, Subtree, [], 1).
process_subtree([], _Subtree, Acc, _Index) ->
Acc;
process_subtree([H|T], Subtree, Acc, Index) ->
%% A: categories of the first part of this split (head cell of H).
A = lists:nth(1,H),
%% B: categories of the complementary part, counted from the end.
Bind = length( Subtree ) - Index + 1,
B = lists:last( lists:nth( Bind, Subtree) ),
% generating the possibilities of grammar
Pos = [ list_to_binary(binary:bin_to_list(X)++" "++binary:bin_to_list(Y)) || X<-A, Y<-B ],
% looking up in the grammar
Result = lists:flatten( [ ets:match(cyk, {'$1', X}) || X <- Pos ] ),
%% NOTE(review): Acc++Result in a loop is O(n^2); fine for short
%% sentences, worth revisiting for long input.
process_subtree(T, Subtree, Acc++Result, Index + 1).
%% Extract the subtree rooted at Position: from each row (per-word row
%% first) take a segment starting at Position, one element shorter for
%% each successive row.
extract_subtree(Representation, Position) ->
extract_subtree(Representation, length(Representation) + 1, Position, []).

extract_subtree([], _Len, _Position, Acc) ->
lists:reverse(Acc);
extract_subtree([Row | Rest], Len, Position, Acc) ->
Segment = lists:sublist(Row, Position, Len),
extract_subtree(Rest, Len - 1, Position, [Segment | Acc]).
%%------------------------------------------
%%
%% Test
%% using the same example as
%% http://en.wikipedia.org/wiki/CYK_algorithm
%%
%%------------------------------------------
%% End-to-end check: build the table, load the example grammar file and
%% analyse the Wikipedia CYK example sentence.
test_analyze() ->
init_storage(),
import_grammar_file("grammar.txt"),
analyze("she eats a fish with a fork").
The grammar file (grammar.txt)
S -> NP VP
VP -> VP PP
VP -> V NP
VP -> eats
PP -> P NP
NP -> Det N
NP -> she
V -> eats
P -> with
N -> fish
N -> fork
Det -> a
The code can be tested from the erlang shell
> c(cyk).
> cyk:test_analyze().
parsing <<"S">> -> <<"NP VP">>
parsing <<"VP">> -> <<"VP PP">>
parsing <<"VP">> -> <<"V NP">>
parsing <<"VP">> -> <<"eats">>
parsing <<"PP">> -> <<"P NP">>
parsing <<"NP">> -> <<"Det N">>
parsing <<"NP">> -> <<"she">>
parsing <<"V">> -> <<"eats">>
parsing <<"P">> -> <<"with">>
parsing <<"N">> -> <<"fish">>
parsing <<"N">> -> <<"fork">>
parsing <<"Det">> -> <<"a">>
Grammar file imported
analysing: "she eats a fish with a fork"
wordlist: [<<"she">>,<<"eats">>,<<"a">>,<<"fish">>,<<"with">>,<<"a">>,
<<"fork">>]
representation: [[<<"NP">>],
[<<"VP">>,<<"V">>],
[<<"Det">>],
[<<"N">>],
[<<"P">>],
[<<"Det">>],
[<<"N">>]]
result: [[[<<"S">>]],
[[],[<<"VP">>]],
[[],[],[]],
[[<<"S">>],[],[],[]],
[[],[<<"VP">>],[],[],[<<"PP">>]],
[[<<"S">>],[],[<<"NP">>],[],[],[<<"NP">>]],
[[<<"NP">>],
[<<"VP">>,<<"V">>],
[<<"Det">>],
[<<"N">>],
[<<"P">>],
[<<"Det">>],
[<<"N">>]]]
The code seems to work fine for this example, but I was looking for ways to improve it (make it more erlang-ish) and specially to make the processing distributed on multiple process/nodes.
I guess all the process_subtree executions for each step could be done concurrent, but I can't really figure how.
Any suggestions will be greatly appreciated!
I have written this solution which use concurrent execution.
Compared to Eric's solution, some changes were needed for the usage of multiple processes, some others because I think it is more efficient (I reversed keys and values in the rules ets table, and I have chosen a set), some because I think it is cleaner (I close the grammar file in the function that opens it), and some because I am more familiar with these modules (string:tokens ...).
[edit]
I have replaced a useless spawn by faster recursive call, and suppressed the wait function by adding a message to synchronize the processes.
I got the idea for this implementation from a nice JavaScript animation of the CYK algorithm, which is unfortunately no longer available online.
@Eric, it is possible to look at all the steps of the analysis by opening the analyze ets table with observer, which is why I do not delete it.
-module(cyk).
-export([
import_grammar_file/1,
add_grammar_rule/2,
analyze/1,
test_analyze/1,
test_analyze/0
]).
%%------------------------------------------
%%
%% Grammar
%%
%%------------------------------------------
%% Import a grammar file
%% (Re)create the rules table, load every line of File into it, then
%% close the file in the same function that opened it.
import_grammar_file(File) ->
reset_ets(rules, ets:info(rules)),
{ok, Device} = file:open(File, read),
ok = add_grammar_rule(Device,file:read_line(Device)),
file:close(Device),
io:format("Grammar file imported~n").
%% Add a grammar rule
%% Recursively consume lines of the form "T -> H" or "T -> H Q". The
%% mapping is stored reversed (body -> list of heads): the rule body
%% becomes the ets key, built by key/2.
add_grammar_rule(_,eof) -> ok;
add_grammar_rule(Device,{ok,Rule}) ->
[T,"->",H|Q] = string:tokens(Rule," \n"),
Key = key(H,Q),
insert(Key,T,ets:lookup(rules, Key)),
add_grammar_rule(Device,file:read_line(Device)).
%% Build the ets key for a rule body: the bare symbol for a unary body,
%% a pair tuple for a binary one.
key(First, []) -> First;
key(First, [Second]) -> {First, Second}.
%% Third argument is the current lookup result for Key in 'rules'.
insert(Key,T,[]) -> ets:insert(rules, {Key,[T]}); % first head for this body: start a new list
insert(Key,T,[{Key,L}]) -> ets:insert(rules, {Key,[T|L]}). % body already known: prepend the new head
%%------------------------------------------
%%
%% Main logic
%%
%%------------------------------------------
%% Analyze a sentence
%% Spawn one process per word (column J). Row 0 of each column is the
%% word's category list taken from 'rules'; each process then computes
%% the cells {I,J} of the public 'analyze' ets table. The foldl hands
%% each new process the pid of the previous column, chaining them so a
%% column can signal its left-hand neighbour with {done,Row}; every
%% process is primed with {done,0}. The caller blocks until some
%% process (column 1, see whatis/5) sends the final result back.
analyze(Sentence) ->
reset_ets(analyze, ets:info(analyze)),
io:format("analysing: ~p~n", [Sentence]),
WordList = string:tokens(Sentence, " "),
Len = length(WordList),
Me = self(),
lists:foldl(fun(X,{J,Pid}) -> ets:insert(analyze,{{0,J},ets:lookup_element(rules,X,2)}),
(NewPid = spawn(fun() -> whatis(1,J,Len,Pid,Me) end)) ! {done,0},
{J+1,NewPid} end,
{1,none}, WordList),
receive
M -> M
end.
%% Second argument is ets:info(Name): undefined means the table does
%% not exist yet, so create it; otherwise just empty it.
reset_ets(Name, undefined) -> ets:new(Name,[set, named_table,public]);
reset_ets(Name, _) -> ets:delete_all_objects(Name).
%% whatis(I, J, Len, Pid, PidRet): loop of the process for column J,
%% computing cells {I,J} of the 'analyze' table for increasing I.
%% Column 1 reaching row Len means cell {Len-1,1} (the whole sentence)
%% is available: report it to the caller.
whatis(Len,1,Len,_,PidRet) -> PidRet ! ets:lookup_element(analyze,{Len-1,1},2); % finished
whatis(I,J,Len,_,_) when I + J == Len +1 -> ok; % ends useless processes
whatis(I,J,Len,Pid,PidRet) ->
%% Wait until the right-hand neighbour has reported row I-1 done,
%% which (transitively) makes every cell this row depends on available.
receive {done,V} when V == I-1 -> ok end,
%% All splits of the span at column J into two computed cells,
%% pairing every category of the two parts.
Cases = lists:map(fun({X,Y}) -> [{A,B} || A <- ets:lookup_element(analyze,X,2),
B <- ets:lookup_element(analyze,Y,2)] end,
[{{X-1,J},{I-X,J+X}} || X <- lists:seq(1,I)]),
%% Keep the head of every grammar rule whose body matches a pair.
%% NOTE(review): the [{_,[R]}] pattern assumes each body has exactly
%% one head; a grammar where two categories share a body would make
%% this match fail -- TODO confirm intended.
Val = lists:foldl(fun(X,Acc) -> case ets:lookup(rules,X) of
[] -> Acc;
[{_,[R]}] -> [R|Acc]
end end,
[],lists:flatten(Cases)),
ets:insert(analyze,{{I,J},Val}),
%% Tell the left-hand neighbour that row I of this column is done.
send(Pid,I),
whatis(I+1,J,Len,Pid,PidRet).
%% Notify the neighbouring column process that the given row is done;
%% the first column has no neighbour (none), so there is nobody to tell.
send(none, _Row) -> ok;
send(Neighbour, Row) -> Neighbour ! {done, Row}.
%%------------------------------------------
%%
%% Test
%% using the same example as
%% http://en.wikipedia.org/wiki/CYK_algorithm
%%
%%------------------------------------------
%% Load the example grammar and analyse S.
test_analyze(S) ->
import_grammar_file("grammar.txt"),
analyze(S).
%% Default: the Wikipedia CYK example sentence.
test_analyze() ->
test_analyze("she eats a fish with a fork").

How to collect frequencies of characters using a list of tuples {char,freq} in Erlang

I am supposed to collect frequencies of characters.
%% Intended: count character frequencies. Actual behaviour: a fresh
%% {Char,1} pair is consed for every character, so duplicates are never
%% merged (e.g. "foo" yields [{f,1},{o,1},{o,1}] as shown below).
freq(Sample) -> freq(Sample,[]).
freq([],Freq) ->
Freq;
freq([Char|Rest],Freq)->
freq(Rest,[{Char,1}|Freq]).
This function does not work in the right way. If the input is "foo", then the output will be
[{f,1},{o,1},{o,1}].
But I wished to have the output like
[{f,1},{o,2}].
I can't manage to modify an element in a tuple. Can anyone help me out of this and show me how it can be fixed?
a one line solution :o)
% generate a random list
L = [random:uniform(26)+$a-1 || _ <- lists:seq(1,1000)].
% collect frequency
lists:foldl(fun(X,[{[X],I}|Q]) -> [{[X],I+1}|Q] ; (X,Acc) -> [{[X],1}|Acc] end , [], lists:sort(L)).
in action
1> lists:foldl(fun(X,[{[X],I}|Q]) -> [{[X],I+1}|Q] ; (X,Acc) -> [{[X],1}|Acc] end , [], lists:sort("foo")).
[{"o",2},{"f",1}]
quite fast with a short list, but the execution time increases a lot with a long list (on my PC, it needs 6.5s for a 1,000,000 character text).
in comparison, with the same 1,000,000 character text, Ricardo's solution needs 5 sec.
I will try another version using ets.
By far the easiest way is to use an orddict to store the value as it already comes with an update_counter function and returns the value in a (sorted) list.
%% Count character frequencies with an orddict accumulator:
%% orddict:update_counter/3 creates or increments each entry, and an
%% orddict already is a key-sorted {Char, Count} list.
freq(Text) ->
    lists:foldl(fun(Char, Counts) -> orddict:update_counter(Char, 1, Counts) end,
                orddict:new(), Text).
Try with something like this:
%% Count character frequencies with a dict accumulator, then unfold the
%% dict into a plain {Char, Frequency} list.
freq(Text) ->
    Counted = lists:foldl(fun(Char, Acc) -> dict:update_counter(Char, 1, Acc) end,
                          dict:new(), Text),
    dict:fold(fun(Char, Count, Acc) -> [{Char, Count} | Acc] end, [], Counted).
The first line creates a dictionary that uses the char as key and the frequency as value (dict:update_counter).
The second line converts the dictionary in the list that you need.
Using pattern matching and proplists.
-module(freq).
-export([char_freq/1]).
-spec char_freq(string()) -> [tuple()].
%% Accumulate character counts in a proplist keyed by one-character
%% strings; unseen characters are prepended with count 1, seen ones are
%% removed and re-prepended with the incremented count.
char_freq(Text) -> char_freq(Text, []).

char_freq([], Counts) -> Counts;
char_freq([C | Rest], Counts) ->
    Key = [C],
    case proplists:get_value(Key, Counts) of
        undefined ->
            char_freq(Rest, [{Key, 1} | Counts]);
        Count ->
            char_freq(Rest, [{Key, Count + 1} | proplists:delete(Key, Counts)])
    end.
Test
1> freq:char_freq("abacabz").
[{"z",1},{"b",2},{"a",3},{"c",1}]
L = [list_to_atom(X) || X <- Str].
D = lists:foldl(fun({Char, _}, Acc) -> dict:update_counter(Char, 1, Acc) end, dict:new(), L).
dict:to_list(D).

Resources