The problem occurs in the find function where the erlang shell tells me that an exception error has occurred, it says:
Exception error: no function clause matching seminar1:find("t", []) (seminar1.erl, line 117) in function seminar1:encode/3 ( seminar1.erl, line 113).
I believe what is happening is that the pattern-matching done in the first find function is always failing, though I do not understand why since attempts to do comparisons manually have been successful.
-module(seminar1).
-compile(export_all).
sample() -> "the quick brown fox jumps over the lazy dog
this is a sample text that we will use when we build
up a table we will only handle lower case letters and
no punctuation symbols the frequency will of course not
represent english but it is probably not that far off".
text() -> "this is something that we should encode".
test() ->
Sample = sample(),
Tree = tree(Sample),
Encode = encode_table(Tree),
Decode = decode_table(Tree),
Text = text(),
Seq = encode(Text, Encode),
Text = decode(Seq, Decode).
tree(Sample) -> Freq = freq(Sample),
F = fun({node,N1,V1,_,_}, {node,N2,V2,_,_}) ->
if
V1 > V2 -> false;
V1 == V2 -> if
N1 > N2 -> false;
true -> true
end;
true -> true
end
end,
%lists:sort(F,Freq).
huffman(lists:sort(F,Freq)).
% Calculate the frequency of each letter in the Sample and return a
datastructure of nodes containing the letter involved,
% frequency of it in the sample.
% datastructure {node, Key, Value, Left, Right}
freq(Sample) -> freq(Sample, []).
freq([], Freq) -> Freq;
freq([Char|Rest], Freq) -> freq(Rest, check(Char, Freq)).
% Check function complements the Freq function, it takes the current input
and pattern matches it with the frequency datastructue being built.
% If it scores a hit that particular node has its frequency incremented and
then the whole datastructure is returned.
check(Key, []) -> [{node, [Key], 1, nil, nil}];
check(Key, [{node, [Key], Value, nil, nil}| Tail]) -> [{node, [Key], Value +
1, nil, nil}| Tail];
check(Key, [H|T]) -> [H |check(Key, T)].
% Creates the Huffman tree that is later used to encode a sample.
% The input is the SORTED datastructure derived from the freq-function.
% The leaves of the huffman tree are where actual values reside, branches
are just nodes containing information.
huffman( [ Tree | [] ] ) -> Tree;
huffman([{node, LeftKey, LeftValue, _L1, _R1},
{node, RightKey, RightValue, _L2, _R2} |Tail]) ->
% Creating a branch node
BranchNode = {node, LeftKey ++ RightKey, LeftValue + RightValue, {node,
LeftKey, LeftValue, _L1, _R1}, {node, RightKey, RightValue, _L2, _R2}},
huffman(insert(BranchNode, Tail)).
% A complementary function to the huffman function, inserts the newly made
branchnode into the already sorted tail.
% This is to prevent the sorted tail from becoming unsorted when turning the
tail list into a tree.
% It is inserted as such that the branchnode is the first selection of its
current value,
% meaning that if you have 4 nodes of value 5 ( one being a branchnode) then
the branch node will be the first option.
% This will make the Tree structure left leaning.
%
% N
% N N
% N N
% N N N N
insert(Node, []) -> [Node|[]];
insert(Node, [H|T]) ->
{_, _, Nvalue, _, _} = Node,
{_, _, Hdvalue, _, _} = H,
if
Nvalue =< Hdvalue -> [ Node | [H|T]];
true -> [H | insert(Node, T)]
end.
% Takes the tree created by the huffman-function as input and traverses said
tree.
% Returns a list containing the letters found and their position in the
tree, Left = 0, Right = 1.
% {"e"/[101], [0,0,0]} -- {[Key], [pathway]}
% Traversal method used: Left based traversal.
encode_table(RootNode) -> encode_table(RootNode, [], []).
% When traversing the Tree I need to know the branchnode I am in, the result
list as I am adding letters to it and a PathwayList which is the current
binary path to the branchnode I am in.
encode_table({_, Key, _, nil, nil}, AccList, PathwayList) ->
[AccList | [{Key, reverse(PathwayList)}]];
encode_table({_, _, _, Left, Right}, AccList, PathwayList) ->
encode_table(
Right,
encode_table(Left, AccList, [0| PathwayList]),
[1|PathwayList]).
% Complementary function for the encode_table/3 function, when traversing
the tree the the pathway gets reversed so it needs to be corrected.
reverse(L) -> reverse(L, []).
reverse([], Rev) -> Rev;
reverse([H|T], Rev) -> reverse(T, [H|Rev]).
% Takes a sample text and encodes it in accordance to the encoding table
supplied
encode(Text, Table) -> encode(Text, Table, []).
encode([], _, EncodedText) -> EncodedText;
encode([Letter|Rest], Table, EncodedText) ->
encode(Rest, Table, [find([Letter], Table) | EncodedText]).
% Complementary function to encode/3, searches the Table for the related
Letters binary path.
%find(Letter, []) -> Letter;
find(Letter, [{Letter, BinaryPath} | _Rest]) ->
BinaryPath;
find(Letter, [ _ | Rest]) ->
find(Letter, Rest).
decode_table(tree) -> ok.
decode(sequence, table) -> ok.
test(Letter, [{Letter, Asd} | []]) ->
true;
test(_, _) -> false.
I have tried to follow your code , but I am stuck on the function decode_table(tree) -> ok.. With this spelling, it fails (tree is an atom and won't match anything but tree itself). Change to _Tree to ignore the issue when I understood that the decode functions are not written yet or not provided.
For the encoding, the trouble is that the function encode_table returns a nested list, not suitable with the find function. If you replace the code by encode_table(RootNode) -> lists:flatten(encode_table(RootNode, [], [])). then it works (at least it seems to work since I don't know which result you are expecting)
-module(seminar1).
-compile(export_all).
sample() -> "the quick brown fox jumps over the lazy dog
this is a sample text that we will use when we build
up a table we will only handle lower case letters and
no punctuation symbols the frequency will of course not
represent english but it is probably not that far off".
text() -> "this is something that we should encode".
test() ->
Sample = sample(),
Tree = tree(Sample),
Encode = encode_table(Tree),
%Decode = decode_table(Tree),
Text = text(),
Seq = encode(Text, Encode),
%Text = decode(Seq, Decode).
Seq.
tree(Sample) -> Freq = freq(Sample),
F = fun({node,N1,V1,_,_}, {node,N2,V2,_,_}) ->
if
V1 > V2 -> false;
V1 == V2 -> if
N1 > N2 -> false;
true -> true
end;
true -> true
end
end,
%lists:sort(F,Freq).
huffman(lists:sort(F,Freq)).
% Calculate the frequency of each letter in the Sample and return a
% datastructure of nodes containing the letter involved,
% frequency of it in the sample.
% datastructure {node, Key, Value, Left, Right}
freq(Sample) -> freq(Sample, []).
freq([], Freq) -> Freq;
freq([Char|Rest], Freq) -> freq(Rest, check(Char, Freq)).
% Check function complements the Freq function, it takes the current input
% and pattern matches it with the frequency datastructue being built.
% If it scores a hit that particular node has its frequency incremented and
% then the whole datastructure is returned.
check(Key, []) ->
[{node, [Key], 1, nil, nil}];
check(Key, [{node, [Key], Value, nil, nil}| Tail]) ->
[{node, [Key], Value + 1, nil, nil}| Tail];
check(Key, [H|T]) ->
[H |check(Key, T)].
% Creates the Huffman tree that is later used to encode a sample.
% The input is the SORTED datastructure derived from the freq-function.
% The leaves of the huffman tree are where actual values reside, branches
% are just nodes containing information.
huffman( [ Tree | [] ] ) -> Tree;
huffman([{node, LeftKey, LeftValue, _L1, _R1},
{node, RightKey, RightValue, _L2, _R2} |Tail]) ->
% Creating a branch node
BranchNode = {node, LeftKey ++ RightKey, LeftValue + RightValue, {node, LeftKey, LeftValue, _L1, _R1}, {node, RightKey, RightValue, _L2, _R2}},
huffman(insert(BranchNode, Tail)).
% A complementary function to the huffman function, inserts the newly made
% branchnode into the already sorted tail.
% This is to prevent the sorted tail from becoming unsorted when turning the
% tail list into a tree.
% It is inserted as such that the branchnode is the first selection of its
% current value,
% meaning that if you have 4 nodes of value 5 ( one being a branchnode) then
% the branch node will be the first option.
% This will make the Tree structure left leaning.
%
% N
% N N
% N N
% N N N N
insert(Node, []) -> [Node|[]];
insert(Node, [H|T]) ->
{_, _, Nvalue, _, _} = Node,
{_, _, Hdvalue, _, _} = H,
if
Nvalue =< Hdvalue -> [ Node | [H|T]];
true -> [H | insert(Node, T)]
end.
% Takes the tree created by the huffman-function as input and traverses said tree.
% Returns a list containing the letters found and their position in the
% tree, Left = 0, Right = 1.
% {"e"/[101], [0,0,0]} -- {[Key], [pathway]}
% Traversal method used: Left based traversal.
encode_table(RootNode) -> lists:flatten(encode_table(RootNode, [], [])).
% When traversing the Tree I need to know the branchnode I am in, the result
% list as I am adding letters to it and a PathwayList which is the current
% binary path to the branchnode I am in.
encode_table({_, Key, _, nil, nil}, AccList, PathwayList) ->
[AccList | [{Key, reverse(PathwayList)}]];
encode_table({_, _, _, Left, Right}, AccList, PathwayList) ->
encode_table(
Right,
encode_table(Left, AccList, [0| PathwayList]),
[1|PathwayList]).
% Complementary function for the encode_table/3 function, when traversing
% the tree the the pathway gets reversed so it needs to be corrected.
reverse(L) -> reverse(L, []).
reverse([], Rev) -> Rev;
reverse([H|T], Rev) -> reverse(T, [H|Rev]).
% Takes a sample text and encodes it in accordance to the encoding table supplied
encode(Text, Table) -> encode(Text, Table, []).
encode([], _, EncodedText) -> EncodedText;
encode([Letter|Rest], Table, EncodedText) ->
encode(Rest, Table, [find([Letter], Table) | EncodedText]).
% Complementary function to encode/3, searches the Table for the related
% Letters binary path.
find(Letter, []) -> Letter;
find(Letter, [{Letter, BinaryPath} | _Rest]) ->
BinaryPath;
find(Letter, [ _ | Rest]) ->
find(Letter, Rest).
decode_table(_Tree) -> ok.
decode(sequence, table) -> ok.
test(Letter, [{Letter, _Asd} | []]) ->
true;
test(_, _) -> false.
gives the result:
64> c(seminar1).
{ok,seminar1}
65> rp(seminar1:test()).
[[0,0,0],
[1,0,0,0,1,0],
[0,1,1,1],
[1,0,1,1,0,0],
[0,1,0,0],
[0,0,0],
[1,1,1],
[1,0,0,0,1,0],
[1,0,0,1],
[1,1,0,1,0],
[0,1,1,1],
[1,0,1,0,0],
[0,1,0,1],
[1,1,1],
[0,0,0],
[1,0,1,0,1],
[1,1,1],
[1,1,0,0],
[0,0,1,1],
[1,0,1,0,0],
[1,1,0,0],
[1,1,1],
[1,0,0,0,0,0,0],
[0,1,0,0],
[1,1,0,1,1],
[1,0,1,0,0],
[1,1,0,0],
[0,0,0],
[1,0,0,0,1,1,1],
[0,1,1,1],
[0,1,0,1],
[1,1,1],
[0,1,0,1],
[1,1,0,1,1],
[1,1,1],
[0,1,0,1],
[1,1,0,1,1],
[1,0,1,0,0],
[1,1,0,0]]
ok
66>
Edit
You will get the same result if you replace
encode_table({_, Key, _, nil, nil}, AccList, PathwayList) ->
[AccList | [{Key, reverse(PathwayList)}]];
which is responsible for the nested result by this version which directly produces a flat list.
encode_table({_, Key, _, nil, nil}, AccList, PathwayList) ->
[{Key, reverse(PathwayList)} | AccList];
It is the general way to build a list: [Head|Tail] where Head is any erlang term and Tail is a list. Your code produces a result like [[[],{Key1,Path1}],{Key2,Path2}] while my version gives [{Key2,Path2},{Key1,Path1}]
Some remarks :
In erlang, the usage of if is not frequent, in my opinion mainly because of the last true -> DoSomething() clause which is in most cases very inexpressive.
Another point, searching in a list is not very fast, it is not an issue for isolate search, but in your case the encode and decode functions are doing it for each character, in my opinion, a map is more appropriate to store the Encode and Decode tables than a key/value list.
I would like to divide a string to sub-strings based on a given number , for example:
divide("string",1) = ["s","t","r","i","n","g"].
I have tried this, but no success .
lists:split(1,"string") = {"s", "tring"}
Any idea?
I would calculate the length once (since it's a slow operation) and then recursively use lists:split/2 until the list left is smaller than N:
divide(List, N) ->
divide(List, N, length(List)).
divide(List, N, Length) when Length > N ->
{A, B} = lists:split(N, List),
[A | divide(B, N, Length - N)];
divide(List, _, _) ->
[List].
1> c(a).
{ok,a}
2> a:divide("string", 1).
["s","t","r","i","n","g"]
3> a:divide("string", 2).
["st","ri","ng"]
4> a:divide("string", 3).
["str","ing"]
5> a:divide("string", 4).
["stri","ng"]
6> a:divide("string", 5).
["strin","g"]
7> a:divide("string", 6).
["string"]
8> a:divide("string", 7).
["string"]
I think #Dogbert solution is currently the best... But here an other implementation example with recursive loop.
divide_test() ->
[?assertEqual(divide("string",1), ["s","t","r","i","n","g"]),
?assertEqual(divide("string",2), ["st","ri","ng"]),
?assertEqual(divide("string",3), ["str","ing"]),
?assertEqual(divide("string",4), ["stri","ng"])
].
-spec divide(list(), integer()) -> list(list()).
divide(String, Size)
when is_list(String), is_integer(Size) ->
divide(String, Size, 0, [], []).
-spec divide(list(), integer(), integer(), list(), list()) -> list(list()).
divide([], _, _, Buf, Result) ->
Return = [lists:reverse(Buf)] ++ Result,
lists:reverse(Return);
divide([H|T], Size, 0, Buf, Result) ->
divide(T, Size, 1, [H] ++ Buf, Result);
divide([H|T], Size, Counter, Buf, Result) ->
case Counter rem Size =:= 0 of
true ->
divide(T, Size, Counter+1, [H] ++ [], [lists:reverse(Buf)] ++ Result);
false ->
divide(T, Size, Counter+1, [H] ++ Buf, Result)
end.
You can try this function. provided the number is > 0 less than or equal to string length divided by two.
first_substring(List, Separator) ->
first_substring_loop(List, Separator, []).
first_substring_loop([], _, Reversed_First) ->
lists:reverse(Reversed_First);
first_substring_loop(List, Separator, Reversed_First) ->
[H|T]= my_tuple_to_list(lists:split(Separator,List)),
first_substring_loop(lists:flatten(T), Separator, [H|Reversed_First]).
my_tuple_to_list(Tuple) -> [element(T, Tuple) || T <- lists:seq(1, tuple_size(Tuple))].
the result is
1> fact:first_substring("string", 1).
["s","t","r","i","n","g"]
2> fact:first_substring("string", 2).
["st","ri","ng"]
3> fact:first_substring("string", 3).
["str","ing"]
A short simple solution can be:
divide(String, Length) -> divide(String, Length, []).
divide([], _, Acc) -> Acc;
divide(String, Length, Acc) ->
{Res, Rest} = lists:split(min(Length, length(String)), String),
divide(Rest, Length, Acc ++ [Res]).
Also for a specific case of splitting with length 1, a list comprehension can be used:
ListOfLetters = [[Letter] || Letter <- String].
I am supposed to collect frequencies of characters.
freq(Sample) -> freq(Sample,[]).
freq([],Freq) ->
Freq;
freq([Char|Rest],Freq)->
freq(Rest,[{Char,1}|Freq]).
This function does not work in the right way. If the input is "foo", then the output will be
[{f,1},{o,1},{o,1}].
But I wished to have the output like
[{f,1},{o,2}].
I can't manage to modify element in a tulpe. Can anyone help me out of this and show me how it can be fixed?
a one line solution :o)
% generate a random list
L = [random:uniform(26)+$a-1 || _ <- lists:seq(1,1000)].
% collect frequency
lists:foldl(fun(X,[{[X],I}|Q]) -> [{[X],I+1}|Q] ; (X,Acc) -> [{[X],1}|Acc] end , [], lists:sort(L)).
in action
1> lists:foldl(fun(X,[{[X],I}|Q]) -> [{[X],I+1}|Q] ; (X,Acc) -> [{[X],1}|Acc] end , [], lists:sort("foo")).
[{"o",2},{"f",1}]
quite fast with short list, but the execution time increase a lot with long list (on my PC, it needs 6.5s for a 1 000 000 character text) .
in comparison, with the same 1 000 000 character text Ricardo solution needs 5 sec
I will try another version using ets.
By far the easiest way is to use an orddict to store the value as it already comes with an update_counter function and returns the value in a (sorted) list.
freq(Text) ->
lists:foldl(fun (C, D) -> orddict:update_counter(C, 1, D) end, orddict:new(), Text).
Try with something like this:
freq(Text) ->
CharsDictionary = lists:foldl(fun(Char, Acc) -> dict:update_counter(Char, 1, Acc) end, dict:new(), Text),
dict:fold(fun(Char, Frequency, Acc) -> [{Char, Frequency} | Acc] end, [], CharsDictionary).
The first line creates a dictionary that uses the char as key and the frequency as value (dict:update_counter).
The second line converts the dictionary in the list that you need.
Using pattern matching and proplists.
-module(freq).
-export([char_freq/1]).
-spec char_freq(string()) -> [tuple()].
char_freq(L) -> char_freq(L, []).
char_freq([], PL) -> PL;
char_freq([H|T], PL) ->
case proplists:get_value([H], PL) of
undefined ->
char_freq(T, [{[H],1}|PL]);
N ->
L = proplists:delete([H], PL),
char_freq(T, [{[H],N+1}|L])
end.
Test
1> freq:char_freq("abacabz").
[{"z",1},{"b",2},{"a",3},{"c",1}]
L = [list_to_atom(X) || X <- Str].
D = lists:foldl(fun({Char, _}, Acc) -> dict:update_counter(Char, 1, Acc) end, dict:new(), L).
dict:to_list(D).