How to divide a string into substrings? - erlang

I would like to divide a string to sub-strings based on a given number , for example:
divide("string",1) = ["s","t","r","i","n","g"].
I have tried this, but no success .
lists:split(1,"string") = {"s", "tring"}
Any idea?

I would calculate the length once (since it's a slow operation) and then recursively use lists:split/2 until the list left is smaller than N:
divide(List, N) ->
divide(List, N, length(List)).
divide(List, N, Length) when Length > N ->
{A, B} = lists:split(N, List),
[A | divide(B, N, Length - N)];
divide(List, _, _) ->
[List].
1> c(a).
{ok,a}
2> a:divide("string", 1).
["s","t","r","i","n","g"]
3> a:divide("string", 2).
["st","ri","ng"]
4> a:divide("string", 3).
["str","ing"]
5> a:divide("string", 4).
["stri","ng"]
6> a:divide("string", 5).
["strin","g"]
7> a:divide("string", 6).
["string"]
8> a:divide("string", 7).
["string"]

I think #Dogbert solution is currently the best... But here an other implementation example with recursive loop.
divide_test() ->
[?assertEqual(divide("string",1), ["s","t","r","i","n","g"]),
?assertEqual(divide("string",2), ["st","ri","ng"]),
?assertEqual(divide("string",3), ["str","ing"]),
?assertEqual(divide("string",4), ["stri","ng"])
].
-spec divide(list(), integer()) -> list(list()).
divide(String, Size)
when is_list(String), is_integer(Size) ->
divide(String, Size, 0, [], []).
-spec divide(list(), integer(), integer(), list(), list()) -> list(list()).
divide([], _, _, Buf, Result) ->
Return = [lists:reverse(Buf)] ++ Result,
lists:reverse(Return);
divide([H|T], Size, 0, Buf, Result) ->
divide(T, Size, 1, [H] ++ Buf, Result);
divide([H|T], Size, Counter, Buf, Result) ->
case Counter rem Size =:= 0 of
true ->
divide(T, Size, Counter+1, [H] ++ [], [lists:reverse(Buf)] ++ Result);
false ->
divide(T, Size, Counter+1, [H] ++ Buf, Result)
end.

You can try this function. provided the number is > 0 less than or equal to string length divided by two.
first_substring(List, Separator) ->
first_substring_loop(List, Separator, []).
first_substring_loop([], _, Reversed_First) ->
lists:reverse(Reversed_First);
first_substring_loop(List, Separator, Reversed_First) ->
[H|T]= my_tuple_to_list(lists:split(Separator,List)),
first_substring_loop(lists:flatten(T), Separator, [H|Reversed_First]).
my_tuple_to_list(Tuple) -> [element(T, Tuple) || T <- lists:seq(1, tuple_size(Tuple))].
the result is
1> fact:first_substring("string", 1).
["s","t","r","i","n","g"]
2> fact:first_substring("string", 2).
["st","ri","ng"]
3> fact:first_substring("string", 3).
["str","ing"]

A short simple solution can be:
divide(String, Length) -> divide(String, Length, []).
divide([], _, Acc) -> Acc;
divide(String, Length, Acc) ->
{Res, Rest} = lists:split(min(Length, length(String)), String),
divide(Rest, Length, Acc ++ [Res]).
Also for a specific case of splitting with length 1, a list comprehension can be used:
ListOfLetters = [[Letter] || Letter <- String].

Related

Warning: use of operator '<' has no effect?

I am new to erlang and am trying to implement a simple function as follows:
% * ChatServers is a dictionary of usernames with tuples of the form:
% {server, Pid, Reference,LoggedInUsers}
get_chat_server([], _) ->
undefined;
get_chat_server([Key|_], ChatServers) ->
{server, Pid, Reference,LoggedInUsers} = dict:fetch(Key,ChatServers),
LoggedInUsers < 100,
{server, Pid, Reference,LoggedInUsers};
get_chat_server([_|T], ChatServers) ->
get_chat_server(T, ChatServers).
Basically what I am trying to do is find the first tuple of my dictionary whose LoggedInUsers number is less than 100.
However, once I compile my code, I get the following 2 warnings:
main_server_distributed.erl:63: Warning: use of operator '<' has no
effect main_server_distributed.erl:66: Warning: this clause cannot
match because a previous clause at line 61 always matches
I have some experience with prolog and as far as I recall this is a valid use of pattern matching and recursion. Could you please point out what am I doing wrong here? Thanks in advance.
The body of a clause (everything to the right of the ->) is not a list of conditions to fulfil, but simply a comma-separated list of expressions to evaluate. All resulting values except from that of the last expression will be discarded. Hence, the boolean value of your < comparison is not used anywhere.
You can do something like this...
get_chat_server([], _) ->
undefined;
get_chat_server([Key|T], ChatServers) ->
{server, Pid, Reference,LoggedInUsers} = dict:fetch(Key,ChatServers),
if
LoggedInUsers < 100 ->
{server, Pid, Reference,LoggedInUsers};
true ->
get_chat_server(T, ChatServers)
end.
Or this
get_chat_server([], _) ->
undefined;
get_chat_server([Key|T], ChatServers) ->
Result = dict:fetch(Key,ChatServers),
case Result of
{_, _, _, LoggedInUsers} when LoggedInUsers < 100 ->
Result;
_ ->
get_chat_server(T, ChatServers)
end.
main_server_distributed.erl:66: Warning: this clause cannot match
because a previous clause at line 61 always matches
You've essentially written:
get_chat_server(NonEmptyList, ChatServers) ->
{server, Pid, Reference,LoggedInUsers} = dict:fetch(Key,ChatServers),
LoggedInUsers < 100,
{server, Pid, Reference,LoggedInUsers};
get_chat_server(NonEmptyList, ChatServers) ->
get_chat_server(T, ChatServers).
Therefore, the first clause will always match anything that the second clause would have matched. More specifically, in the pattern:
[Key|_]
Key will match anything and _ will match anything. Likewise, in the pattern:
[_|T]
_ will match anything, and T will match anything.
Riffing off #dsmith's answer:
-module(my).
-export([get_chat_server/3, get_chat_server_test/0]).
get_chat_server(_MaxLoggedIn, []=_Keys, _ChatServers) ->
none;
get_chat_server(MaxLoggedIn, [Key|Keys], ChatServers) ->
get_chat_server(MaxLoggedIn, Keys, ChatServers, dict:fetch(Key, ChatServers) ).
get_chat_server(MaxLoggedIn, _, _, {_,_,_,LoggedInUsers}=ChatServer) when LoggedInUsers < MaxLoggedIn ->
ChatServer;
get_chat_server(MaxLoggedIn, [Key|Keys], ChatServers, _ChatServer) ->
get_chat_server(MaxLoggedIn, Keys, ChatServers, dict:fetch(Key, ChatServers) ).
%---------
get_chat_server_test() ->
Keys = [a, c],
ChatServers = [
{a, {server, a, a_, 200}},
{b, {server, b, b_, 100}},
{c, {server, c, c_, 30}}
],
ChatServerDict = dict:from_list(ChatServers),
none = get_chat_server(10, [], ChatServerDict),
{server, c, c_, 30} = get_chat_server(50, Keys, ChatServerDict),
{server, c, c_, 30} = get_chat_server(150, Keys, ChatServerDict),
PossibleResults = sets:from_list([{server,a,a_, 200},{server,c,c_,30}]),
true = sets:is_element(
get_chat_server(250, Keys, ChatServerDict),
PossibleResults
),
all_tests_passed.
You can also use higher order functions, i.e. dict:fold(), to get a list of all the ChatServers that meet your requirements:
max_fun(Max, Keys) ->
fun(Key, {_,_,_,LoggedInUsers}=Server, Acc) ->
case lists:member(Key, Keys) andalso LoggedInUsers<Max of
true -> [Server | Acc];
false -> Acc
end
end.
In the shell:
44> ChatServers = [
44> {a, {server, a, a_, 200}},
44> {b, {server, b, b_, 100}},
44> {c, {server, c, c_, 30}}
44> ].
[{a,{server,a,a_,200}},
{b,{server,b,b_,100}},
{c,{server,c,c_,30}}]
45> ChatServerDict = dict:from_list(ChatServers).
{dict,3,16,16,8,80,48,
{[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]},
{{[],
[[a|{server,a,a_,200}]],
[[b|{server,b,b_,100}]],
[[c|{server,c,c_,30}]],
[],[],[],[],[],[],[],[],[],[],[],[]}}}
46> Keys = [a,c].
[a,c]
47> MaxLoggedIn = 150.
150
50> F = my:max_fun(MaxLoggedIn, Keys).
#Fun<fl.0.128553666>
51> dict:fold(F, [], ChatServerDict).
[{server,c,c_,30}]

Erlang sumif function

I'm trying to make a sumif function in Erlang that would return a sum of all elements in a list if the predicate function evaluates to true. Here is what I have:
sumif(_, []) -> undefined;
sumif(Fun, [H|T]) -> case Fun(H) of
true -> H + sumif(Fun, T);
false -> sumif(Fun, T)
end.
I also implemented my own pos function which returns true if a number is greater than 0 and false otherwise:
pos(A) -> A > 0.
I tried using pos with sumif but I'm getting this error:
exception error: bad function pos
Why is this happening? Is it because of my sumif function or pos? I have tested pos on its own and it seems to work just fine.
Edit: It might be because how I'm calling the function. This is how I'm currently calling it: hi:sumif(pos,[-1,1,2,-3]). Where hi is my module name.
Is it because of my sumif function or pos?
It's because of sumif. You should return 0 when an empty list is passed, as it'll be called from the 2nd clause when T is []:
-module(a).
-compile(export_all).
sumif(_, []) -> 0;
sumif(Fun, [H|T]) -> case Fun(H) of
true -> H + sumif(Fun, T);
false -> sumif(Fun, T)
end.
pos(A) -> A > 0.
Test:
1> c(a).
{ok,a}
2> a:sumif(fun a:pos/1, [-4, -2, 0, 2, 4]).
6
List comprehensions make things far simpler:
sumif(F, L) ->
lists:sum([X || X <- L, F(X)]).
Dobert's answer is of cousrse right, problem is your sum for empty list.
If your concern is performance a little bit you should stick to tail recursive solution (in this case it matter because there is not lists:reverse/1 involved).
sumif(F, L) ->
sumif(F, L, 0).
sumif(F, [], Acc) when is_function(F, 1) -> Acc;
sumif(F, [H|T], Acc) ->
New = case F(H) of
true -> H+Acc;
false -> Acc
end,
sumif(F, T, New).
Ways how to make correct function for first parameter:
F1 = fun pos/1, % inside module where pos/1 defined
F2 = fun xyz:pos/1, % exported function from module xyz (hot code swap works)
N = 0,
F3 = fun(X) -> X > N end, % closure
% test it
true = lists:all(fun(F) -> is_function(F, 1) end, [F1, F2, F3]).
There has tow error in your code:
1. sumif(_, []) -> undefined; should return 0, not undefined.
2. when you pass pos(A) -> A > 0. to sumif/2,you should use fun pos/1, please read http://erlang.org/doc/programming_examples/funs.html#id59138
sumif(F, L) ->
lists:foldl(fun(X, Sum) when F(X) -> Sum+X; (_) -> Sum end, 0, L).
You can use lists:foldl.

How to split a list of strings into given number of lists in erlang

Given a list and an integer, I want to split that list into the specified number of lists (inside a list).
For example:
Input:
[1,2,3,4,5,6,7,8,9], 3
Output:
[[1,2,3],[4,5,6],[7,8,9]]
What is a clean and efficient way to do this?
The solution written by Steve Vinoski calls length/1 in guard for each partition which makes it O(N^2). It simply bothers me because it can be done in O(N) and I am performance freak. It can be done in many ways so just for example there is one:
divide(L, N) when is_integer(N), N > 0 ->
divide(N, 0, L, []).
divide(_, _, [], Acc) ->
[lists:reverse(Acc)];
divide(N, N, L, Acc) ->
[lists:reverse(Acc) | divide(N, 0, L, [])];
divide(N, X, [H|T], Acc) ->
divide(N, X+1, T, [H|Acc]).
or as a modification of Steve's solution
divide(L, N) ->
divide(L, N, []).
divide([], _, Acc) ->
lists:reverse(Acc);
divide(L, N, Acc) ->
try lists:split(N, L) of
{H,T} -> divide(T, N, [H|Acc])
catch
error:badarg ->
lists:reverse([L|Acc])
end.
or even simpler:
divide([], _) -> [];
divide(L, N) ->
try lists:split(N, L) of
{H,T} -> [H|divide(T, N)]
catch
error:badarg -> [L]
end.
You can use lists:split/2 for this:
divide(L, N) ->
divide(L, N, []).
divide([], _, Acc) ->
lists:reverse(Acc);
divide(L, N, Acc) when length(L) < N ->
lists:reverse([L|Acc]);
divide(L, N, Acc) ->
{H,T} = lists:split(N, L),
divide(T, N, [H|Acc]).
The first function, divide/2, serves as the entry point. It merely calls the helper function divide/3 with an initial accumulator value of an empty list, and then divide/3 does all the work. The first clause of divide/3 matches when the list has been completely processed, so it just reverses the accumulator and returns that value. The second clause handles the case when the length of L is less than the requested N value; it creates a new accumulator by prepending Acc with L and then returning the reverse of that new accumulator. The third clause first calls lists:split/2 to split the incoming list into H, which is a list of N elements, and T, the remainder of the list. It then calls itself recursively, passing T as the new list value, the original N value, and a new accumulator consisting of H as the first element and the original accumulator, Acc, as the tail.

Splitting a list in equal sized chunks in Erlang

I want to split:
[1,2,3,4,5,6,7,8]
into:
[[1,2],[3,4],[5,6],[7,8]]
It generally works great with:
[ lists:sublist(List, X, 2) || X <- lists:seq(1,length(List),2) ] .
But it is really slow this way. 10000 Elements take amazing 2.5 seconds on my netbook. I have also written a really fast recursive function, but I am simply interested: Could this list comprehension also be written in a different way, so that it is faster?
Try this:
part(List) ->
part(List, []).
part([], Acc) ->
lists:reverse(Acc);
part([H], Acc) ->
lists:reverse([[H]|Acc]);
part([H1,H2|T], Acc) ->
part(T, [[H1,H2]|Acc]).
Test in erlang-shell (I've declared this function in module part):
2> part:part([1,2,3,4,5,6,7,8]).
[[1,2],[3,4],[5,6],[7,8]]
3>
3> timer:tc(part, part, [lists:seq(1,10000)]).
{774,
[[1,2],
[3,4],
[5,6],
[7,8],
"\t\n","\v\f",
[13,14],
[15,16],
[17,18],
[19,20],
[21,22],
[23,24],
[25,26],
[27,28],
[29,30],
[31,32],
"!\"","#$","%&","'(",")*","+,","-.","/0","12","34",
[...]|...]}
Just 774 microseconds (which is ~0,8 milliseconds)
Here are two quick solutions for you that are both flexible. One is easy to read, but only slightly faster than your proposed solution. The other is quite fast, but is a bit cryptic to read. And note that both of my proposed algorithms will work for lists of anything, not just numeric ordered lists.
Here is the "easy-to-read" one. Call by n_length_chunks(List,Chunksize). For example, to get a list of chunks 2 long, call n_length_chunks(List,2). This works for chunks of any size, ie, you could call n_length_chunks(List,4) to get [[1,2,3,4],[5,6,7,8],...]
n_length_chunks([],_) -> [];
n_length_chunks(List,Len) when Len > length(List) ->
[List];
n_length_chunks(List,Len) ->
{Head,Tail} = lists:split(Len,List),
[Head | n_length_chunks(Tail,Len)].
The much faster one is here, but is definitely harder to read, and is called in the same way: n_length_chunks_fast(List,2) (I've made one change to this compared with the one above, in that it pads the end of the list with undefined if the length of the list isn't cleanly divisible by the desired chunk length.
n_length_chunks_fast(List,Len) ->
LeaderLength = case length(List) rem Len of
0 -> 0;
N -> Len - N
end,
Leader = lists:duplicate(LeaderLength,undefined),
n_length_chunks_fast(Leader ++ lists:reverse(List),[],0,Len).
n_length_chunks_fast([],Acc,_,_) -> Acc;
n_length_chunks_fast([H|T],Acc,Pos,Max) when Pos==Max ->
n_length_chunks_fast(T,[[H] | Acc],1,Max);
n_length_chunks_fast([H|T],[HAcc | TAcc],Pos,Max) ->
n_length_chunks_fast(T,[[H | HAcc] | TAcc],Pos+1,Max);
n_length_chunks_fast([H|T],[],Pos,Max) ->
n_length_chunks_fast(T,[[H]],Pos+1,Max).
Tested on my (really old) laptop:
Your proposed solution took about 3 seconds.
My slow-but-readable one was slightly faster and takes about 1.5 seconds (still quite slow)
My fast version takes about 5 milliseconds.
For completeness, Isac's solution took about 180 milliseconds on my same machine.
Edit: wow, I need to read the complete question first. Oh well I'll keep here for posterity if it helps. As far as I can tell, there's not a good way to do this using list comprehensions. Your original version is slow because each iteration of sublist needs to traverse the list each time to get to each successive X, resulting in complexity just under O(N^2).
Or with a fold:
lists:foldr(fun(E, []) -> [[E]];
(E, [H|RAcc]) when length(H) < 2 -> [[E|H]|RAcc] ;
(E, [H|RAcc]) -> [[E],H|RAcc]
end, [], List).
I want to submit slightly complicated but more flexible (and mostly faster) solution of one proposed by #Tilman
split_list(List, Max) ->
element(1, lists:foldl(fun
(E, {[Buff|Acc], C}) when C < Max ->
{[[E|Buff]|Acc], C+1};
(E, {[Buff|Acc], _}) ->
{[[E],Buff|Acc], 1};
(E, {[], _}) ->
{[[E]], 1}
end, {[], 0}, List)).
so function part can be implemented as
part(List) ->
RevList = split_list(List, 2),
lists:foldl(fun(E, Acc) ->
[lists:reverse(E)|Acc]
end, [], RevList).
update
I've added reverse in case if you want to preserve order, but as I can see it adds no more than 20% of processing time.
You could do it like this:
1> {List1, List2} = lists:partition(fun(X) -> (X rem 2) == 1 end, List).
{[1,3,5|...],[2,4,6|...]}
2> lists:zipwith(fun(X, Y) -> [X, Y] end, List1, List2).
[[1,2],[3,4],[5,6]|...]
This takes ~73 milliseconds with a 10000 elements List on my computer. The original solution takes ~900 miliseconds.
But I would go with the recursive function anyway.
I was looking for a partition function which can split a large list to small amount of workers. With lkuty's partition you might get that one worker gets almost double work than all the others. If that's not what you want, here is a version which sublist lengths differ by at most 1.
Uses PropEr for testing.
%% #doc Split List into sub-lists so sub-lists lengths differ most by 1.
%% Does not preserve order.
-spec split_many(pos_integer(), [T]) -> [[T]] when T :: term().
split_many(N, List) ->
PieceLen = length(List) div N,
lists:reverse(split_many(PieceLen, N, List, [])).
-spec split_many(pos_integer(), pos_integer(), [T], [[T]]) ->
[[T]] when T :: term().
split_many(PieceLen, N, List, Acc) when length(Acc) < N ->
{Head, Tail} = lists:split(PieceLen, List),
split_many(PieceLen, N, Tail, [Head|Acc]);
split_many(_PieceLen, _N, List, Acc) ->
% Add an Elem to each list in Acc
{Appendable, LeaveAlone} = lists:split(length(List), Acc),
Appended = [[Elem|XS] || {Elem, XS} <- lists:zip(List, Appendable)],
lists:append(Appended, LeaveAlone).
Tests:
split_many_test_() ->
[
?_assertEqual([[1,2]], elibs_lists:split_many(1, [1,2])),
?_assertEqual([[1], [2]], elibs_lists:split_many(2, [1,2])),
?_assertEqual([[1], [3,2]], elibs_lists:split_many(2, [1,2,3])),
?_assertEqual([[1], [2], [4,3]], elibs_lists:split_many(3, [1,2,3,4])),
?_assertEqual([[1,2], [5,3,4]], elibs_lists:split_many(2, [1,2,3,4,5])),
?_assert(proper:quickcheck(split_many_proper1())),
?_assert(proper:quickcheck(split_many_proper2()))
].
%% #doc Verify all elements are preserved, number of groups is correct,
%% all groups have same number of elements (+-1)
split_many_proper1() ->
?FORALL({List, Groups},
{list(), pos_integer()},
begin
Split = elibs_lists:split_many(Groups, List),
% Lengths of sub-lists
Lengths = lists:usort(lists:map(fun erlang:length/1, Split)),
length(Split) =:= Groups andalso
lists:sort(lists:append(Split)) == lists:sort(List) andalso
length(Lengths) =< 2 andalso
case Lengths of
[Min, Max] -> Max == Min + 1;
[_] -> true
end
end
).
%% #doc If number of groups is divisable by number of elements, ordering must
%% stay the same
split_many_proper2() ->
?FORALL({Groups, List},
?LET({A, B},
{integer(1, 20), integer(1, 10)},
{A, vector(A*B, term())}),
List =:= lists:append(elibs_lists:split_many(Groups, List))
).
Here is a more general answer that works with any sublist size.
1> lists:foreach(fun(N) -> io:format("~2.10.0B -> ~w~n",[N, test:partition([1,2,3,4,5,6,7,8,9,10],N)] ) end, [1,2,3,4,5,6,7,8,9,10]).
01 -> [[1],[2],[3],[4],[5],[6],[7],[8],[9],[10]]
02 -> [[1,2],[3,4],[5,6],[7,8],[9,10]]
03 -> [[1,2,3],[4,5,6],[7,8,9],[10]]
04 -> [[1,2,3,4],[5,6,7,8],[10,9]]
05 -> [[1,2,3,4,5],[6,7,8,9,10]]
06 -> [[1,2,3,4,5,6],[10,9,8,7]]
07 -> [[1,2,3,4,5,6,7],[10,9,8]]
08 -> [[1,2,3,4,5,6,7,8],[10,9]]
09 -> [[1,2,3,4,5,6,7,8,9],[10]]
10 -> [[1,2,3,4,5,6,7,8,9,10]]
And the code to achieve this is stored inside a file called test.erl:
-module(test).
-compile(export_all).
partition(List, N) ->
partition(List, 1, N, []).
partition([], _C, _N, Acc) ->
lists:reverse(Acc) ;
partition([H|T], 1, N, Acc) ->
partition(T, 2, N, [[H]|Acc]) ;
partition([H|T], C, N, [HAcc|TAcc]) when C < N ->
partition(T, C+1, N, [[H|HAcc]|TAcc]) ;
partition([H|T], C, N, [HAcc|TAcc]) when C == N ->
partition(T, 1, N, [lists:reverse([H|HAcc])|TAcc]) ;
partition(L, C, N, Acc) when C > N ->
partition(L, 1, N, Acc).
It could probably be more elegant regarding the special case where C > N. Note that C is the size of the current sublist being constructed. At start, it is 1. And then it increments until it reaches the partition size of N.
We could also use a modified version of #chops code to let the last list contains the remaining items even if its size < N :
-module(n_length_chunks_fast).
-export([n_length_chunks_fast/2]).
n_length_chunks_fast(List,Len) ->
SkipLength = case length(List) rem Len of
0 -> 0;
N -> Len - N
end,
n_length_chunks_fast(lists:reverse(List),[],SkipLength,Len).
n_length_chunks_fast([],Acc,_Pos,_Max) -> Acc;
n_length_chunks_fast([H|T],Acc,Pos,Max) when Pos==Max ->
n_length_chunks_fast(T,[[H] | Acc],1,Max);
n_length_chunks_fast([H|T],[HAcc | TAcc],Pos,Max) ->
n_length_chunks_fast(T,[[H | HAcc] | TAcc],Pos+1,Max);
n_length_chunks_fast([H|T],[],Pos,Max) ->
n_length_chunks_fast(T,[[H]],Pos+1,Max).
I've slightly altered the implementation from #JLarky to remove the guard expression, which should be slightly faster:
split_list(List, Max) ->
element(1, lists:foldl(fun
(E, {[Buff|Acc], 1}) ->
{[[E],Buff|Acc], Max};
(E, {[Buff|Acc], C}) ->
{[[E|Buff]|Acc], C-1};
(E, {[], _}) ->
{[[E]], Max}
end, {[], Max}, List)).

Erlang - list comprehensions - populating records

I have a simple record structure consisting of a header (H) and a list of the data lines (D) 1:N. All header lines must start with a digit. All data lines have a leading whitespace. There also might be some empty lines (E) in between that must be ignored.
L = [H, D, D, E, H, D, E, H, D, D, D].
I would like to create a list of records:
-record(posting,{header,data}).
using list comprehension. Whats the best way to do it?
You must use lists:foldl/3 instead of list comprehensions in this case. With foldl/3 you can accumulate values of header and data through whole list L.
You should do something like this:
make_records(L) when is_list(L) ->
F = fun([32|_]=D,{#posting{}=H,Acc}) -> {H,[H#posting{data=D}|Acc]};
([], Acc) -> Acc;
([F|_]=H, {_,Acc}) when F=<$0, F>=$9 -> {#posting{header=>H}, Acc}
end,
{_, R} = lists:foldl(F, {undefined, []}, L),
R.
Anyway I think that straightforward Erlang version doesn't seems too complicated and should be little bit faster.
make_records2(L) when is_list(L) ->
make_records2(L, undefined, []).
make_records2([], _, R) -> R;
make_records2([[32|_]=D|T], H, Acc) when is_list(H) ->
make_records2(T, H, [#posting{header=H,data=D}|Acc]);
make_records2([[]|T], H, Acc) ->
make_records2(T, H, Acc);
make_records2([[F|_]=H|T], _, Acc) when F>=$0, F=<$9 ->
make_records2(T, H, Acc).
Edit: If you have to add better row classification or parsing, adding new function is better because it improves readability.
parse_row([Digit|_]=R) when Digit >= $0, Digit =< $9 -> {header, R};
parse_row(R) -> try_spaces(R).
try_spaces([]) -> empty;
try_spaces([Sp|R]) when Sp=:=$\s; Sp=:=$\t; Sp=:=$\n ->
try_spaces(R); % skip all white spaces from Data field
try_spaces(Data) -> {data, Data}.
You can use it like this:
make_records(L) when is_list(L) ->
F = fun(Row, {H, Acc}) ->
case parse_row(Row) of
{data, D} when is_record(H, posting) -> {H,[H#posting{data=D}|Acc]};
empty -> Acc;
{header, H} -> {#posting{header=>H}, Acc}
end,
{_, R} = lists:foldl(F, {undefined, []}, L),
R.
Tail recursive native Erlang solution:
make_records2(L) when is_list(L) ->
make_records2([parse_row(R) || R<-L], undefined, []).
make_records2([], _, R) -> R;
make_records2([{data, D}|T], H, Acc) when is_list(H) ->
make_records2(T, H, [#posting{header=H,data=D}|Acc]);
make_records2([empty|T], H, Acc) ->
make_records2(T, H, Acc);
make_records2([{header,H}|T], _, Acc) ->
make_records2(T, H, Acc).
I think that there is no reason use tail recursion from performance point of view:
make_records3(L) when is_list(L) ->
make_records3(L, undefined).
make_records3([], _) -> [];
make_records3([R|T], H) ->
case parse_row(R) of
{data, D} when is_list(H) -> [#posting{head=H,data=D}|make_records3(T, H)];
empty -> make_records3(T, H);
{header, H2} -> make_records3(T, H2)
end.
... and many many other variants.
I needed to collapse all Data lines beneath the header - so for the moment here is what I have:
sanitize(S) -> trim:trim(S).
make_records(L) when is_list(L) -> make_records(L, undefined, []).
make_records([], _, R) -> lists:reverse(R);
make_records([[32|_]=D|T], H, Acc) when is_tuple(H) ->
make_records(T, {element(1,H),[sanitize(D)|element(2,H)]},Acc);
make_records([[$\n|_]=D|T], H, Acc) when is_tuple(H) ->
make_records(T, H, Acc);
make_records([[F|_]=H|T], B, Acc) when F>=$0, F=<$9 ->
if is_tuple(B) ->
make_records(T, {sanitize(H),[]}, [#posting{header=element(1,B),
data=lists:reverse(element(2,B))}|Acc]);
true ->
make_records(T, {sanitize(H),[]}, Acc)
end.

Resources