How to handle text tables with FastParse? - parsing

I have a text file containing a single-row, tab-separated table, and I need to parse it into Map("one" -> 1, "two" -> 2, "three" -> 3). I can't figure out how to do it, and I'm not even sure it is possible at all. Any ideas?
one two three
1 2 3

Ok, I've figured out how to do it by myself.
// Load the whole resource as one string; lines are re-joined with "\r\n".
val lines = Source.fromResource("test.txt").getLines().mkString("\r\n")

// One table cell: a non-empty run of digits / lower-case letters, captured as a String.
def sentence[_: P] = P(CharIn("0-9", "a-z").rep(1).!)

// Header row: cells optionally followed by a tab, terminated by a line separator.
def tableHeader[_: P] = P((sentence.! ~ "\t".?).rep ~ lineSeparator)

// Data row parsed with knowledge of the header `h`; the zipped header -> value
// map is printed as a side effect, so this parser itself yields Unit.
def tableRow[_: P](h: Seq[String]) =
  P((sentence.! ~ "\t".?).rep ~ (lineSeparator | End))
    .map(r => println(h.zip(r).toMap))

// Feed the parsed header into the row parser.
def singleRowTable[_: P] = P(tableHeader.flatMap(tableRow))

def lineSeparator[_: P] = P("\r\n" | "\r" | "\n")

def parseA[_: P] = P(singleRowTable)

parse(lines, parseA(_), true) match {
  case Parsed.Success(value, successIndex) =>
    println("Success value=" + value + " successIndex=" + successIndex)
  // `@` (not `#`) binds the whole failure to `f` while destructuring it.
  case f @ Parsed.Failure(label, index, extra) =>
    println("Failure " + f.trace(true))
}
It will print
Map(one -> 1, two -> 2, three -> 3)
Success value=() successIndex=20

Related

Why and/or where is the nil value?

I'm using Geany to run this. The problem is that the console reports that I'm trying to index a nil value, but I don't know whether it is in line 63, 80, or some other line. I'm really a beginner in programming, so there are many things I don't know or understand; I would be grateful if you could keep it simple.
Here's the error message:
lua: TTC.Lua:65: attempt to index a nil value (field '?')
stack traceback:
TTC.Lua:65: in function 'preenche_tabuleiro'
TTC.Lua:82: in main chunk
[C]:in ?
Here's the code:
--- Build a fresh 3x3 tic-tac-toe board.
-- Every cell is initialised with the empty-cell marker, so the board can be
-- printed with table.concat and a cell can be compared against the marker.
-- The original filled the cells from the undefined global `_` (nil) and
-- ignored the argument the caller passes in.
-- @param POS_VAZIA value stored in every free cell; when omitted the rows are
--        left as empty tables, which is what the original always produced
-- @return a table of three distinct row tables, each holding three cells
function cria_tabuleiro(POS_VAZIA)
  local tabuleiro = {}
  for linha = 1, 3 do
    tabuleiro[linha] = { POS_VAZIA, POS_VAZIA, POS_VAZIA }
  end
  return tabuleiro
end
--- Print blank lines to standard output.
-- The counter starts at 0 and includes the upper bound, so a call with N
-- emits N + 1 empty lines.
-- @param _ upper bound of the zero-based counter
function pula_linha(_)
  for contador = 0, _ do
    print()
  end
end
--- Print the game banner: some vertical spacing followed by the framed title.
function abertura_do_jogo()
  pula_linha(7)
  local moldura = "\t-_-_-_-_-_-_-_-_-_-_-_-_-"
  local titulo = "\t- TIC-TAC-TOE -"
  for _, texto in ipairs({ moldura, titulo, moldura }) do
    print(texto)
  end
end
--- Choose the symbols used to draw the board, based on the environment.
-- When the HOME environment variable is not set, plain ASCII symbols are
-- returned; otherwise the arrow/dot symbols are used.
-- The original discarded the result of os.getenv and tested an undefined
-- global `home`, so it always took the ASCII branch.
-- @return down-arrow symbol, right-arrow symbol, empty-cell marker
function checa_OS()
  local home = os.getenv("HOME")
  if home == nil then
    return "|", "--", " "
  end
  return "↓", "→ ", "."
end
--- Prompt for the names of the two players.
-- One line is read from standard input per player. Working variables are
-- local; the original leaked `jogs` and `msg` as globals.
-- @return a list with the two names, in the order they were entered
function recebe_nomes()
  local jogs = {}
  local msg = "Digite o nome do jogador numero %s: "
  for numero = 1, 2 do
    io.write(msg:format(numero))
    table.insert(jogs, io.read())
  end
  return jogs
end
--- Print the banner followed by the board with column letters and row numbers.
-- @param T  board: a table whose entries 1..3 are the rows to print
-- @param SB symbol printed under each column letter
-- @param SD symbol printed between the row number and the row's cells
function imprimir_tabuleiro(T, SB, SD)
  abertura_do_jogo()
  pula_linha(2)

  local cabecalho = ("\t\t A B C\n\t\t %s %s %s"):format(SB, SB, SB)
  print(cabecalho)

  for numero = 1, 3 do
    local prefixo = ("\t\t %s%s"):format(numero, SD)
    io.write(prefixo)
    local casas = table.concat(T[numero], " ")
    print(casas)
  end

  pula_linha(5)
end
--- Ask player X for a move and keep asking until a valid one is typed.
-- A move is a column letter A-C followed by a row number 1-3, e.g. "B3".
-- The original stored the result in the globals `col`/`lin` and returned
-- nothing, although callers pass the call's results on as column and row.
-- Its retry also used the global `jogadores` and dropped the retried result.
-- Empty or non-numeric input is now rejected as an invalid move instead of
-- raising an arithmetic/comparison error.
-- @param JOGADORES list of player names
-- @param X index into JOGADORES of the player whose turn it is
-- @return column (1-3), row (1-3)
function ler_jogada(JOGADORES, X)
  -- Translate text such as "b3" into column and row numbers; nil if invalid.
  local function checa_jog(jog)
    local letra = jog:upper():byte(1)
    local linha = tonumber(jog:sub(2))
    if letra == nil or linha == nil then
      return nil
    end
    local coluna = letra - 64 -- byte("A") == 65, so "A" maps to column 1
    if coluna >= 1 and coluna <= 3
        and linha >= 1 and linha <= 3
        and linha == math.floor(linha) then
      return coluna, math.floor(linha)
    end
    return nil
  end

  while true do
    pula_linha(2)
    io.write(string.format("%s, digite sua jogada (EX:B3, A2, ETC...) : ", JOGADORES[X]))
    local jogada = io.read()
    if jogada == nil then
      -- End of input: no further move can ever arrive, so stop instead of looping.
      error("entrada encerrada antes de uma jogada valida")
    end
    local coluna, linha = checa_jog(jogada)
    if coluna ~= nil then
      return coluna, linha
    end
    print("Sua jogada foi invalida, tente novamente")
  end
end
--- Place a piece on the board, asking again while the chosen cell is taken.
-- The board is returned on every path; the original returned it only after
-- a retry and discarded the retry's own result. A missing or out-of-board
-- move now raises a descriptive error instead of a bare nil-index error.
-- @param tabuleiro board: table of rows, each a table of cells
-- @param POS_VAZIA marker identifying a free cell
-- @param PECAS piece to store in the cell
-- @param jogadores list of player names
-- @param _ index into jogadores of the player making the move
-- @param COL column of the move
-- @param LIN row of the move
-- @return the (mutated) board
function preenche_tabuleiro(tabuleiro, POS_VAZIA, PECAS, jogadores, _, COL, LIN)
  if COL == nil or LIN == nil or tabuleiro[LIN] == nil then
    error("preenche_tabuleiro: missing or out-of-board move (COL/LIN)", 2)
  end

  if tabuleiro[LIN][COL] == POS_VAZIA then
    tabuleiro[LIN][COL] = PECAS
    return tabuleiro
  end

  local msg = "%s, voce tentou uma posicao ja preenchida. jogue novamente."
  print(msg:format(jogadores[_]))
  -- Tail call: ask for another move and retry with it.
  return preenche_tabuleiro(tabuleiro, POS_VAZIA, PECAS, jogadores, _, ler_jogada(jogadores, _))
end
-- Game start-up: show the banner, pick the drawing symbols, build the board,
-- read the player names and let each player place one piece.
-- These names stay global because the functions above may refer to them.
abertura_do_jogo()
SETA_BAIXO, SETA_DIREITA, POS_VAZIA = checa_OS()
tabuleiro = cria_tabuleiro(POS_VAZIA)
jogadores = recebe_nomes()
PECAS = { "X", "O" }
imprimir_tabuleiro(tabuleiro, SETA_BAIXO, SETA_DIREITA)
-- ipairs visits the players in order 1, 2; pairs gives no ordering guarantee.
for vez in ipairs(jogadores) do
  preenche_tabuleiro(tabuleiro, POS_VAZIA, PECAS[vez], jogadores, vez, ler_jogada(jogadores, vez))
end
ler_jogada does not return your lin and col values but you're using it here, as if it did:
preenche_tabuleiro(tabuleiro, POS_VAZIA, PECAS[_], jogadores, _, ler_jogada(jogadores, _))
ler_jogada sets global values:
col, lin = checa_jog(jogada[1])
so when you pass it as the last param to preenche_tabuleiro you will get nil values for COL and LIN
If you add a print before line 64, you can see the issue: print(LIN, COL, lin, col)

convert a string into a map in Elixir

I have a string something like this:
### image_date: 23/01/2019 ###
pool2 wxcs 2211
pool3 wacs 1231
### line_count: 1 ###
I want to convert this to a map, something like:
%{
image_data: "23/01/2019",
content: "pool2 wxcs 2211\npool3 wacs 1231",
line_count: 1
}
Can anyone help me with this?
One might use Regex.scan/3:
# Scan for every stretch of text that sits between two "###" markers (the
# look-behind / look-ahead keep the markers themselves out of the match) and
# collect the resulting pairs into a map.
for [capture] <- Regex.scan(~r/(?<=###).*?(?=###)/mus, str), into: %{} do
# Split the captured text on ":" and trim every piece.
case capture |> String.split(":") |> Enum.map(&String.trim/1) do
# Two pieces: a "name: value" line.
[name, value] -> {name, value}
# One piece (no colon): stored under the "content" key.
[content] -> {"content", content}
# NOTE(review): text with more than one ":" yields three or more pieces and
# matches neither clause — confirm the input can never contain that.
end
end
resulting in:
#⇒ %{
# "content" => "pool2 wxcs 2211\n pool3 wacs 1231",
# "image_date" => "23/01/2019",
# "line_count" => "1"
# }
It ain't pretty but it does the job.
# Parses a fixed three-part text (header line, body, footer line) into a map.
defmodule UglyParser do
# Builds the sample text and returns the map made from its header, footer
# and body.
def main do
str = """
### image_date: 23/01/2019 ###
pool2 wxcs 2211
pool3 wacs 1231
### line_count: 1 ###
"""
# Split where a "#" is followed by optional whitespace and a newline, or where
# a newline and optional whitespace are followed by a "#". The match on the
# left requires exactly three pieces.
[header, content, footer] = String.split(str, ~r/(?:#\s*\n)|(?:\n\s*#)/, trim: true)
header = to_pair(header)
footer = to_pair(footer)
# Body: trimmed, with the whitespace that follows each newline removed.
content = {:content, String.trim(content) |> String.replace(~r/\n\s*/, "\n")}
Enum.into([header, footer, content], %{})
end
# Turns a "### key: value ###" fragment into a {key_atom, value_string} tuple.
# NOTE(review): String.to_atom/1 creates atoms from the input text — confirm
# the input is trusted.
defp to_pair(str) do
String.replace(str, "#", "")
|> String.trim()
|> String.split(": ")
|> (fn [key, value] -> {String.to_atom(key), value} end).()
end
end

problems with Lua match to find a pattern

I'm struggling with this problem:
Given 2 strings:
s1 = '/foo/:bar/oof/:rab'
s2 = '/foo/lua/oof/rocks'
I would like to produce the following information:
If they match (these two above should match, s2 follows a pattern described in s1).
A table that maps each placeholder name in s1 to the corresponding value in s2. In this case we would have: { bar = "lua", rab = "rocks" }
I think this algorithm solves it, but I can't figure how to implement it (probably with gmatch):
store the placeholders : indexes as KEYS of a table, and the respective VALUES being the name of these placeholders.
Example with s1:
local aux1 = { "6" = "bar", "15" = "rab" }
With the keys of aux1 fetched as indexes, extract the values of s2
into another table:
local aux2 = {"6" = "lua", "15" = "rocks"}
Finally, merge the two into one table (this one is easy :P)
{ bar = "lua", rab = "rocks" }
Something like this maybe:
--- Match path `b` against template `a` and collect the placeholder values.
-- Both strings are split on "/". A template segment starting with ":" is a
-- placeholder whose name is the rest of the segment; any other segment must
-- be equal to the segment at the same position in `b`.
-- @param a template, e.g. '/foo/:bar/oof/:rab'
-- @param b concrete path, e.g. '/foo/lua/oof/rocks'
-- @return table mapping placeholder name -> value; an empty table when a
--         literal segment differs or the segment counts differ
function comp(a, b)
  local literais, nomes = {}, {}
  local total_a = 0
  for segmento in (a .. '/'):gmatch('(.-)/') do
    total_a = total_a + 1
    if segmento:sub(1, 1) == ':' then
      nomes[total_a] = segmento:sub(2)
    else
      literais[total_a] = segmento
    end
  end

  local capturas = {}
  local total_b = 0
  for segmento in (b .. '/'):gmatch('(.-)/') do
    total_b = total_b + 1
    local esperado = literais[total_b]
    if esperado and esperado ~= segmento then
      return {}
    end
    local nome = nomes[total_b]
    if nome then
      capturas[nome] = segmento
    end
  end

  if total_a ~= total_b then
    return {}
  end
  return capturas
end

s1 = '/foo/:bar/oof/:rab'
s2 = '/foo/lua/oof/rocks'
for k, v in pairs(comp(s1, s2)) do
  print(k, v)
end
Another solution could be:
s1 = '/foo/:bar/oof/:rab'
s2 = '/foo/lua/oof/rocks'
pattern = "/([^/]+)"

--- Collect every capture of `_pattern` found in `_strng`, in order.
-- @param _strng string to scan
-- @param _pattern Lua pattern with one capture
-- @return list of captured strings
function getStrngTable(_strng, _pattern)
  local t = {}
  for val in string.gmatch(_strng, _pattern) do
    t[#t + 1] = val
  end
  return t
end

--- Pair the placeholder segments of a template with the values of a path.
-- Segments are compared position by position. The original paired segment k
-- with segment k + 1, which indexed the result with nil (or called :match on
-- nil) whenever two literal segments were adjacent or the last segment was a
-- literal.
-- @param _names template segments; a placeholder starts with ":"
-- @param _values path segments
-- @return table mapping placeholder name -> value; empty when the segment
--         counts differ or a literal segment does not match
function extractParams(_names, _values)
  if #_names ~= #_values then
    return {}
  end
  local params = {}
  for k = 1, #_names do
    local name = _names[k]:match("^:(.+)")
    if name then
      params[name] = _values[k]
    elseif _names[k] ~= _values[k] then
      return {}
    end
  end
  return params
end

t1 = getStrngTable(s1, pattern)
t2 = getStrngTable(s2, pattern)
local r = extractParams(t1, t2)
The Table r will have the required result
The solution below, which is somewhat cleaner, will also give the same result:
s1 = '/foo/:bar/oof/:rab'
s2 = '/foo/lua/oof/rocks'
pattern = "/:?([^/]+)"

--- Collect the first capture of every match of `_pattern` in `_strng`.
-- @param _strng string to scan
-- @param _pattern Lua pattern with at least one capture
-- @return list of captured strings, in order of appearance
function getStrng(_strng, _pattern)
  local t = {}
  for val in string.gmatch(_strng, _pattern) do
    t[#t + 1] = val
  end
  return t
end

--- Map the placeholders of `_template` to the matching segments of `_path`.
-- The original loop paired segment k with segment k + 1 and could not tell
-- placeholders from literals (the ":" is stripped by the pattern). It
-- therefore failed with a nil table index when the last segment was a
-- literal or two literals were adjacent.
-- @param _template template such as '/foo/:bar'
-- @param _path concrete path such as '/foo/lua'
-- @return table mapping placeholder name -> value; empty when the segment
--         counts differ or a literal segment does not match
function routeParams(_template, _path)
  local names = getStrng(_template, "/:?([^/]+)")
  -- One entry per segment: ":" for a placeholder, "" for a literal.
  local marks = getStrng(_template, "/(:?)[^/]+")
  local values = getStrng(_path, "/([^/]+)")
  if #names ~= #values then
    return {}
  end
  local params = {}
  for k = 1, #names do
    if marks[k] == ":" then
      params[names[k]] = values[k]
    elseif names[k] ~= values[k] then
      return {}
    end
  end
  return params
end

t1 = getStrng(s1, pattern)
t2 = getStrng(s2, pattern)
local r = routeParams(s1, s2)

Parsing a TeX-like language with lpeg

I am struggling to get my head around LPEG. I have managed to produce one grammar which does what I want, but I have been beating my head against this one and not getting far. The idea is to parse a document which is a simplified form of TeX. I want to split a document into:
Environments, which are \begin{cmd} and \end{cmd} pairs.
Commands which can either take an argument like so: \foo{bar} or can be bare: \foo.
Both environments and commands can have parameters like so: \command[color=green,background=blue]{content}.
Other stuff.
I also would like to keep track of line number information for error handling purposes. Here's what I have so far:
-- Draft LPeg grammar for a simplified TeX-like language: environments
-- (\begin{..} ... \end{..}), commands with optional [key=value] parameters
-- and an optional {..} argument, and plain text.
lpeg = require("lpeg")
lpeg.locale(lpeg)
-- Assume a lot of "X = lpeg.X" here.
-- Line number handling from http://lua-users.org/lists/lua-l/2011-05/msg00607.html
-- with additional print statements to check they are working.
local newline = P"\r"^-1 * "\n" / function (a) print("New"); end
-- NOTE(review): the trailing `, "linenum"` is a second expression of this
-- `local` statement, not the second argument of Cg, so the group built here
-- is anonymous and the string is discarded.
local incrementline = Cg( Cb"linenum" )/ function ( a ) print("NL"); return a + 1 end , "linenum"
-- Named group "linenum" holding the constant 1.
local setup = Cg ( Cc ( 1) , "linenum" )
nl = newline * incrementline
space = nl + lpeg.space
-- Taken from "Name-value lists" in http://www.inf.puc-rio.br/~roberto/lpeg/
local identifier = (R("AZ") + R("az") + P("_") + R("09"))^1
local sep = lpeg.S(",;") * space^0
local value = (1-lpeg.S(",;]"))^1
local pair = lpeg.Cg(C(identifier) * space ^0 * "=" * space ^0 * C(value)) * sep^-1
-- Folds the captured name/value pairs into one table via rawset.
local list = lpeg.Cf(lpeg.Ct("") * pair^0, rawset)
-- Optional "[...]" parameter list.
local parameters = (P("[") * list * P("]")) ^-1
-- And the rest is mine
-- Plain text: whitespace or any character other than "\", "{" and "}",
-- returned as { text = ..., line = ... } using the "linenum" back capture.
anything = C( (space^1 + (1-lpeg.S("\\{}")) )^1) * Cb("linenum") / function (a,b) return { text = a, line = b } end
begin_environment = P("\\begin") * Ct(parameters) * P("{") * Cg(identifier, "environment") * Cb("environment") * P("}") / function (a,b) return { params = a[1], environment = b } end
-- NOTE(review): the identifier matched here is not compared with the one
-- matched by begin_environment.
end_environment = P("\\end{") * Cg(identifier) * P("}")
texlike = lpeg.P{
"document";
document = setup * V("stuff") * -1,
stuff = Cg(V"environment" + anything + V"bracketed_stuff" + V"command_with" + V"command_without")^0,
bracketed_stuff = P"{" * V"stuff" * P"}" / function (a) return a end,
-- Command followed by a braced argument; "\end{" is excluded so that it is
-- left for end_environment.
command_with =((P("\\") * Cg(identifier) * Ct(parameters) * Ct(V"bracketed_stuff"))-P("\\end{")) / function (i,p,n) return { command = i, parameters = p, nodes = n } end,
-- Bare command without a braced argument.
command_without = (( P("\\") * Cg(identifier) * Ct(parameters) )-P("\\end{")) / function (i,p) return { command = i, parameters = p } end,
environment = Cg(begin_environment * Ct(V("stuff")) * end_environment) / function (b,stuff, e) return { b = b, stuff = stuff, e = e} end
}
It almost works!
> texlike:match("\\foo[one=two]thing\\bar")
{
command = "foo",
parameters = {
{
one = "two",
},
},
}
{
line = 1,
text = "thing",
}
{
command = "bar",
parameters = {
},
}
But! First, I can't get the line number handling part to work at all. The function within incrementline is never fired.
I also can't quite work out how nested capture information is passed to handling functions (which is why I have scattered Cg, C and Ct semirandomly over the grammar). This means that only one item is returned from within a command_with:
> texlike:match("\\foo{text \\command moretext}")
{
command = "foo",
nodes = {
{
line = 1,
text = "text ",
},
},
parameters = {
},
}
I would also love to be able to check that the environment start and ends match up but when I tried to do so, my back references from "begin" were not in scope by the time I got to "end". I don't know where to go from here.
Late answer but hopefully it'll offer some insight if you're still looking for a solution or wondering what the problem was.
There are a couple of issues with your grammar, some of which can be tricky to spot.
Your line increment here looks incorrect:
local incrementline = Cg( Cb"linenum" ) /
function ( a ) print("NL"); return a + 1 end,
"linenum"
It looks like you meant to create a named capture group and not an anonymous group. The backcapture linenum is essentially being used like a variable. The problem is because this is inside an anonymous capture, linenum will not update properly -- function(a) will always receive 1 when called. You need to move the closing ) to the end so "linenum" is included:
local incrementline = Cg( Cb"linenum" /
function ( a ) print("NL"); return a + 1 end,
"linenum")
Relevant LPeg documentation for Cg capture.
The second problem is with your anything non-terminal rule:
anything = C( (space^1 + (1-lpeg.S("\\{}")) )^1) * Cb("linenum") ...
There are several things to be careful here. First, a named Cg capture (from incrementline rule once it's fixed) doesn't produce anything unless it's in a table or you backref it. The second major thing is that it has an adhoc scope like a variable. More precisely, its scope ends once you close it in an outer capture -- like what you're doing here:
C( (space^1 + (...) )^1)
Which means by the time you reference its backcapture with * Cb("linenum"), that's already too late -- the linenum you really want already closed its scope.
I always found LPeg's re syntax a bit easier to grok so I've rewritten the grammar with that instead:
-- Definitions table handed to re.compile: each key is a name the grammar
-- below refers to after "->".
-- NOTE(review): pairfold, resetlinenum, incrementlinenum and getlinenum must
-- already be defined at the point where this table is built — confirm the
-- definition order in the real file.
local grammar_cb =
{
fold = pairfold,
resetlinenum = resetlinenum,
incrementlinenum = incrementlinenum, getlinenum = getlinenum,
error = error
}
-- TeX-like grammar written in LPeg's `re` syntax. Rule summary:
--   document  resets the line counter, then collects every docpiece.
--   envcmd    \begin[params]{name} ... \end{name}; the end name must equal
--             the begin name via the `=beginenv` back reference.
--   cmd       \name, optionally followed by [params] and a {...} block.
--   text      plain text, tagged with the value returned by getlinenum.
--   nl        a run of newlines, passed to incrementlinenum.
local texlike_grammar = re.compile(
[[
document <- '' -> resetlinenum {| docpiece* |} !.
docpiece <- {| envcmd |} / {| cmd |} / multiline
beginslash <- cmdslash 'begin'
endslash <- cmdslash 'end'
envcmd <- beginslash paramblock? {:beginenv: envblock :} (!endslash docpiece)*
endslash openbrace {:endenv: =beginenv :} closebrace / &beginslash {} -> error .
envblock <- openbrace key closebrace
cmd <- cmdslash {:command: identifier :} (paramblock? cmdblock)?
cmdblock <- openbrace {:nodes: {| docpiece* |} :} closebrace
paramblock <- opensq ( {:parameters: {| parampairs |} -> fold :} / whitesp) closesq
parampairs <- parampair (sep parampair)*
parampair <- key assign value
key <- whitesp { identifier }
value <- whitesp { [^],;%s]+ }
multiline <- (nl? text)+
text <- {| {:text: (!cmd !closebrace !%nl [_%w%p%s])+ :} {:line: '' -> getlinenum :} |}
identifier <- [_%w]+
cmdslash <- whitesp '\'
assign <- whitesp '='
sep <- whitesp ','
openbrace <- whitesp '{'
closebrace <- whitesp '}'
opensq <- whitesp '['
closesq <- whitesp ']'
nl <- {%nl+} -> incrementlinenum
whitesp <- (nl / %s)*
]], grammar_cb)
The callback functions are straight-forwardly defined as:
--- Fold a flat list {k1, v1, k2, v2, ...} into a key/value table.
-- Only the first argument is inspected. The list is walked from the end
-- towards the start, so when a key appears more than once the earliest
-- occurrence wins.
-- @param ... the flat list comes first; any further arguments are only passed
--        through in the odd-length case
-- @return the folded table, or all arguments unchanged when the list has an
--         odd number of entries
local function pairfold(...)
  local lista = (...)
  local tamanho = #lista
  if tamanho % 2 == 1 then
    return ...
  end
  local dobrado = {}
  for i = tamanho, 2, -2 do
    local chave, valor = lista[i - 1], lista[i]
    dobrado[chave] = valor
  end
  return dobrado
end
-- Line-number bookkeeping shared by three closures; the counter itself is
-- private to the do-block.
local incrementlinenum, getlinenum, resetlinenum
do
  local atual = 1

  --- Advance the counter by the length of `nl`.
  -- `nl` must consist of whitespace only; returns nothing.
  incrementlinenum = function(nl)
    assert(not nl:match("%S"))
    atual = atual + #nl
  end

  --- Current line number.
  getlinenum = function()
    return atual
  end

  --- Start counting from line 1 again.
  resetlinenum = function()
    atual = 1
  end
end
Testing the grammar with a non-trivial tex-like str with multiple lines:
local test1 = [[\foo{text \bar[color = red, background = black]{
moretext \baz{
even
more text} }
this time skipping multiple
lines even, such wow!}]]
This produces the following AST in Lua-table format:
{
command = "foo",
nodes = {
{
text = "text",
line = 1
},
{
parameters = {
color = "red",
background = "black"
},
command = "bar",
nodes = {
{
text = " moretext",
line = 2
},
{
command = "baz",
nodes = {
{
text = "even ",
line = 3
},
{
text = "more text",
line = 4
}
}
}
}
},
{
text = "this time skipping multiple",
line = 7
},
{
text = "lines even, such wow!",
line = 9
}
}
}
And a second test for begin/end environments:
local test2 = [[\begin[p1
=apple,
p2=blue]{scope} scope foobar
\end{scope} global foobar]]
Which seems to give approximately what you're looking for:
{
{
{
text = " scope foobar",
line = 3
},
parameters = {
p1 = "apple",
p2 = "blue"
},
beginenv = "scope",
endenv = "scope"
},
{
text = " global foobar",
line = 4
}
}

Lua table.concat

Is there a way to use the arg 2 value of table.concat to represent the current table index?
eg:
t = {}
t[1] = "a"
t[2] = "b"
t[3] = "c"
X = table.concat(t,"\n")
desired output of table concat (X):
"1 a\n2 b\n3 c\n"
Simple answer : no.
table.concat is something really basic, and really fast.
So you should do it in a loop anyhow.
If you want to avoid excessive string concatenation you can do:
--- Concatenate a sequence, formatting each item together with its index.
-- @param tab sequence of values
-- @param template optional format string that receives (index, value);
--        defaults to '%d %s\n'
-- @return the formatted items joined without any separator
function concatIndexed(tab, template)
  local formato = template
  if not formato then
    formato = '%d %s\n'
  end
  local pedacos = {}
  for indice, valor in ipairs(tab) do
    table.insert(pedacos, formato:format(indice, valor))
  end
  return table.concat(pedacos)
end
X = concatIndexed(t) -- and optionally specify a certain per item format
Y = concatIndexed(t,'custom format %3d %s\n')
I don't think so: how would you tell it that the separator between keys and values is supposed to be a space, for example?
You can write a general mapping function to do what you'd like:
--- Build a new table by applying `func` to every key/value pair of `t`.
-- @param t source table (traversed with pairs)
-- @param func function called as func(key, value)
-- @return new table holding func's first result under the same key
function map2(t, func)
  local resultado = {}
  for chave, valor in pairs(t) do
    local novo = func(chave, valor)
    resultado[chave] = novo
  end
  return resultado
end
--- Join a key and a value with a single space between them.
-- @param k left operand of the concatenation
-- @param v right operand of the concatenation
-- @return k, a space and v concatenated
function joinbyspace(k, v)
  local sufixo = ' ' .. v
  return k .. sufixo
end
X = table.concat(map2(t, joinbyspace), "\n")
No. But there is a work around:
-- Running counter shared with next_line_no.
local n = 0
-- Returns the next index followed by a space; used as the gsub replacement.
local function next_line_no()
n = n + 1
return n..' '
end
-- Join the items with a NUL byte, insert the running index at every frontier
-- where a non-NUL character begins (the start of the string, or right after a
-- NUL), then turn the NUL separators into newlines. Only the string result of
-- the last gsub is kept in X; there is no newline after the final item.
-- NOTE(review): the %z / %Z classes are deprecated in Lua 5.2 and later —
-- confirm the target Lua version.
X = table.concat(t,'\0'):gsub('%f[%Z]',next_line_no):gsub('%z','\n')
--- Join the items of a sequence, optionally placing a separator between them.
-- @param tab sequence of strings/numbers
-- @param seperator value inserted between consecutive items; when nil the
--        items are joined directly with table.concat
-- @return the joined string
function Util_Concat(tab, seperator)
  if seperator ~= nil then
    local pedacos = {}
    for posicao, item in ipairs(tab) do
      table.insert(pedacos, item)
      -- No separator after the last item.
      if posicao < #tab then
        table.insert(pedacos, seperator)
      end
    end
    return table.concat(pedacos)
  end
  return table.concat(tab)
end
Usage: tab is the input table, and seperator may be either nil or a string (if it is nil, the function acts like an ordinary table.concat).
print(Util_Concat({"Hello", "World"}, "_"))
--Prints
--Hello_World

Resources