Parsing nested indented text into lists - parsing

Parsing nested indented text into lists
Hi,
maybe someone can give me a start help.
I have nested indented txt similar to this. I should parse that into a nested list structure like
TXT = r"""
Test1
    NeedHelp
        GotStuck
            Sometime
            NoLuck
    NeedHelp2
        StillStuck
        GoodLuck
"""
Nested_Lists = ['Test1',
['NeedHelp',
['GotStuck',
['Sometime',
'NoLuck']]],
['NeedHelp2',
['StillStuck',
'GoodLuck']]
]
Nested_Lists = ['Test1', ['NeedHelp', ['GotStuck', ['Sometime', 'NoLuck']]], ['NeedHelp2', ['StillStuck', 'GoodLuck']]]
Any help for Python 3 would be appreciated.

You could exploit Python tokenizer to parse the indented text:
from tokenize import NAME, INDENT, DEDENT, tokenize
def parse(file):
    """Parse indented text into nested lists using Python's tokenizer.

    Every NAME token becomes a list item. An INDENT opens a child list
    inside the current list; a DEDENT closes it. A name that follows a
    dedent at a nested level starts a new sibling list, while a name that
    dedents all the way back to the top level is appended to the root list.

    Args:
        file: Binary file-like object; ``file.readline`` must return bytes,
            as required by ``tokenize.tokenize``.

    Returns:
        The root list of names and nested child lists.
    """
    stack = [[]]
    lastindent = len(stack)

    def push_new_list():
        """Open a child list in the current list and make it current."""
        child = []
        stack[-1].append(child)
        stack.append(child)
        return len(stack)

    for t in tokenize(file.readline):
        if t.type == NAME:
            # A depth change since the current list was opened means we
            # dedented: close the finished sibling group and open a new
            # one.  At the root (len(stack) == 1) there is no group to
            # close; popping there would empty the stack and crash.
            if lastindent != len(stack) and len(stack) > 1:
                stack.pop()
                lastindent = push_new_list()
            stack[-1].append(t.string)  # add to current list
        elif t.type == INDENT:
            lastindent = push_new_list()
        elif t.type == DEDENT:
            stack.pop()
    return stack[-1]
Example:
from io import BytesIO
from pprint import pprint
pprint(parse(BytesIO(TXT.encode('utf-8'))), width=20)
Output
['Test1',
['NeedHelp',
['GotStuck',
['Sometime',
'NoLuck']]],
['NeedHelp2',
['StillStuck',
'GoodLuck']]]

I hope you can understand my solution. If not, ask.
def nestedbyindent(string, indent_char=' '):
    """Group the lines of *string* into nested lists by indentation depth.

    Blank lines are skipped. Lines at the same indentation are siblings in
    one list; a deeper line opens a nested list that runs until the
    indentation drops below that depth again.

    Args:
        string: The indented text.
        indent_char: The single character counted as indentation.

    Returns:
        A list holding one nested list per top-level group (the top level is
        itself wrapped, because parsing starts from a virtual depth of -1);
        an empty list when there are no non-blank lines.
    """
    lines = string.splitlines()
    i = 0

    def indent_width(line):
        """Return the index of the first non-indent character, or -1."""
        return next(
            (pos for pos, char in enumerate(line) if char != indent_char), -1
        )

    def subgenerator(indent):
        """Yield the items that belong to the group at depth *indent*."""
        nonlocal i
        while i < len(lines):
            line = lines[i]
            title = line.lstrip()
            if not title:
                i += 1
                continue
            curr_indent = indent_width(line)
            if curr_indent < indent:
                # Dedent: this line belongs to an enclosing group.
                break
            if curr_indent == indent:
                i += 1
                yield title
            else:
                # Deeper line: the recursive call consumes the whole subgroup.
                yield list(subgenerator(curr_indent))

    return list(subgenerator(-1))
>>> nestedbyindent(TXT)
['Test1', ['NeedHelp', ['GotStuck', ['Sometime', 'NoLuck']],
'NeedHelp2',['StillStuck', 'GoodLuck']]]

Here is an answer that is very non-Pythonic and verbose, but it seems to work.
import ast

TXT = r"""
Test1
    NeedHelp
        GotStuck
            Sometime
            NoLuck
    NeedHelp2
        StillStuck
        GoodLuck
"""

# Build the textual form of a nested list literal, then parse it.
outString = '['
level = 0
first = True
for i in TXT.split("\n")[1:]:
    itemStr = i.lstrip()
    if not itemStr:
        # Blank lines carry no item; brackets are closed after the loop.
        continue
    # 4 spaces = 1 indent.  Floor division keeps the count an int; on
    # Python 3 a float here would make '[' * (count - level) raise.
    count = (len(i) - len(i.lstrip(' '))) // 4
    separator = '' if first else ','
    if level < count:
        outString += separator + '[' * (count - level) + repr(itemStr)
    elif level > count:
        outString += ']' * (level - count) + ',' + repr(itemStr)
    else:
        outString += separator + repr(itemStr)
    first = False
    level = count
# Close every list that is still open, plus the outer one.
outString += ']' * level + ']'
# literal_eval only accepts Python literals, unlike eval, and repr() above
# guarantees each item is correctly quoted.
output = ast.literal_eval(outString)
#['Test1', ['NeedHelp', ['GotStuck', ['Sometime', 'NoLuck']], 'NeedHelp2', ['StillStuck', 'GoodLuck']]]

Riffing off of this answer, if entire lines want to be retained and if those lines consist of more than just variable names, t.type == NAME can be substituted with t.type == NEWLINE, and that if-statement can append the stripped line instead of the t.string. Something like this:
from tokenize import NEWLINE, INDENT, DEDENT, tokenize
def parse(file):
    """Parse indented text into nested lists, keeping each line whole.

    Each logical line (NEWLINE token) contributes its stripped text as one
    item, so lines may contain spaces, brackets and other tokens. An INDENT
    opens a child list inside the current list; a DEDENT closes it. A line
    that follows a dedent at a nested level starts a new sibling list, while
    a line that dedents back to the top level is appended to the root list.

    Args:
        file: Binary file-like object; ``file.readline`` must return bytes,
            as required by ``tokenize.tokenize``.

    Returns:
        The root list of stripped lines and nested child lists.
    """
    stack = [[]]
    lastindent = len(stack)

    def push_new_list():
        """Open a child list in the current list and make it current."""
        child = []
        stack[-1].append(child)
        stack.append(child)
        return len(stack)

    for t in tokenize(file.readline):
        if t.type == NEWLINE:
            # A depth change since the current list was opened means we
            # dedented: close the finished sibling group and open a new
            # one.  At the root there is nothing to close; popping there
            # would empty the stack and crash.
            if lastindent != len(stack) and len(stack) > 1:
                stack.pop()
                lastindent = push_new_list()
            stack[-1].append(t.line.strip())  # add entire line to current list
        elif t.type == INDENT:
            lastindent = push_new_list()
        elif t.type == DEDENT:
            stack.pop()
    return stack[-1]
Otherwise, the lines get split on any token, where a token includes spaces, parentheses, brackets, etc.

Related

Shunting-yard with functions

I'm trying to parse a text with the Shunting-yard algorithm but I've come across a problem. I don't know where to start parsing functions.
This is my goal: print('Hello ' + in())
The current tokens:
[ID print, LPAREN, STRING "Hello ", PLUS, ID in, LPAREN, RPAREN, RPAREN]
My current parser:
class Parser:
    """Two-stack (shunting-yard style) expression evaluator over a token list.

    Operands go on ``value_stack`` and operators / parentheses on
    ``operator_stack``; ``pop`` reduces the top operator with the two top
    values via the module-level ``eval_`` helper.
    """

    def __init__(self, tokens:list, variables:dict):
        """Store the token list and variable table and load the first token."""
        self.tokens = tokens
        self.idx = -1
        self.tok = None
        self.variables = variables
        self.value_stack = []
        self.operator_stack = []
        self.next_token()

    def next_token(self):
        """Advance to the next token; ``self.tok`` becomes None at the end."""
        self.idx += 1
        self.tok = self.tokens[self.idx] if self.idx < len(self.tokens) else None

    def pop(self):
        """Apply the top operator to the two top values and push the result.

        The value popped first is passed to ``eval_`` first.
        """
        newop = self.operator_stack.pop()
        val1 = self.value_stack.pop()
        val2 = self.value_stack.pop()
        self.value_stack.append(eval_(val1, val2, newop))

    def parse(self):
        """Evaluate the remaining tokens.

        Returns:
            The value left on top of the value stack, or None if it is empty.
        """
        while self.tok:
            if self.tok.type in VALUE_TYPE:
                self.value_stack.append(self.tok.value)
            elif self.tok.type == TT_ID:
                # id() advances the token stream itself, so skip the
                # next_token() call at the bottom of the loop.
                self.id()
                continue
            elif self.tok.type == TT_LPAREN:
                self.operator_stack.append(TT_LPAREN)
            elif self.tok.type in OPERATORS:
                op = self.tok.type
                # Reduce operators of equal or higher precedence first.
                while self.operator_stack and PRESCEDENCE.get(self.operator_stack[-1], 0) >= PRESCEDENCE.get(op, 0):
                    self.pop()
                self.operator_stack.append(op)
            elif self.tok.type == TT_RPAREN:
                # Reduce back to the matching "(" and then discard it.
                while self.operator_stack and self.operator_stack[-1] != TT_LPAREN:
                    self.pop()
                self.operator_stack.pop()
            self.next_token()
        while self.operator_stack:
            self.pop()
        return self.value_stack[-1] if self.value_stack else None

    def id(self):
        """Handle an identifier: assignment, call, or variable read.

        Looks one token ahead: ``=`` assigns the value of the rest of the
        input to the variable, ``(`` pushes the identifier onto the operator
        stack as a function name, anything else (including end of input)
        pushes the variable's current value, or ``'null'`` if it is unset.
        """
        tok = self.tok
        self.next_token()
        following = self.tok.type if self.tok else None
        if following == TT_EQUALS:
            self.next_token()
            self.variables[tok.value] = self.parse()
        elif following == TT_LPAREN:
            # NOTE(review): pop() treats every non-"(" stack entry as a
            # binary operator, so reducing this function name pops two
            # values and raises IndexError for calls with fewer than two
            # operands.  Function application needs its own reduction step.
            self.operator_stack.append(tok.value)
        else:
            # Also reached when the identifier is the last token; returning
            # early there would silently drop the operand.
            self.value_stack.append(self.variables.get(tok.value, 'null'))
How would I implement function handling? Every time I try to execute a function I get this error:
Traceback (most recent call last):
File "lang.py", line 19, in <module>
out = evaluate(text, variables)
File "lang.py", line 10, in evaluate
parser.parse()
File "parsing.py", line 85, in parse
self.pop()
File "parsing.py", line 52, in pop
val2 = self.value_stack.pop()
IndexError: pop from empty list
Any help is appreciated.

Is it possible to dump the EBNF/BNF grammar table of a pyparsing object?

Preface: this may be a stupid, uninformed question.
I have a grammar I wrote with the pyparsing library (and the help of stack-overflow posts) that parses nested expressions with parenthesis, curly, and square brackets. I'm curious what productions in a grammar table would look like. I was wondering if there was a way to automatically generate this for an arbitrary pyparsing context free grammar.
For reference, the pyparsing grammar is defined here:
def parse_nestings(string, only_curl=False):
    r"""
    Split a string into tagged blocks of nested bracket groups and plain text.

    Args:
        string: Text to parse.
        only_curl: If True, only ``{}`` groups are recognised by the grammar;
            otherwise ``()``, ``[]`` and ``{}`` groups are.

    Returns:
        A list of ``(tag, value)`` tuples. Plain text is tagged
        ``'nonNested'``; groups are tagged ``'paren'``, ``'brak'`` or
        ``'curl'`` and hold a nested list of the same form, with each
        delimiter tagged ``'ITEM'``. Empty input gives ``[]``.

    References:
        http://stackoverflow.com/questions/4801403/pyparsing-nested-mutiple-opener-clo

    CommandLine:
        python -m utool.util_gridsearch parse_nestings:1 --show

    Example:
        >>> from utool.util_gridsearch import *  # NOQA
        >>> import utool as ut
        >>> string = r'lambda u: sign(u) * abs(u)**3.0 * greater(u, 0)'
        >>> parsed_blocks = parse_nestings(string)
        >>> recombined = recombine_nestings(parsed_blocks)
        >>> print('PARSED_BLOCKS = ' + ut.repr3(parsed_blocks, nl=1))
        >>> print('recombined = %r' % (recombined,))
        >>> print('orig = %r' % (string,))
        PARSED_BLOCKS = [
            ('nonNested', 'lambda u: sign'),
            ('paren', [('ITEM', '('), ('nonNested', 'u'), ('ITEM', ')')]),
            ('nonNested', '* abs'),
            ('paren', [('ITEM', '('), ('nonNested', 'u'), ('ITEM', ')')]),
            ('nonNested', '**3.0 * greater'),
            ('paren', [('ITEM', '('), ('nonNested', 'u, 0'), ('ITEM', ')')]),
        ]

    Example:
        >>> from utool.util_gridsearch import *  # NOQA
        >>> import utool as ut
        >>> string = r'\chapter{Identification \textbf{foobar} workflow}\label{chap:application}'
        >>> parsed_blocks = parse_nestings(string)
        >>> print('PARSED_BLOCKS = ' + ut.repr3(parsed_blocks, nl=1))
        PARSED_BLOCKS = [
            ('nonNested', '\\chapter'),
            ('curl', [('ITEM', '{'), ('nonNested', 'Identification \\textbf'), ('curl', [('ITEM', '{'), ('nonNested', 'foobar'), ('ITEM', '}')]), ('nonNested', 'workflow'), ('ITEM', '}')]),
            ('nonNested', '\\label'),
            ('curl', [('ITEM', '{'), ('nonNested', 'chap:application'), ('ITEM', '}')]),
        ]
    """
    import utool as ut  # NOQA
    import pyparsing as pp

    def as_tagged(parent, doctag=None):
        """Convert parse results into nested ``(tag, value)`` tuples.

        Tokens and sub-results with a results name are tagged with that
        name; everything else is tagged ``"ITEM"``.
        """
        # Map token position -> results name.
        # NOTE(review): reads pyparsing's private name-mangled attributes,
        # so this depends on the installed pyparsing version.
        namedItems = dict((v[1], k) for (k, vlist) in parent._ParseResults__tokdict.items()
                          for v in vlist)
        parentTag = None
        if doctag is not None:
            parentTag = doctag
        else:
            if parent._ParseResults__name:
                parentTag = parent._ParseResults__name
        if not parentTag:
            parentTag = "ITEM"
        out = []
        for i, res in enumerate(parent._ParseResults__toklist):
            if isinstance(res, pp.ParseResults):
                if i in namedItems:
                    child = as_tagged(res, namedItems[i])
                else:
                    child = as_tagged(res, None)
                out.append(child)
            else:
                # individual token, see if there is a name for it
                resTag = None
                if i in namedItems:
                    resTag = namedItems[i]
                if not resTag:
                    resTag = "ITEM"
                child = (resTag, pp._ustr(res))
                out += [child]
        return (parentTag, out)

    def combine_nested(opener, closer, content, name=None):
        r"""
        Build a grouped ``opener content* closer`` expression.

        opener, closer, content = '(', ')', nest_body
        """
        import utool as ut  # NOQA
        ret1 = pp.Forward()
        # Identity keeps the delimiters in the output; pp.Suppress would
        # drop them.
        _NEST = ut.identity
        #_NEST = pp.Suppress
        opener_ = _NEST(opener)
        closer_ = _NEST(closer)
        group = pp.Group(opener_ + pp.ZeroOrMore(content) + closer_)
        ret2 = ret1 << group
        if ret2 is None:
            # Presumably guards against pyparsing versions in which ``<<``
            # returns None; fall back to the Forward itself.
            ret2 = ret1
        if name is None:
            ret3 = ret2
        else:
            ret3 = ret2.setResultsName(name)
        assert ret3 is not None, 'cannot have a None return'
        return ret3

    # Current Best Grammar
    nest_body = pp.Forward()
    nestedParens = combine_nested('(', ')', content=nest_body, name='paren')
    nestedBrackets = combine_nested('[', ']', content=nest_body, name='brak')
    nestedCurlies = combine_nested('{', '}', content=nest_body, name='curl')

    nonBracePrintables = ''.join(c for c in pp.printables if c not in '(){}[]') + ' '
    nonNested = pp.Word(nonBracePrintables).setResultsName('nonNested')
    nonNested = nonNested.leaveWhitespace()

    # if with_curl and not with_paren and not with_brak:
    if only_curl:
        # TODO figure out how to chain |
        nest_body << (nonNested | nestedCurlies)
    else:
        nest_body << (nonNested | nestedParens | nestedBrackets | nestedCurlies)

    nest_body = nest_body.leaveWhitespace()
    parser = pp.ZeroOrMore(nest_body)

    debug_ = ut.VERBOSE

    if len(string) > 0:
        tokens = parser.parseString(string)
        if debug_:
            print('string = %r' % (string,))
            print('tokens List: ' + ut.repr3(tokens.asList()))
            print('tokens XML: ' + tokens.asXML())
        # as_tagged returns (root_tag, children); only the children matter.
        parsed_blocks = as_tagged(tokens)[1]
        if debug_:
            print('PARSED_BLOCKS = ' + ut.repr3(parsed_blocks, nl=1))
    else:
        parsed_blocks = []
    return parsed_blocks

longest palindrome in Ruby on Rails

Write a method that takes in a string of lowercase letters (no uppercase letters, no repeats). Consider the substrings of the string: consecutive sequences of letters contained inside the string.
Find the longest such string of letters that is a palindrome.
Based on the local helper method palindrome?(string), I implemented longest_palindrome(string) as below, with test cases:
# Returns true when +string+ reads the same forwards and backwards.
def palindrome?(string)
  (0...string.length).all? { |i| string[i] == string[-1 - i] }
end

# Finds the longest palindromic substring of +string+.
#
# Every (start, length) pair that fits inside the string is tested,
# including the one that covers the whole string. On a tie the earliest
# candidate wins. The result is printed and returned; an empty string
# yields "".
def longest_palindrome(string)
  longest = ""
  (0...string.length).each do |start|
    # Lengths run up to and including what is left after +start+; the
    # previous bound (length < string.length) never tried the full string.
    (1..string.length - start).each do |len|
      candidate = string.slice(start, len)
      if candidate.length > longest.length && palindrome?(candidate)
        longest = candidate
      end
    end
  end
  puts(longest)
  longest
end
# These are tests to check that your code is working. After writing
# your solution, they should all print true.
puts(
'longest_palindrome("abcbd") == "bcb": ' +
(longest_palindrome('abcbd') == 'bcb').to_s
)
puts(
'longest_palindrome("abba") == "abba": ' +
(longest_palindrome('abba') == 'abba').to_s
)
puts(
'longest_palindrome("abcbdeffe") == "effe": ' +
(longest_palindrome('abcbdeffe') == 'effe').to_s
)
Test results as below:
bcb
longest_palindrome("abcbd") == "bcb": true
bb
longest_palindrome("abba") == "abba": false
effe
longest_palindrome("abcbdeffe") == "effe": true
Why did the second test fail?
... this line is preventing you from considering the entire string
while dix2 < string.length
So when dix is the whole string, you're not doing any testing for palindromes
Change the line to...
while dix2 <= string.length
It would actually be slightly more efficient if you did...
while dix2 <= string.length - dix
Which would prevent you from testing (for, say, a string of length 10), string(7,3) and string(7,4) and string(7,5) etc. etc., which are all basically the same string.

Elixir parse binary data?

​for example:
I have a binary look like this:
bin1 = "2\nok\n3\nbcd\n\n"​
or
bin2 = "2\nok\n3\nbcd\n1\na\n\n"​
and so on...
The format is
byte_size \n bytes \n byte_size \n bytes \n \n
I want to parse the binary to get
["ok", "bcd"]
how to implement in Elixir or Erlang ?
Go version
a Go version parse this
// parse tries to decode one complete response from c.recv_buf.
//
// The wire format is a sequence of "size\n" + size bytes + one separator
// byte, terminated by an empty line ("\n" or "\r\n").
//
// Returns:
//   - the decoded values once the terminating empty line is seen; only then
//     are the consumed bytes removed from the buffer via c.recv_buf.Next;
//   - nil if a size line is not a non-negative integer;
//   - an empty slice if the buffered data is still incomplete (nothing is
//     consumed in that case).
//
// NOTE(review): recv_buf is presumably a bytes.Buffer (Bytes/Len/Next are
// used) - confirm against the Client definition.
func (c *Client) parse() []string {
resp := []string{}
buf := c.recv_buf.Bytes()
var idx, offset int
idx = 0
offset = 0
for {
// Find the end of the next line, relative to offset.
idx = bytes.IndexByte(buf[offset:], '\n')
if idx == -1 {
break
}
p := buf[offset : offset+idx]
offset += idx + 1
//fmt.Printf("> [%s]\n", p);
// An empty line (optionally a lone '\r') is the terminator.
if len(p) == 0 || (len(p) == 1 && p[0] == '\r') {
if len(resp) == 0 {
// Nothing collected yet: skip leading empty lines.
continue
} else {
// Complete response: drop the consumed bytes from the buffer.
c.recv_buf.Next(offset)
return resp
}
}
size, err := strconv.Atoi(string(p))
if err != nil || size < 0 {
return nil
}
// The value plus at least one following byte must already be buffered.
if offset+size >= c.recv_buf.Len() {
break
}
v := buf[offset : offset+size]
resp = append(resp, string(v))
// Skip the value and the single separator byte after it.
offset += size + 1
}
return []string{}
}
Thanks
A more flexible solution:
# Split on newlines, pair each size line with the line that follows it,
# decode every pair, and drop the empty terminator pair.
result =
  bin
  |> String.split("\n")
  # Stream.chunk/2 is deprecated; chunk_every/4 with :discard keeps its
  # behaviour of dropping an incomplete trailing chunk.
  |> Stream.chunk_every(2, 2, :discard)
  |> Stream.map(&parse_bytes/1)
  |> Enum.reject(&(&1 == ""))

# The trailing "\n\n" of the input produces a pair of empty strings.
def parse_bytes(["", ""]), do: ""

# Takes exactly `byte_size` bytes from `bytes`; raises MatchError when the
# declared size does not equal the actual size.
def parse_bytes([byte_size, bytes]) do
  byte_size_int = String.to_integer(byte_size)
  <<parsed::binary-size(byte_size_int)>> = bytes
  parsed
end
I wrote a solution:
# A lone newline is the terminator: no more payloads.
defp parse("\n"), do: []

# Reads one "size\npayload\n" record from the front of `data` and recurses
# on the remainder.
defp parse(data) do
  {newline_at, _length} = :binary.match(data, "\n")
  size = data |> binary_part(0, newline_at) |> String.to_integer()

  payload_start = newline_at + 1
  payload = binary_part(data, payload_start, size)

  # Skip the payload and the newline that follows it.
  consumed = payload_start + size + 1
  remainder = binary_part(data, consumed, byte_size(data) - consumed)

  [payload | parse(remainder)]
end
The Elixir mailing list provides another one:
# A lone newline is the terminator: no more chunks.
defp parse_binary("\n"), do: []

# Reads the decimal size prefix, then matches exactly that many bytes
# followed by a newline, and recurses on whatever is left.
defp parse_binary(binary) do
  {size, "\n" <> rest} = Integer.parse(binary)
  # Modifiers are joined with `-`; the list form `[binary, size(size)]` is
  # no longer accepted by current Elixir.
  <<chunk::binary-size(size), "\n", rest::binary>> = rest
  [chunk | parse_binary(rest)]
end

How to dump a table to console?

I'm having trouble displaying the contents of a table which contains nested tables (n-deep). I'd like to just dump it to std out or the console via a print statement or something quick and dirty but I can't figure out how. I'm looking for the rough equivalent that I'd get when printing an NSDictionary using gdb.
If the requirement is "quick and dirty"
I've found this one useful. Because of the recursion it can print nested tables too. It doesn't give the prettiest formatting in the output but for such a simple function it's hard to beat for debugging.
-- Returns a one-line string rendering of `o`.
-- Tables are rendered recursively as `{ [key] = value,... } `; any other
-- value is rendered with tostring(). Non-numeric keys are wrapped in
-- double quotes. String values are not quoted.
function dump(o)
   if type(o) ~= 'table' then
      return tostring(o)
   end
   local s = '{ '
   for k, v in pairs(o) do
      -- tostring() so that boolean/table/function keys do not raise
      -- "attempt to concatenate" errors.
      if type(k) ~= 'number' then k = '"' .. tostring(k) .. '"' end
      s = s .. '[' .. k .. '] = ' .. dump(v) .. ','
   end
   return s .. '} '
end
e.g.
local people = {
{
name = "Fred",
address = "16 Long Street",
phone = "123456"
},
{
name = "Wilma",
address = "16 Long Street",
phone = "123456"
},
{
name = "Barney",
address = "17 Long Street",
phone = "123457"
}
}
print("People:", dump(people))
Produces the following output:
People: { [1] = { ["address"] = 16 Long Street,["phone"] =
123456,["name"] = Fred,} ,[2] = { ["address"] = 16 Long
Street,["phone"] = 123456,["name"] = Wilma,} ,[3] = { ["address"] = 17
Long Street,["phone"] = 123457,["name"] = Barney,} ,}
I know this question has already been marked as answered, but let me plug my own library here. It's called inspect.lua, and you can find it here:
https://github.com/kikito/inspect.lua
It's just a single file that you can require from any other file. It returns a function that transforms any Lua value into a human-readable string:
local inspect = require('inspect')
print(inspect({1,2,3})) -- {1, 2, 3}
-- The closing parenthesis of print(...) was missing here.
print(inspect({a=1,b=2}))
-- {
-- a = 1
-- b = 2
-- }
It indents subtables properly, and handles "recursive tables" (tables that contain references to themselves) correctly, so it doesn't get into infinite loops. It sorts values in a sensible way. It also prints metatable information.
Regards!
Feel free to browse the Lua Wiki on table serialization. It lists several ways on how to dump a table to the console.
You just have to choose which one suits you best. There are many ways to do it, but I usually end up using the one from Penlight:
> t = { a = { b = { c = "Hello world!", 1 }, 2, d = { 3 } } }
> require 'pl.pretty'.dump(t)
{
a = {
d = {
3
},
b = {
c = "Hello world!",
1
},
2
}
}
found this:
-- Print contents of `tbl`, with indentation.
-- `indent` sets the initial level of indentation (defaults to 0).
-- Nested tables are printed recursively one level deeper; keys and values
-- of any type are rendered with tostring().
function tprint (tbl, indent)
  indent = indent or 0
  for k, v in pairs(tbl) do
    -- `local` keeps the prefix out of the global namespace; tostring(k)
    -- avoids a concatenation error for boolean/table keys.
    local formatting = string.rep(" ", indent) .. tostring(k) .. ": "
    if type(v) == "table" then
      print(formatting)
      tprint(v, indent + 1)
    else
      -- tostring() also covers functions, userdata and booleans.
      print(formatting .. tostring(v))
    end
  end
end
from here
https://gist.github.com/ripter/4270799
works pretty good for me...
Most pure lua print table functions I've seen have a problem with deep recursion
and tend to cause a stack overflow when going too deep. This print
table function that I've written does not have this problem. It should also be capable of handling really large tables due to the way it handles concatenation. In my personal usage of this function, it outputted 63k lines to file in about a second.
The output also keeps lua syntax and the script can easily be modified
for simple persistent storage by writing the output to file if modified to allow
only number, boolean, string and table data types to be formatted.
-- Prints `node` (a table) as Lua-like source text without recursion.
-- State used by the loop:
--   stack  - tables still to be visited; a parent is pushed before its child
--   cache  - per table, the pairs() position at which to resume after a
--            child table has been printed
--   output - finished text fragments, joined once at the end
-- NOTE(review): resuming relies on pairs() visiting the entries of an
-- unmodified table in the same order on every pass.
function print_table(node)
local cache, stack, output = {},{},{}
local depth = 1
local output_str = "{\n"
while true do
-- Count the entries so the last one can be recognised.
local size = 0
for k,v in pairs(node) do
size = size + 1
end
local cur_index = 1
for k,v in pairs(node) do
-- Skip entries that were already emitted before descending into a child.
if (cache[node] == nil) or (cur_index >= cache[node]) then
-- The find() calls start at the last character, so they test whether
-- output_str ends with "}" (a table just closed) or with a newline.
if (string.find(output_str,"}",output_str:len())) then
output_str = output_str .. ",\n"
elseif not (string.find(output_str,"\n",output_str:len())) then
output_str = output_str .. "\n"
end
-- This is necessary for working with HUGE tables otherwise we run out of memory using concat on huge strings
table.insert(output,output_str)
output_str = ""
local key
-- Number and boolean keys are written bare, everything else quoted.
if (type(k) == "number" or type(k) == "boolean") then
key = "["..tostring(k).."]"
else
key = "['"..tostring(k).."']"
end
if (type(v) == "number" or type(v) == "boolean") then
output_str = output_str .. string.rep('\t',depth) .. key .. " = "..tostring(v)
elseif (type(v) == "table") then
output_str = output_str .. string.rep('\t',depth) .. key .. " = {\n"
-- Visit the child next, then come back to this table at the entry
-- after the current one.
table.insert(stack,node)
table.insert(stack,v)
cache[node] = cur_index+1
break
else
-- Any other type (strings, functions, ...) is written as a quoted string.
output_str = output_str .. string.rep('\t',depth) .. key .. " = '"..tostring(v).."'"
end
if (cur_index == size) then
output_str = output_str .. "\n" .. string.rep('\t',depth-1) .. "}"
else
output_str = output_str .. ","
end
else
-- close the table
if (cur_index == size) then
output_str = output_str .. "\n" .. string.rep('\t',depth-1) .. "}"
end
end
cur_index = cur_index + 1
end
-- An empty table never enters the loop above, so close it here.
if (size == 0) then
output_str = output_str .. "\n" .. string.rep('\t',depth-1) .. "}"
end
if (#stack > 0) then
node = stack[#stack]
stack[#stack] = nil
-- No resume position means a table not visited yet (one level deeper);
-- otherwise we are returning to a parent (one level up).
depth = cache[node] == nil and depth + 1 or depth - 1
else
break
end
end
-- This is necessary for working with HUGE tables otherwise we run out of memory using concat on huge strings
table.insert(output,output_str)
output_str = table.concat(output)
print(output_str)
end
Here is an example:
local t = {
["abe"] = {1,2,3,4,5},
"string1",
50,
["depth1"] = { ["depth2"] = { ["depth3"] = { ["depth4"] = { ["depth5"] = { ["depth6"] = { ["depth7"]= { ["depth8"] = { ["depth9"] = { ["depth10"] = {1000}, 900}, 800},700},600},500}, 400 }, 300}, 200}, 100},
["ted"] = {true,false,"some text"},
"string2",
[function() return end] = function() return end,
75
}
print_table(t)
Output:
{
[1] = 'string1',
[2] = 50,
[3] = 'string2',
[4] = 75,
['abe'] = {
[1] = 1,
[2] = 2,
[3] = 3,
[4] = 4,
[5] = 5
},
['function: 06472B70'] = 'function: 06472A98',
['depth1'] = {
[1] = 100,
['depth2'] = {
[1] = 200,
['depth3'] = {
[1] = 300,
['depth4'] = {
[1] = 400,
['depth5'] = {
[1] = 500,
['depth6'] = {
[1] = 600,
['depth7'] = {
[1] = 700,
['depth8'] = {
[1] = 800,
['depth9'] = {
[1] = 900,
['depth10'] = {
[1] = 1000
}
}
}
}
}
}
}
}
}
},
['ted'] = {
[1] = true,
[2] = false,
[3] = 'some text'
}
}
As previously mentioned, you have to write it.
Here is my humble version: (super basic one)
-- Prints every non-table value reachable from `t`, one per line, as
-- `table<key path> = <value>`.
-- `s` is the key path accumulated so far; callers normally omit it.
-- String keys and string values are double-quoted; everything else is
-- rendered with tostring().
function tprint (t, s)
    local path = s or ''
    for k, v in pairs(t) do
        local kfmt
        if type(k) == 'string' then
            kfmt = '["' .. k .. '"]'
        else
            -- tostring() so boolean/table keys do not raise on concatenation.
            kfmt = '[' .. tostring(k) .. ']'
        end
        if type(v) == 'table' then
            tprint(v, path .. kfmt)
        else
            local vfmt
            if type(v) == 'string' then
                vfmt = '"' .. v .. '"'
            else
                vfmt = tostring(v)
            end
            print(type(t) .. path .. kfmt .. ' = ' .. vfmt)
        end
    end
end
example:
local mytbl = { ['1']="a", 2, 3, b="c", t={d=1} }
tprint(mytbl)
output (Lua 5.0):
table[1] = 2
table[2] = 3
table["1"] = "a"
table["t"]["d"] = 1
table["b"] = "c"
I use my own function to print the contents of a table but not sure how well it translates to your environment:
---Prints the contents of a table, one entry per line.
---Only entries whose key is a number or a string are shown.
---@param tbl table   The table to print.
---@param depth number   How many levels of sub-tables to descend (default 5).
---@param n number   Current left padding; used by the recursion, do not pass it.
function PrintTable(tbl, depth, n)
  n = n or 0;
  depth = depth or 5;
  local pad = string.rep(' ', n);

  -- Depth budget exhausted: mark the cut-off and stop.
  if (depth == 0) then
    print(pad.."...");
    return;
  end

  local is_outermost = (n == 0);
  if (is_outermost) then
    print(" ");
  end

  for key, value in pairs(tbl) do
    local key_type = type(key);
    if (key_type == "number" or key_type == "string") then
      local label = string.format("[\"%s\"]", key);
      if (type(value) ~= "table") then
        local shown;
        if (type(value) == "string") then
          shown = string.format("\"%s\"", value);
        else
          shown = tostring(value);
        end
        print(pad..label.." = "..shown..",");
      elseif (next(value)) then
        print(pad..label.." = {");
        PrintTable(value, depth - 1, n + 4);
        print(pad.."},");
      else
        print(pad..label.." = {},");
      end
    end
  end

  if (is_outermost) then
    print(" ");
  end
end
The simplest way, with circular reference handling and all:
-- Prints the entries of `t`, descending into nested tables.
-- `indent` is the tab count printed before each entry (default 0).
-- `done` is the set of tables on the current path; a table already in it
-- is printed as a plain value instead of being entered again, so circular
-- references terminate.
function dump(t, indent, done)
    local on_path = done or {}
    local level = indent or 0
    on_path[t] = true
    local margin = string.rep("\t", level)
    for key, value in pairs(t) do
        print(margin)
        local enter = type(value) == "table" and not on_path[value]
        if not enter then
            print(key, "\t=\t", value, "\n")
        else
            on_path[value] = true
            print(key, ":\n")
            dump(value, level + 2, on_path)
            -- Leaving the sub-table: it may be entered again elsewhere.
            on_path[value] = nil
        end
    end
end
There are 2 solutions that I want to mention: a quick&dirty one, and another which properly escapes all keys and values but is bigger
Simple & fast solution (use only on "safe" inputs):
-- Appends a JSON-like rendering of `obj` to `buffer` (an array of string
-- fragments to be joined with table.concat).
-- Tables become objects keyed by tostring(key); strings are wrapped in
-- double quotes WITHOUT escaping (use only on "safe" inputs); booleans and
-- numbers use tostring(); any other type becomes a "???type???" marker.
local function format_any_value(obj, buffer)
    local _type = type(obj)
    if _type == "table" then
        -- An empty table must be handled up front: the overwrite trick
        -- below would otherwise replace the opening '{"' and emit a lone '}'.
        if next(obj) == nil then
            buffer[#buffer + 1] = '{}'
            return
        end
        buffer[#buffer + 1] = '{"'
        for key, value in next, obj, nil do
            buffer[#buffer + 1] = tostring(key) .. '":'
            format_any_value(value, buffer)
            buffer[#buffer + 1] = ',"'
        end
        buffer[#buffer] = '}' -- note the overwrite of the last ',"'
    elseif _type == "string" then
        buffer[#buffer + 1] = '"' .. obj .. '"'
    elseif _type == "boolean" or _type == "number" then
        buffer[#buffer + 1] = tostring(obj)
    else
        buffer[#buffer + 1] = '"???' .. _type .. '???"'
    end
end
Usage:
-- Returns `obj` rendered as a JSON-like string; nil becomes "null".
local function format_as_json(obj)
    if obj == nil then
        return "null"
    end
    local buffer = {}
    format_any_value(obj, buffer)
    return table.concat(buffer)
end

-- Prints the JSON-like rendering of `obj`.
local function print_as_json(obj)
    -- Previously called the undefined name `_format_as_json`.
    print(format_as_json(obj))
end
print_as_json {1, 2, 3}
print_as_json(nil)
print_as_json("string")
print_as_json {[1] = 1, [2] = 2, three = { { true } }, four = "four"}
Correct solution with key/value escaping
Small library that I wrote in pure Lua for this specific use-case: https://github.com/vn971/fast_json_encode
Or specifically this 1 file that includes both a formatter and a printer: https://github.com/vn971/fast_json_encode/blob/master/json_format.lua
You have to code it yourself I'm afraid. I wrote this, and it may be of some use to you
-- Prints `tbl` as indented, Lua-like text, one entry per line, with the
-- keys in a stable order (grouped by type name, then by value).
-- `indent` is the starting indentation level (default 0).
-- The first parameter is deliberately not called `table`: that name would
-- shadow the standard library and break the table.sort call below.
function printtable(tbl, indent)
  indent = indent or 0;

  -- Collect the keys, then sort them once.
  local keys = {};
  for k in pairs(tbl) do
    keys[#keys+1] = k;
  end
  table.sort(keys, function(a, b)
    local ta, tb = type(a), type(b);
    if (ta ~= tb) then
      return ta < tb;
    elseif (ta == 'number' or ta == 'string') then
      return a < b;
    else
      -- Booleans, tables, functions... have no `<`; order by their text.
      return tostring(a) < tostring(b);
    end
  end);

  print(string.rep(' ', indent)..'{');
  indent = indent + 1;
  local pad = string.rep(' ', indent);
  for _, k in ipairs(keys) do
    local v = tbl[k];
    local key = k;
    if (type(key) == 'string') then
      -- Only keys that are not valid identifiers need the bracket form.
      if not (string.match(key, '^[A-Za-z_][0-9A-Za-z_]*$')) then
        key = "['"..key.."']";
      end
    elseif (type(key) == 'number') then
      key = "["..key.."]";
    end

    if (type(v) == 'table') then
      if (next(v) ~= nil) then
        print(string.format("%s%s =", pad, tostring(key)));
        printtable(v, indent);
      else
        print(string.format("%s%s = {},", pad, tostring(key)));
      end
    elseif (type(v) == 'string') then
      print(string.format("%s%s = %s,", pad, tostring(key), "'"..v.."'"));
    else
      print(string.format("%s%s = %s,", pad, tostring(key), tostring(v)));
    end
  end
  indent = indent - 1;
  print(string.rep(' ', indent)..'}');
end
The table.tostring method of metalua is actually very complete. It deals with nested tables, the indentation level is changeable, ...
See https://github.com/fab13n/metalua/blob/master/src/lib/metalua/table2.lua
This is my version that supports excluding tables and userdata
-- Lua Table View by Elertan
-- Prints a tree view of `t`.
-- `exclusions` is an optional array of keys; below the root, a table stored
-- under an excluded key is not expanded and any excluded entry shows only
-- its type. Exclusions are not applied to the root entries themselves.
-- Userdata and functions are shown by type, strings are quoted, and every
-- other value is rendered with tostring().
table.print = function(t, exclusions)
  local nests = 0
  if not exclusions then exclusions = {} end

  -- Writes one space per nesting level (kept local; it used to leak as a
  -- global named `indent`).
  local function indent()
    for i = 1, nests do
      io.write(" ")
    end
  end

  local function excluded(key)
    for _, v in pairs(exclusions) do
      if v == key then
        return true
      end
    end
    return false
  end

  -- Builds the "|-> key: value" line for an entry that is not expanded.
  local function describe(k, v, type_only)
    local shown
    local kind = type(v)
    if type_only or kind == "table" or kind == "userdata" or kind == "function" then
      shown = kind
    elseif kind == "string" then
      shown = "\"" .. v .. "\""
    else
      -- tostring() so booleans do not raise on concatenation.
      shown = tostring(v)
    end
    return "|-> " .. tostring(k) .. ": " .. shown
  end

  local function recurse(sub)
    local isFirst = true
    for k, v in pairs(sub) do
      if isFirst then
        indent()
        print("|")
        isFirst = false
      end
      indent()
      if type(v) == "table" and not excluded(k) then
        print("|-> " .. tostring(k) .. ": " .. type(v))
        nests = nests + 1
        recurse(v)
      else
        print(describe(k, v, excluded(k)))
      end
    end
    nests = nests - 1
  end

  print("### START TABLE ###")
  for k, v in pairs(t) do
    print("root")
    if type(v) == "table" then
      print("|-> " .. tostring(k) .. ": " .. type(v))
      nests = nests + 1
      recurse(v)
    else
      print(describe(k, v, false))
    end
  end
  print("### END TABLE ###")
end
This is an example
t = {
location = {
x = 10,
y = 20
},
size = {
width = 100000000,
height = 1000,
},
name = "Sidney",
test = {
hi = "lol",
},
anotherone = {
1,
2,
3
}
}
table.print(t, { "test" })
Prints:
### START TABLE ###
root
|-> size: table
|
|-> height: 1000
|-> width: 100000000
root
|-> location: table
|
|-> y: 20
|-> x: 10
root
|-> anotherone: table
|
|-> 1: 1
|-> 2: 2
|-> 3: 3
root
|-> test: table
|
|-> hi: "lol"
root
|-> name: "Sidney"
### END TABLE ###
Notice that the root doesn't remove exclusions
Made this version to print tables with indentation. Can probably be extended to work recursively.
-- Prints the table's own tostring() form, then one " key : value" line per
-- entry. Every printed string ends with an extra newline. Nested tables are
-- not expanded, and `indent` is accepted but not used.
function printtable(table, indent)
  local header = tostring(table)
  print(header .. '\n')
  for index, value in pairs(table) do
    local left, right = tostring(index), tostring(value)
    print(' ' .. left .. ' : ' .. right .. '\n')
  end
end
--~ Returns the array part of `list` rendered as "{a, b, {c, d}}".
--~ Nested tables are rendered recursively; other elements use tostring().
--~ `i` is an optional start index (default 1). Rendering stops at the
--~ first nil element. No separator is written after the last element.
function printTable(list, i)
  local parts = {}
  local index = i or 1
  -- Compare against nil explicitly so that a `false` element does not end
  -- the list early.
  while list[index] ~= nil do
    local element = list[index]
    if type(element) == 'table' then
      parts[#parts + 1] = printTable(element)
    else
      parts[#parts + 1] = tostring(element)
    end
    index = index + 1
  end
  return '{' .. table.concat(parts, ', ') .. '}'
end
local table = {1, 2, 3, 4, 5, {'a', 'b'}, {'G', 'F'}}
print(printTable(table))
Hi man, I wrote a simple piece of code that does this in pure Lua. It has a bug (it writes a comma after the last element of the list), but since I wrote it quickly as a prototype, I will leave it to you to adapt it to your needs.
Adding another version. This one tries to iterate over userdata as well.
-- Prints the entries of `o`, one per line, recursing into tables and
-- userdata values.
-- `indent` is the nesting level (default 0). Each line carries the indent
-- string twice: once added by the caller of output_it and once inside it.
-- If `o` cannot be iterated with pairs(), its metatable is iterated
-- instead; if that fails too, "[[??]]" is printed. A value with no entries
-- prints "{}".
function inspect(o,indent)
    if indent == nil then indent = 0 end
    local indent_str = string.rep(" ", indent)
    local output_it = function(str)
        print(indent_str..str)
    end
    local length = 0
    local fu = function(k, v)
        length = length + 1
        -- tostring(k): a boolean/table key would otherwise raise inside
        -- pcall and be misreported as a value that cannot be iterated.
        local label = "["..tostring(k).."]"
        if type(v) == "userdata" or type(v) == 'table' then
            output_it(indent_str..label)
            inspect(v, indent+1)
        else
            output_it(indent_str..label.." "..tostring(v))
        end
    end
    local loop_pairs = function()
        for k,v in pairs(o) do fu(k,v) end
    end
    local loop_metatable_pairs = function()
        for k,v in pairs(getmetatable(o)) do fu(k,v) end
    end
    if not pcall(loop_pairs) and not pcall(loop_metatable_pairs) then
        output_it(indent_str.."[[??]]")
    else
        if length == 0 then
            output_it(indent_str.."{}")
        end
    end
end
Convert to json and then print.
local json = require('cjson')
json_string = json.encode(this_table)
print (json_string)
simple example of dump a table in lua
i suggest using serpent.lua
-- Renders table `value` as indented text, with "(" and ")" as delimiters.
-- `indent` is the number of spaces added per nesting level (default 2).
-- `subcategory` is the current left padding; it is used by the recursion
-- and defaults to `indent`.
-- Keys that convert with tonumber() are written as [key], identifier-like
-- strings are written bare, other strings as ['key'], and keys of any
-- other type as [tostring(key)].
local function parser(value, indent, subcategory)
  indent = indent or 2
  subcategory = type(subcategory) == 'number' and subcategory or indent
  local response = '(\n'
  for key, item in pairs(value) do
    local rendered
    if type(item) == 'table' then
      rendered = parser(item, indent, subcategory + indent)
    elseif type(item) == 'string' then
      rendered = '\'' .. item .. '\''
    else
      rendered = tostring(item)
    end

    local label
    if type(tonumber(key)) == 'number' then
      label = '[' .. key .. ']'
    elseif type(key) ~= 'string' then
      -- Booleans, tables, functions... have no :match method; calling it
      -- on them would raise.
      label = '[' .. tostring(key) .. ']'
    elseif key:match('^([A-Za-z_][A-Za-z0-9_]*)$') then
      label = key
    else
      label = '[\'' .. key .. '\']'
    end

    response = response .. string.rep(' ', subcategory) .. label .. ' = ' .. rendered .. ',\n'
  end
  return response .. string.rep(' ', subcategory - indent) .. ')'
end
example
response = parser{1,2,3, {ok = 10, {}}}
print(response)
result
(
[1] = 1,
[2] = 2,
[3] = 3,
[4] = (
[1] = (),
ok = 10
)
)
here's my little snippet for that:
--- Dump the value of a variable as a formatted string.
--
--- Tables are rendered recursively with one entry per line; any other value
--- is rendered with tostring(). String values are not quoted.
---@param o any Value to dump
---@param tbs string|nil Tabulation string, ' ' by default
---@param tb number|nil Initial tabulation level, 0 by default
---@return string
local function dump(o, tbs, tb)
  tb = tb or 0
  tbs = tbs or ' '
  if type(o) ~= 'table' then
    return tostring(o)
  end
  if next(o) == nil then
    return '{}'
  end
  local s = '{\n'
  tb = tb + 1
  for k, v in pairs(o) do
    -- tostring() so boolean/table keys do not raise on concatenation.
    if type(k) ~= 'number' then k = '"' .. tostring(k) .. '"' end
    s = s .. tbs:rep(tb) .. '[' .. k .. '] = ' .. dump(v, tbs, tb)
    s = s .. ',\n'
  end
  tb = tb - 1
  return s .. tbs:rep(tb) .. '}'
end
I have humbly modified Alundaio's code a bit:
-- by Alundaio
-- KK modified 11/28/2019
-- Returns `node` (a table) rendered as Lua-like source text, built without
-- recursion.
--   node        - the table to render; for any other type a message string
--                 is returned instead of raising an error
--   tree        - nil or a true value: one entry per line, indented;
--                 false: everything on a single line with no indentation
--   indentation - number of TAB_CHAR repetitions per nesting level
--                 (default 1)
-- State used by the loop:
--   stack  - tables still to be visited; a parent is pushed before its child
--   cache  - per table, the pairs() position at which to resume after a
--            child table has been rendered
--   output - finished text fragments, joined once at the end
-- NOTE(review): resuming relies on pairs() visiting the entries of an
-- unmodified table in the same order on every pass.
function dump_table_to_string(node, tree, indentation)
local cache, stack, output = {},{},{}
local depth = 1
if type(node) ~= "table" then
return "only table type is supported, got " .. type(node)
end
if nil == indentation then indentation = 1 end
local NEW_LINE = "\n"
local TAB_CHAR = " "
if nil == tree then
NEW_LINE = "\n"
elseif not tree then
-- tree == false: compact single-line output.
NEW_LINE = ""
TAB_CHAR = ""
end
local output_str = "{" .. NEW_LINE
while true do
-- Count the entries so the last one can be recognised.
local size = 0
for k,v in pairs(node) do
size = size + 1
end
local cur_index = 1
for k,v in pairs(node) do
-- Skip entries that were already emitted before descending into a child.
if (cache[node] == nil) or (cur_index >= cache[node]) then
-- The find() calls start at the last character, so they test how
-- output_str currently ends.
if (string.find(output_str,"}",output_str:len())) then
output_str = output_str .. "," .. NEW_LINE
elseif not (string.find(output_str,NEW_LINE,output_str:len())) then
output_str = output_str .. NEW_LINE
end
-- This is necessary for working with HUGE tables otherwise we run out of memory using concat on huge strings
table.insert(output,output_str)
output_str = ""
local key
-- Number and boolean keys are written bare, everything else quoted.
if (type(k) == "number" or type(k) == "boolean") then
key = "["..tostring(k).."]"
else
key = "['"..tostring(k).."']"
end
if (type(v) == "number" or type(v) == "boolean") then
output_str = output_str .. string.rep(TAB_CHAR,depth*indentation) .. key .. " = "..tostring(v)
elseif (type(v) == "table") then
output_str = output_str .. string.rep(TAB_CHAR,depth*indentation) .. key .. " = {" .. NEW_LINE
-- Visit the child next, then come back to this table at the entry
-- after the current one.
table.insert(stack,node)
table.insert(stack,v)
cache[node] = cur_index+1
break
else
-- Any other type (strings, functions, ...) is written as a quoted string.
output_str = output_str .. string.rep(TAB_CHAR,depth*indentation) .. key .. " = '"..tostring(v).."'"
end
if (cur_index == size) then
output_str = output_str .. NEW_LINE .. string.rep(TAB_CHAR,(depth-1)*indentation) .. "}"
else
output_str = output_str .. ","
end
else
-- close the table
if (cur_index == size) then
output_str = output_str .. NEW_LINE .. string.rep(TAB_CHAR,(depth-1)*indentation) .. "}"
end
end
cur_index = cur_index + 1
end
-- An empty table never enters the loop above, so close it here.
if (size == 0) then
output_str = output_str .. NEW_LINE .. string.rep(TAB_CHAR,(depth-1)*indentation) .. "}"
end
if (#stack > 0) then
node = stack[#stack]
stack[#stack] = nil
-- No resume position means a table not visited yet (one level deeper);
-- otherwise we are returning to a parent (one level up).
depth = cache[node] == nil and depth + 1 or depth - 1
else
break
end
end
-- This is necessary for working with HUGE tables otherwise we run out of memory using concat on huge strings
table.insert(output,output_str)
output_str = table.concat(output)
return output_str
end
then:
print(dump_table_to_string("AA", true,3))
print(dump_table_to_string({"AA","BB"}, true,3))
print(dump_table_to_string({"AA","BB"}))
print(dump_table_to_string({"AA","BB"},false))
print(dump_table_to_string({"AA","BB",{22,33}},true,2))
gives:
only table type is supported, got string
{
[1] = 'AA',
[2] = 'BB'
}
{
[1] = 'AA',
[2] = 'BB'
}
{[1] = 'AA',[2] = 'BB'}
{
[1] = 'AA',
[2] = 'BB',
[3] = {
[1] = 22,
[2] = 33
}
}
Now the function print can print the (flat) tables!
oprint = print -- origin print
-- Replacement for print: when the first argument is a table, prints its
-- entry count followed by all entries on one line ("key=value" for string
-- keys, just the value otherwise, each followed by a tab). Nested tables
-- are not expanded here. Any other call is forwarded unchanged to the
-- original print.
print = function (...)
    -- Bind the first argument explicitly: type(...) with no arguments at
    -- all raises, which made a bare print() fail.
    local first = ...
    if type(first) == "table" then
        local parts = {}
        local amount = 0
        for i,v in pairs(first) do
            amount = amount + 1
            local pre = type(i) == "string" and i.."=" or ""
            parts[amount] = pre..tostring(v) .. "\t"
        end
        oprint('#'..amount..':', table.concat(parts))
    else
        oprint(...)
    end
end
For example:
print ({x=7, y=9, w=11, h="height", 7, 8, 9})
prints:
#7: 7 8 9 y=9 x=7 h=height w=11
The same way it can be just new function tostring:
otostring = tostring -- origin tostring
-- Replacement for tostring: a table is rendered as "{a, b, key=value}"
-- ("key=" only for string keys), with nested tables rendered the same way
-- through the recursive call. Any other value is forwarded to the original
-- tostring.
tostring = function (...)
    local value = ...
    if type(value) == "table" then
        local parts = {}
        for i,v in pairs(value) do
            local pre = type(i) == "string" and i.."=" or ""
            parts[#parts + 1] = pre..tostring(v)
        end
        -- Joining with a separator handles the empty table correctly;
        -- trimming a trailing ", " turned "{" into "" and produced "}".
        return '{' .. table.concat(parts, ", ") .. '}'
    else
        return otostring(...)
    end
end

Resources