Dafny precondition to require a string is not only whitespace - dafny

I am trying to write a precondition to require a string to contain at least one non-whitespace character. I wrote the following:
predicate AllWhiteSpaceChars(s: string) {
forall i :: 0 <= i < |s| ==> s[i] in {' ', '\n', /*'\f',*/ '\r', '\t'/*, '\v'*/}
}
But I can't get my program to verify with it. The following fails:
method test1(s: string)
requires !AllWhiteSpaceChars(s)
{
print s;
}
method test2()
{
test1("./foo");
}
What is wrong with my predicate, and how can I create a working precondition?

Seems to be a trigger issue. The following works. But maybe someone more familiar with triggers can suggest a better fix.
predicate HasNonWhiteSpace(s: string) {
if s == []
then false
else s[0] !in {' ', '\n', /*'\f',*/ '\r', '\t'/*, '\v'*/} || HasNonWhiteSpace(s[1..])
}
method test1(s: string)
requires HasNonWhiteSpace(s)
{
print s;
}
method test2()
{
test1("./foo");
test1("\t\n ");
test1("d d");
}
BTW: not sure if you meant to require the string being printed to be non-empty. My current solution also requires that.

Related

Can dafny assert cope with "! false"

The final commented out assert will not validate but when run the if statement above will prints.
OUTPUT
ohb= true
ohx= false
palin(xe) == false
ohx ==false
function method palin(a:seq<int>) :bool {
forall i:int :: (0<=i && i<(|a|/2)) ==> a[i]==a[|a|-i -1]
}
method Main() {
var xe:seq<int> := [0,1,2,3,0];
var se:seq<int> := [0,1,2,1,0];
var ohb := palin(se);
var ohx :bool := palin(xe);
print "ohb= ",ohb,"\n";
print "ohx= ",ohx,"\n";
assert palin(se);
if (palin(xe) == false) {print "palin(xe) == false\n";}
if (!ohx) {print "ohx ==false\n";}
//assert !ohx;
}
A failing assert means the verifier cannot automatically find a proof. If you think the property holds, you need to write the proof (or part of the proof).
For your program, proving that something is not a palindrome comes down to showing a position that violates the palindrome property. In terms of logic, you're trying to prove the negation of a forall, which is an exists, and to prove an exists you'll need to supply the witness for the bound variable i.
In your example, the following suffices:
predicate method palin(a: seq<int>) {
forall i :: 0 <= i < |a| / 2 ==> a[i] == a[|a| - i - 1]
}
method Main() {
var xe := [0,1,2,3,0];
var se := [0,1,2,1,0];
var ohb := palin(se);
var ohx := palin(xe);
print "ohb= ", ohb, "\n";
print "ohx= ", ohx, "\n";
assert palin(se);
if palin(xe) == false { print "palin(xe) == false\n"; }
if !ohx { print "ohx == false\n"; }
assert !ohx by {
assert xe[1] != xe[3];
}
}

How to match the start and end of a block

I want to define a special code block, which may starts by any combination of characters of {[<#, and the end will be }]>#.
Some example:
{
block-content
}
{##
block-content
##}
#[[<{###
block-content
###}>]]#
Is it possible with petitparser-dart?
Yes, back-references are possible, but it is not that straight-forward.
First we need a function that can reverse our delimiter. I came up with the following:
String reverseDelimiter(String token) {
return token.split('').reversed.map((char) {
if (char == '[') return ']';
if (char == '{') return '}';
if (char == '<') return '>';
return char;
}).join();
}
Then we have to declare the stopDelimiter parser. It is undefined at this point, but will be replaced with the real parser as soon as we know it.
var stopDelimiter = undefined();
In the action of the startDelimiter we replace the stopDelimiter with a dynamically created parser as follows:
var startDelimiter = pattern('#<{[').plus().flatten().map((String start) {
stopDelimiter.set(string(reverseDelimiter(start)).flatten());
return start;
});
The rest is trivial, but depends on your exact requirements:
var blockContents = any().starLazy(stopDelimiter).flatten();
var parser = startDelimiter & blockContents & stopDelimiter;
The code above defines the blockContents parser so that it reads through anything until the matching stopDelimiter is encountered. The provided examples pass:
print(parser.parse('{ block-content }'));
// Success[1:18]: [{, block-content , }]
print(parser.parse('{## block-content ##}'));
// Success[1:22]: [{##, block-content , ##}]
print(parser.parse('#[[<{### block-content ###}>]]#'));
// Success[1:32]: [#[[<{###, block-content , ###}>]]#]
The above code doesn't work if you want to nest the parser. If necessary, that problem can be avoided by remembering the previous stopDelimiter and restoring it.

ANTLR Tree Grammar for loops

I'm trying to implement a parser by directly reading a treeWalker and implementing the commands needed for the compiler on the fly. So if I have a command like:
statement
:
^('WRITE' expression)
{
//Here is the command that is created by my Tree Parser
ch.emitRO("OUT",0,0,0,"write out the value of ac");
//and then I handle it in my other classes
}
;
I want it to write OUT 0,0,0; to a file. That's my grammar.
I have a problem though with the loop section in my grammar it is:
'WHILE'^ expression 'DO' stat_seq 'ENDDO'
and in the tree parser:
doWhileStatement
:
^('WHILE' expression 'DO' stat_seq 'ENDDO')
;
What I want to do is directly parse the code from the while loop into the commands I need. I came up with this solution but it doesn't work:
doWhileStatement
:
^('WHILE' e=expression head='DO'
{
int loopHead =((CommonTree) head).getTokenStartIndex();
}
stat_seq
{
if ($e.result==1) {
input.seek(loopHead);
doWhileStatement();
}
}
'ENDDO')
;
for the record here are some of the other commands I've written:
(ignore the code written in brackets, it's for the generation of the commands in a text file.)
stat_seq
:
(statement)+
;
statement
:
^(':=' ID e=expression) { variables.put($ID.text,e); }
| ^('WRITE' expression)
{
ch.emitRM("LDC",ac,$expression.result,0,"pass the expression value to the ac reg");
ch.emitRO("OUT",ac,0,0,"write out the value of ac");
}
| ^('READ' ID)
{
ch.emitRO("IN",ac,0,0,"read value");
}
| ^('IF' expression 'THEN'
{
ch.emitRM("LDC",ac1,$expression.result,0,"pass the expression result to the ac reg");
int savedLoc1 = ch.emitSkip(1);
}
sseq1=stat_seq
'ELSE'
{
int savedLoc2 = ch.emitSkip(1);
ch.emitBackup(savedLoc1);
ch.emitRM("JEQ",ac1,savedLoc2+1,0,"skip as many places as needed depending on the expression");
ch.emitRestore();
}
sseq2=stat_seq
{
int savedLoc3 = ch.emitSkip(0);
ch.emitBackup(savedLoc2);
ch.emitRM("LDC",PC_REG,savedLoc3,0,"skip for the else command");
ch.emitRestore();
}
'ENDIF')
| doWhileStatement
;
Any help would be appreciated, thank you
I found it for everyone who has the same problem I did it like this and it's working:
^('WHILE'
{int c = input.index();}
expression
{int s=input.index();}
.* )// .* is a sequence of statements
{
int next = input.index(); // index of node following WHILE
input.seek(c);
match(input, Token.DOWN, null);
pushFollow(FOLLOW_expression_in_statement339);
int condition = expression();
state._fsp--;
//there is a problem here
//expression() seemed to be reading from the grammar file and I couldn't
//get it to read from the tree walker rule somehow
//It printed something like no viable alt at input 'DOWN'
//I googled it and found this mistake
// So I copied the code from the normal while statement
// And pasted it here and it works like a charm
// Normally there should only be int condition = expression()
while ( condition == 1 ) {
input.seek(s);
stat_seq();//stat_seq is a sequence of statements: (statement ';')+
input.seek(c);
match(input, Token.DOWN, null); //Copied value from EvaluatorWalker.java
//cause couldn't find another way to do it
pushFollow(FOLLOW_expression_in_statement339);
condition = expression();
state._fsp--;
System.out.println("condition:"+condition + " i:"+ variables.get("i"));
}
input.seek(next);
}
I wrote the problem at the comments of my code. If anyone can help me out and answer this for me how to do it I would be grateful. It's so weird that there is nearly no feedback on a correct way to implement loops within a tree grammar on the fly.
Regards,
Alex

JSON2 Error / Conflict with another script

I am using the JSON2 library in order to use JSON.stringify to send some JSON data to my MVC controller.
When I include another script in my view (Telerik MVC) I start to get script conflicts when using IE7.
When I click the refresh button in the grid, I get the following error:
Line: 191
Error: Object doesn't support this property or method
String.prototype.toJSON =
Number.prototype.toJSON =
Boolean.prototype.toJSON = function (key) {
return this.valueOf();
};
The error occurs on the following line specifically:
return this.valueOf();
Does anyone have any insight into why this conflict is occurring and how to resolve it? Specifically, why would this work in IE8/Chrome but fail in IE7. What would cause the error? Are both scripts trying to define the same method and that's why it is failing or is it impossible to tell without digging through tons of code?
Edit:
This is the json2.js library I am speaking of: https://github.com/douglascrockford/JSON-js
Probably the reply is too late, but I thought it's worth replying as this might save some valuable lives ;)
The JSON2 script won't initialize/extend the JSON object if there is an existing implementation(Native or Included). However if the JSON object does not exist, the script will create that object and attach few methods to it (JSON.stringify and JSON.parse to be precise). However in order to make those methods to work, there are other objects (like Date, String, Number and Boolean objects) which need to be extended to support certain methods (like toJSON method). The JSON2 script takes care of extending the required objects as well.
Now coming to the specific issue here (Telerik MVC). I faced the same problem while working with Telerik for one of the Projects. However I was able to trace it. The probable cause is the conflict between Telerik scripts and the current JSON2 script. The Date and Boolean Objects' toJSON method somehow conflicts with Telerik's implmentation of the same method for those two objects which breaks the Telerik script at some places. I have modified the JSON2 library for a more robust check which doesn't fail in any scenario (even on use of Telerik MVC on the page). I have tested the script and it works fine for me, however in case someone finds any further conflicts, please reply back.
var JSON;
if (!JSON) {
JSON = {};
}
(function () {
'use strict';
function f(n) {
// Format integers to have at least two digits.
return n < 10 ? '0' + n : n;
}
if (typeof Date.prototype.toJSON !== 'function') {
Date.prototype.toJSON = function (key) {
return isFinite(this.valueOf())
? this.getUTCFullYear() + '-' +
f(this.getUTCMonth() + 1) + '-' +
f(this.getUTCDate()) + 'T' +
f(this.getUTCHours()) + ':' +
f(this.getUTCMinutes()) + ':' +
f(this.getUTCSeconds()) + /*added - start*/ '.'+
f(this.getUTCMilliseconds()) + /*added - end*/ 'Z'
: null;
};
//pushed the below code outside current if block
// String.prototype.toJSON =
// Number.prototype.toJSON =
// Boolean.prototype.toJSON = function (key) {
// return this.valueOf();
// };
}
/*added - start*/
if (typeof String.prototype.toJSON !== 'function') {
String.prototype.toJSON = function (key) {
return ((typeof this.valueOf === 'function') ? this.valueOf(): this.toString());
};
}
if (typeof Number.prototype.toJSON !== 'function') {
Number.prototype.toJSON = function (key) {
return ((typeof this.valueOf === 'function') ? this.valueOf(): this.toString());
};
}
if (typeof Boolean.prototype.toJSON !== 'function') {
Boolean.prototype.toJSON = function (key) {
return ((typeof this.valueOf === 'function') ? this.valueOf(): this.toString());
};
}
/*added - end*/
var cx = /[\u0000\u00ad\u0600-\u0604\u070f\u17b4\u17b5\u200c-\u200f\u2028-\u202f\u2060-\u206f\ufeff\ufff0-\uffff]/g,
escapable = /[\\\"\x00-\x1f\x7f-\x9f\u00ad\u0600-\u0604\u070f\u17b4\u17b5\u200c-\u200f\u2028-\u202f\u2060-\u206f\ufeff\ufff0-\uffff]/g,
gap,
indent,
meta = { // table of character substitutions
'\b': '\\b',
'\t': '\\t',
'\n': '\\n',
'\f': '\\f',
'\r': '\\r',
'"' : '\\"',
'\\': '\\\\'
},
rep;
function quote(string) {
// If the string contains no control characters, no quote characters, and no
// backslash characters, then we can safely slap some quotes around it.
// Otherwise we must also replace the offending characters with safe escape
// sequences.
escapable.lastIndex = 0;
return escapable.test(string) ? '"' + string.replace(escapable, function (a) {
var c = meta[a];
return typeof c === 'string'
? c
: '\\u' + ('0000' + a.charCodeAt(0).toString(16)).slice(-4);
}) + '"' : '"' + string + '"';
}
function str(key, holder) {
// Produce a string from holder[key].
var i, // The loop counter.
k, // The member key.
v, // The member value.
length,
mind = gap,
partial,
value = holder[key];
// If the value has a toJSON method, call it to obtain a replacement value.
if (value && typeof value === 'object' &&
typeof value.toJSON === 'function') {
value = value.toJSON(key);
}
// If we were called with a replacer function, then call the replacer to
// obtain a replacement value.
if (typeof rep === 'function') {
value = rep.call(holder, key, value);
}
// What happens next depends on the value's type.
switch (typeof value) {
case 'string':
return quote(value);
case 'number':
// JSON numbers must be finite. Encode non-finite numbers as null.
return isFinite(value) ? String(value) : 'null';
case 'boolean':
case 'null':
// If the value is a boolean or null, convert it to a string. Note:
// typeof null does not produce 'null'. The case is included here in
// the remote chance that this gets fixed someday.
return String(value);
// If the type is 'object', we might be dealing with an object or an array or
// null.
case 'object':
// Due to a specification blunder in ECMAScript, typeof null is 'object',
// so watch out for that case.
if (!value) {
return 'null';
}
// Make an array to hold the partial results of stringifying this object value.
gap += indent;
partial = [];
// Is the value an array?
if (Object.prototype.toString.apply(value) === '[object Array]') {
// The value is an array. Stringify every element. Use null as a placeholder
// for non-JSON values.
length = value.length;
for (i = 0; i < length; i += 1) {
partial[i] = str(i, value) || 'null';
}
// Join all of the elements together, separated with commas, and wrap them in
// brackets.
v = partial.length === 0
? '[]'
: gap
? '[\n' + gap + partial.join(',\n' + gap) + '\n' + mind + ']'
: '[' + partial.join(',') + ']';
gap = mind;
return v;
}
// If the replacer is an array, use it to select the members to be stringified.
if (rep && typeof rep === 'object') {
length = rep.length;
for (i = 0; i < length; i += 1) {
if (typeof rep[i] === 'string') {
k = rep[i];
v = str(k, value);
if (v) {
partial.push(quote(k) + (gap ? ': ' : ':') + v);
}
}
}
} else {
// Otherwise, iterate through all of the keys in the object.
for (k in value) {
if (Object.prototype.hasOwnProperty.call(value, k)) {
v = str(k, value);
if (v) {
partial.push(quote(k) + (gap ? ': ' : ':') + v);
}
}
}
}
// Join all of the member texts together, separated with commas,
// and wrap them in braces.
v = partial.length === 0
? '{}'
: gap
? '{\n' + gap + partial.join(',\n' + gap) + '\n' + mind + '}'
: '{' + partial.join(',') + '}';
gap = mind;
return v;
}
}
// If the JSON object does not yet have a stringify method, give it one.
if (typeof JSON.stringify !== 'function') {
JSON.stringify = function (value, replacer, space) {
// The stringify method takes a value and an optional replacer, and an optional
// space parameter, and returns a JSON text. The replacer can be a function
// that can replace values, or an array of strings that will select the keys.
// A default replacer method can be provided. Use of the space parameter can
// produce text that is more easily readable.
var i;
gap = '';
indent = '';
// If the space parameter is a number, make an indent string containing that
// many spaces.
if (typeof space === 'number') {
for (i = 0; i < space; i += 1) {
indent += ' ';
}
// If the space parameter is a string, it will be used as the indent string.
} else if (typeof space === 'string') {
indent = space;
}
// If there is a replacer, it must be a function or an array.
// Otherwise, throw an error.
rep = replacer;
if (replacer && typeof replacer !== 'function' &&
(typeof replacer !== 'object' ||
typeof replacer.length !== 'number')) {
throw new Error('JSON.stringify');
}
// Make a fake root object containing our value under the key of ''.
// Return the result of stringifying the value.
return str('', {'': value});
};
}
// If the JSON object does not yet have a parse method, give it one.
if (typeof JSON.parse !== 'function') {
JSON.parse = function (text, reviver) {
// The parse method takes a text and an optional reviver function, and returns
// a JavaScript value if the text is a valid JSON text.
var j;
function walk(holder, key) {
// The walk method is used to recursively walk the resulting structure so
// that modifications can be made.
var k, v, value = holder[key];
if (value && typeof value === 'object') {
for (k in value) {
if (Object.prototype.hasOwnProperty.call(value, k)) {
v = walk(value, k);
if (v !== undefined) {
value[k] = v;
} else {
delete value[k];
}
}
}
}
return reviver.call(holder, key, value);
}
// Parsing happens in four stages. In the first stage, we replace certain
// Unicode characters with escape sequences. JavaScript handles many characters
// incorrectly, either silently deleting them, or treating them as line endings.
text = String(text);
cx.lastIndex = 0;
if (cx.test(text)) {
text = text.replace(cx, function (a) {
return '\\u' +
('0000' + a.charCodeAt(0).toString(16)).slice(-4);
});
}
// In the second stage, we run the text against regular expressions that look
// for non-JSON patterns. We are especially concerned with '()' and 'new'
// because they can cause invocation, and '=' because it can cause mutation.
// But just to be safe, we want to reject all unexpected forms.
// We split the second stage into 4 regexp operations in order to work around
// crippling inefficiencies in IE's and Safari's regexp engines. First we
// replace the JSON backslash pairs with '#' (a non-JSON character). Second, we
// replace all simple value tokens with ']' characters. Third, we delete all
// open brackets that follow a colon or comma or that begin the text. Finally,
// we look to see that the remaining characters are only whitespace or ']' or
// ',' or ':' or '{' or '}'. If that is so, then the text is safe for eval.
if (/^[\],:{}\s]*$/
.test(text.replace(/\\(?:["\\\/bfnrt]|u[0-9a-fA-F]{4})/g, '#')
.replace(/"[^"\\\n\r]*"|true|false|null|-?\d+(?:\.\d*)?(?:[eE][+\-]?\d+)?/g, ']')
.replace(/(?:^|:|,)(?:\s*\[)+/g, ''))) {
// In the third stage we use the eval function to compile the text into a
// JavaScript structure. The '{' operator is subject to a syntactic ambiguity
// in JavaScript: it can begin a block or an object literal. We wrap the text
// in parens to eliminate the ambiguity.
j = eval('(' + text + ')');
// In the optional fourth stage, we recursively walk the new structure, passing
// each name/value pair to a reviver function for possible transformation.
return typeof reviver === 'function'
? walk({'': j}, '')
: j;
}
// If the text is not JSON parseable, then a SyntaxError is thrown.
throw new SyntaxError('JSON.parse');
};
}
}());
Note: The above code is not my implementation. It is from the source https://github.com/douglascrockford/JSON-js I have just modified it a little to avoid any conflicts with Telerik or otherwise.
I have had exactly the same problem.
I couldn't find any other solution than to edit the json2.js file like you suggested, thanks for that.
However, I found that this would fix the issue for IE7 and still work in IE8/9 as well as firefox, but it now stopped working in Chrome ("this" for [this.valueOf === 'function'] is undefined).
Have you run into that issue, too, or did yours work in Chrome? I'm trying to figure out if this is related to my data or telerik-internal.
Thanks for your post!
Edit:
For now I have just returned null if "this" is undefined/null (in all three functions). Seems to work in all browsers and allows the Telerik grid to rebind without problems.
I don't know how correct this is in the global context of json2.js .toJSON method, though.

PEG for Python style indentation

How would you write a Parsing Expression Grammar in any of the following Parser Generators (PEG.js, Citrus, Treetop) which can handle Python/Haskell/CoffeScript style indentation:
Examples of a not-yet-existing programming language:
square x =
x * x
cube x =
x * square x
fib n =
if n <= 1
0
else
fib(n - 2) + fib(n - 1) # some cheating allowed here with brackets
Update:
Don't try to write an interpreter for the examples above. I'm only interested in the indentation problem. Another example might be parsing the following:
foo
bar = 1
baz = 2
tap
zap = 3
# should yield (ruby style hashmap):
# {:foo => { :bar => 1, :baz => 2}, :tap => { :zap => 3 } }
Pure PEG cannot parse indentation.
But peg.js can.
I did a quick-and-dirty experiment (being inspired by Ira Baxter's comment about cheating) and wrote a simple tokenizer.
For a more complete solution (a complete parser) please see this question: Parse indentation level with PEG.js
/* Initializations */
{
function start(first, tail) {
var done = [first[1]];
for (var i = 0; i < tail.length; i++) {
done = done.concat(tail[i][1][0])
done.push(tail[i][1][1]);
}
return done;
}
var depths = [0];
function indent(s) {
var depth = s.length;
if (depth == depths[0]) return [];
if (depth > depths[0]) {
depths.unshift(depth);
return ["INDENT"];
}
var dents = [];
while (depth < depths[0]) {
depths.shift();
dents.push("DEDENT");
}
if (depth != depths[0]) dents.push("BADDENT");
return dents;
}
}
/* The real grammar */
start = first:line tail:(newline line)* newline? { return start(first, tail) }
line = depth:indent s:text { return [depth, s] }
indent = s:" "* { return indent(s) }
text = c:[^\n]* { return c.join("") }
newline = "\n" {}
depths is a stack of indentations. indent() gives back an array of indentation tokens and start() unwraps the array to make the parser behave somewhat like a stream.
peg.js produces for the text:
alpha
beta
gamma
delta
epsilon
zeta
eta
theta
iota
these results:
[
"alpha",
"INDENT",
"beta",
"gamma",
"INDENT",
"delta",
"DEDENT",
"DEDENT",
"epsilon",
"INDENT",
"zeta",
"DEDENT",
"BADDENT",
"eta",
"theta",
"INDENT",
"iota",
"DEDENT",
"",
""
]
This tokenizer even catches bad indents.
I think an indentation-sensitive language like that is context-sensitive. I believe PEG can only do context-free langauges.
Note that, while nalply's answer is certainly correct that PEG.js can do it via external state (ie the dreaded global variables), it can be a dangerous path to walk down (worse than the usual problems with global variables). Some rules can initially match (and then run their actions) but parent rules can fail thus causing the action run to be invalid. If external state is changed in such an action, you can end up with invalid state. This is super awful, and could lead to tremors, vomiting, and death. Some issues and solutions to this are in the comments here: https://github.com/dmajda/pegjs/issues/45
So what we are really doing here with indentation is creating something like a C-style blocks which often have their own lexical scope. If I were writing a compiler for a language like that I think I would try and have the lexer keep track of the indentation. Every time the indentation increases it could insert a '{' token. Likewise every time it decreases it could inset an '}' token. Then writing an expression grammar with explicit curly braces to represent lexical scope becomes more straight forward.
You can do this in Treetop by using semantic predicates. In this case you need a semantic predicate that detects closing a white-space indented block due to the occurrence of another line that has the same or lesser indentation. The predicate must count the indentation from the opening line, and return true (block closed) if the current line's indentation has finished at the same or shorter length. Because the closing condition is context-dependent, it must not be memoized.
Here's the example code I'm about to add to Treetop's documentation. Note that I've overridden Treetop's SyntaxNode inspect method to make it easier to visualise the result.
grammar IndentedBlocks
rule top
# Initialise the indent stack with a sentinel:
&{|s| #indents = [-1] }
nested_blocks
{
def inspect
nested_blocks.inspect
end
}
end
rule nested_blocks
(
# Do not try to extract this semantic predicate into a new rule.
# It will be memo-ized incorrectly because #indents.last will change.
!{|s|
# Peek at the following indentation:
save = index; i = _nt_indentation; index = save
# We're closing if the indentation is less or the same as our enclosing block's:
closing = i.text_value.length <= #indents.last
}
block
)*
{
def inspect
elements.map{|e| e.block.inspect}*"\n"
end
}
end
rule block
indented_line # The block's opening line
&{|s| # Push the indent level to the stack
level = s[0].indentation.text_value.length
#indents << level
true
}
nested_blocks # Parse any nested blocks
&{|s| # Pop the indent stack
# Note that under no circumstances should "nested_blocks" fail, or the stack will be mis-aligned
#indents.pop
true
}
{
def inspect
indented_line.inspect +
(nested_blocks.elements.size > 0 ? (
"\n{\n" +
nested_blocks.elements.map { |content|
content.block.inspect+"\n"
}*'' +
"}"
)
: "")
end
}
end
rule indented_line
indentation text:((!"\n" .)*) "\n"
{
def inspect
text.text_value
end
}
end
rule indentation
' '*
end
end
Here's a little test driver program so you can try it easily:
require 'polyglot'
require 'treetop'
require 'indented_blocks'
parser = IndentedBlocksParser.new
input = <<END
def foo
here is some indented text
here it's further indented
and here the same
but here it's further again
and some more like that
before going back to here
down again
back twice
and start from the beginning again
with only a small block this time
END
parse_tree = parser.parse input
p parse_tree
I know this is an old thread, but I just wanted to add some PEGjs code to the answers. This code will parse a piece of text and "nest" it into a sort of "AST-ish" structure. It only goes one deep and it looks ugly, furthermore it does not really use the return values to create the right structure but keeps an in-memory tree of your syntax and it will return that at the end. This might well become unwieldy and cause some performance issues, but at least it does what it's supposed to.
Note: Make sure you have tabs instead of spaces!
{
var indentStack = [],
rootScope = {
value: "PROGRAM",
values: [],
scopes: []
};
function addToRootScope(text) {
// Here we wiggle with the form and append the new
// scope to the rootScope.
if (!text) return;
if (indentStack.length === 0) {
rootScope.scopes.unshift({
text: text,
statements: []
});
}
else {
rootScope.scopes[0].statements.push(text);
}
}
}
/* Add some grammar */
start
= lines: (line EOL+)*
{
return rootScope;
}
line
= line: (samedent t:text { addToRootScope(t); }) &EOL
/ line: (indent t:text { addToRootScope(t); }) &EOL
/ line: (dedent t:text { addToRootScope(t); }) &EOL
/ line: [ \t]* &EOL
/ EOF
samedent
= i:[\t]* &{ return i.length === indentStack.length; }
{
console.log("s:", i.length, " level:", indentStack.length);
}
indent
= i:[\t]+ &{ return i.length > indentStack.length; }
{
indentStack.push("");
console.log("i:", i.length, " level:", indentStack.length);
}
dedent
= i:[\t]* &{ return i.length < indentStack.length; }
{
for (var j = 0; j < i.length + 1; j++) {
indentStack.pop();
}
console.log("d:", i.length + 1, " level:", indentStack.length);
}
text
= numbers: number+ { return numbers.join(""); }
/ txt: character+ { return txt.join(""); }
number
= $[0-9]
character
= $[ a-zA-Z->+]
__
= [ ]+
_
= [ ]*
EOF
= !.
EOL
= "\r\n"
/ "\n"
/ "\r"

Resources