I'm trying to read data from the socket and it works fine most of the time.
When I run the app for a longer duration, it crashes, and Crashlytics points the crash to readingSocket() - this function just reads raw data from the socket.
Below is code of readingSocket()
/**
 * Reads `quantity` bytes from `raw_Socket` with non-blocking recv() calls,
 * polling every 0.5 s while the socket has no data available.
 *
 * @param dataIn   Emptied on entry (length set to 0).
 * @param quantity Number of bytes to read. 0 succeeds immediately; a negative
 *                 value fails immediately.
 * @param error    Optional out-parameter; reset to nil on entry. It is not
 *                 populated on failure by this method.
 * @return true once `quantity` bytes have been received; false when the peer
 *         closes the connection or recv() fails with anything other than
 *         EAGAIN / EWOULDBLOCK.
 */
-(bool) readingSocket:(NSMutableData*)dataIn readBytes:(ssize_t)quantity error:(NSError **)error {
    // Fixed-size scratch buffer. Every recv() request is clamped to this size;
    // asking for (quantity - grossRead) bytes directly overflowed the buffer
    // whenever more than 6144 bytes were still outstanding.
    char buffer[6144];
    ssize_t grossRead = 0;

    [dataIn setLength:0];
    if (error != nil) {
        *error = nil;
    }
    if (quantity < 0) {
        return false;
    }

    while (grossRead < quantity) {
        size_t wanted = (size_t)(quantity - grossRead);
        if (wanted > sizeof(buffer)) {
            wanted = sizeof(buffer);
        }

        ssize_t readBytesNow = recv((int)raw_Socket, buffer, wanted, MSG_DONTWAIT);
        if (readBytesNow == 0) {
            // recv() returning 0 means the peer performed an orderly shutdown.
            NSLog(@" read error");
            return false;
        }
        if (readBytesNow < 0) {
            // EAGAIN and EWOULDBLOCK are not guaranteed to be the same value.
            if (errno == EAGAIN || errno == EWOULDBLOCK) {
                [NSThread sleepForTimeInterval:0.5f];
                NSLog(@" EAGAIN error");
                continue;
            }
            // if error != nil
            return false;
        }

        grossRead += readBytesNow;
        // doing some operations
    }
    return true;
}
I'm already doing many checks after reading, but I'm not sure what the probable cause of the crash or exception could be.
any other better way to handle exception in my above code ?
I can't comment without 50 reputation (new user here), so here goes my comment as an answer.
Warning: I have no idea of the language your code is written, but I'm using my instincts as a C++ programmer (and probably mediocre one at it).
First thing I noticed was this piece of code:
if (error != nil) {
*error = nil;
}
In C world, this would be similar to checking if a pointer is null, but assigning null as its value afterwards.
Second thing to note is this construct:
-(bool) readingSocket:(NSMutableData*)dataIn readBytes:(ssize_t)quantity error:(NSError **)error {
...
char *buffer = new char[6144];
...
ssize_t readBytes = (quantity - grossRead);
When quantity > 6144 i.e. once in a blue moon, your network stack might read more than 6144 bytes which would result in a buffer overflow.
Tangential comments:
1) I think you should note that EAGAIN and EWOULDBLOCK may be the same value but not guaranteed. You might consider checking for both of them if you are not certain that your platform behaves exactly as you think.
An example link to Linux documentation
2) Your logic,
if (readBytesNow == 0) {
...
} Else if (bytesRead < 0) {
...
} else if (readBytesNow > 0) {
...
}
although being verbose, is unnecessary. You can use
if (readBytesNow == 0) {
...
} Else if (bytesRead < 0) {
...
} else {
...
}
to be sure you are not getting an additional comparison. This comparison might get optimised out anyway, but writing this way makes more sense. I had to look again to see "if I am missing something".
Hope these help.
I just replaced Lua with LuaJIT for my project and I got the error saying
Use of undeclared identifier 'lua_len'
How can I change lua_len so it is compatible with Lua 5.1 and LuaJIT?
Here's my code that uses lua_len from the SWIG binding. (In case it helps)
/*
 * Converts a Lua table argument into an (argc, argv) pair of Pd atoms.
 * Numbers become A_FLOAT atoms and strings become A_SYMBOL atoms; any other
 * element type, a non-table argument, or an empty table raises a SWIG runtime
 * error.
 *
 * Every value this typemap pushes on the Lua stack is popped again. Without
 * the pops the stack grew by one slot per table element, and nothing here
 * reserves extra stack space for that.
 *
 * NOTE(review): the array obtained from getbytes() is not released here; this
 * assumes a matching freearg typemap exists elsewhere - confirm.
 */
%typemap(in) (int argc, t_atom *argv)
{
    if (!lua_istable(L, $input)) {
        SWIG_exception(SWIG_RuntimeError, "argument mismatch: table expected");
    }
    lua_len(L, $input);
    $1 = lua_tointeger(L, -1);
    lua_pop(L, 1); /* drop the length pushed by lua_len */
    if (!$1) {
        SWIG_exception(SWIG_RuntimeError, "table is empty");
    }
    $2 = (t_atom *)getbytes($1 * sizeof(t_atom));
    for (int i=0; i<$1; ++i) {
        lua_pushinteger(L, i+1);
        lua_gettable(L, $input); /* replaces the key with table[i+1] */
        if (lua_isnumber(L, -1)) {
            $2[i].a_type = A_FLOAT;
            $2[i].a_w.w_float = lua_tonumber(L, -1);
        }
        else if (lua_isstring(L, -1)) {
            $2[i].a_type = A_SYMBOL;
            $2[i].a_w.w_symbol = gensym(lua_tostring(L, -1));
        }
        else {
            lua_pop(L, 1);
            SWIG_exception(SWIG_RuntimeError, "unhandled argument type");
        }
        lua_pop(L, 1); /* drop the element before fetching the next one */
    }
}
You can backport lua_len to Lua 5.1 by using lua-compat-5.3. If you don't want all of that, you can just use part of it by inlining it into your interface file. In case of lua_len you need
%{
/*
 * Lua 5.1 / LuaJIT stand-in for the 5.2+ lua_len(): pushes the length of the
 * value at index i, calling the "__len" metamethod for tables and userdata
 * when one exists, and raises a Lua error for values that have no length.
 */
static void lua_len (lua_State *L, int i) {
  const int kind = lua_type(L, i);

  if (kind == LUA_TSTRING) {
    lua_pushnumber(L, (lua_Number)lua_objlen(L, i));
    return;
  }
  if (kind == LUA_TTABLE) {
    /* Fall back to the raw length only when no metamethod answered. */
    if (!luaL_callmeta(L, i, "__len"))
      lua_pushnumber(L, (lua_Number)lua_objlen(L, i));
    return;
  }
  if (kind == LUA_TUSERDATA && luaL_callmeta(L, i, "__len"))
    return;

  /* Userdata without "__len" and every other type end up here. */
  luaL_error(L, "attempt to get length of a %s value",
             lua_typename(L, lua_type(L, i)));
}
%}
I have a requirement to parse data files in "txf" format. The files may contain more than 1000 entries. Since the format is well defined like JSON, I wanted to make a generic parser like JSON, which can serialise and deserialise txf files.
Contrary to JSON, the markup doesn't have a way to distinguish an object from an array. If an entry with the same tag occurs more than once, we need to treat it as an array.
# Marks the start of an object.
$ Marks the members of an object
/ Marks the end of an object
Following is a sample "txf" file
#Employees
$LastUpdated=2015-02-01 14:01:00
#Employee
$Id=1
$Name=Employee 01
#Departments
$LastUpdated=2015-02-01 14:01:00
#Department
$Id=1
$Name=Department Name
/Department
/Departments
/Employee
#Employee
/Employee
/Employees
I was able to create a generic TXF Parser using NSScanner. But with more entries the performance needs more tweaking.
I wrote the resulting Foundation object out as a plist and compared the plist parser's performance against the parser I wrote. My parser is around 10 times slower than the plist parser.
While plist file size is 5 times more than txf and has more markup characters, I feel that there is a lot of room for optimization.
Any help in that direction is highly appreciated.
EDIT : Including the parsing code
/* Key under which repeated child objects of one parent are collected. */
static NSString *const kArray = @"TXFArray";
/* Key under which free-standing body text of an object is stored. */
static NSString *const kBodyText = @"TXFText";
@interface TXFParser ()
/*Temporary variable to hold values of an object*/
@property (nonatomic, strong) NSMutableDictionary *dict;
/*An array to hold the hierarchial data of all nodes encountered while parsing*/
@property (nonatomic, strong) NSMutableArray *stack;
@end
@implementation TXFParser

#pragma mark - Getters

/* Lazily created stack of the objects that are currently open, innermost last. */
- (NSMutableArray *)stack{
    if (!_stack) {
        _stack = [[NSMutableArray alloc] init];
    }
    return _stack;
}

#pragma mark -

/**
 * Parses TXF text line by line.
 *
 * Lines starting with '#' open an object, '$' lines add a "key=value" member to
 * the innermost open object and '/' lines close it. Other lines are ignored.
 *
 * @param txfString The complete TXF document.
 * @return The root object wrapped in a dictionary keyed by its tag, or nil
 *         when the text contains no object.
 */
- (id)objectFromString:(NSString *)txfString{
    // Start from a clean slate so the result of an earlier parse cannot leak
    // into this one when the same parser instance is reused.
    self.dict = nil;
    [self.stack removeAllObjects];

    [txfString enumerateLinesUsingBlock:^(NSString *string, BOOL *stop) {
        if ([string hasPrefix:@"#"]) {
            [self didStartParsingTag:[string substringFromIndex:1]];
        }else if([string hasPrefix:@"$"]){
            [self didFindKeyValuePair:[string substringFromIndex:1]];
        }else if([string hasPrefix:@"/"]){
            [self didEndParsingTag:[string substringFromIndex:1]];
        }else{
            //[self didFindBodyValue:string];
        }
    }];
    return self.dict;
}

#pragma mark -

- (void)didStartParsingTag:(NSString *)tag{
    [self parserFoundObjectStartForKey:tag];
}

/**
 * Stores one "key=value" member in the innermost open object.
 *
 * The text is split at the FIRST '=' only, so values may themselves contain
 * '='. Text without any '=' is stored as a key with an empty value.
 */
- (void)didFindKeyValuePair:(NSString *)tag{
    NSRange separator = [tag rangeOfString:@"="];
    NSString *key = tag;
    NSString *value = @"";
    if (separator.location != NSNotFound) {
        key = [tag substringToIndex:separator.location];
        value = [tag substringFromIndex:NSMaxRange(separator)];
    }
    if (key.length) {
        self.dict[key] = value;
    }
}

/* Stores non-empty body text of the current object under kBodyText. */
- (void)didFindBodyValue:(NSString *)bodyString{
    if (!bodyString.length) return;
    // NOTE(review): trimming the *illegal* character set looks unintended;
    // whitespace trimming may have been meant - confirm before enabling the
    // call site in objectFromString:.
    bodyString = [bodyString stringByTrimmingCharactersInSet:[NSCharacterSet illegalCharacterSet]];
    if (!bodyString.length) return;
    self.dict[kBodyText] = bodyString;
}

- (void)didEndParsingTag:(NSString *)tag{
    [self parserFoundObjectEndForKey:tag];
}

#pragma mark -

/* Opens a new object and makes it the current one. */
- (void)parserFoundObjectStartForKey:(NSString *)key{
    self.dict = [[NSMutableDictionary alloc] init];
    [self.stack addObject:self.dict];
}

/* Closes the current object and attaches it to its parent (or makes it the result). */
- (void)parserFoundObjectEndForKey:(NSString *)key{
    // An end tag without a matching start tag has nothing to close.
    if (![self.stack count]) return;

    NSDictionary *dict = self.dict;
    //Remove the last value of stack
    [self.stack removeLastObject];
    //Load the previous object as dict
    self.dict = [self.stack lastObject];
    //The stack has contents, then we need to append objects
    if ([self.stack count]) {
        [self addObject:dict forKey:key];
    }else{
        //This is root object,wrap with key and assign output.
        //mutableCopy keeps the property's NSMutableDictionary promise; the
        //wrapper returned by wrapObject:withKey: is immutable.
        self.dict = [[self wrapObject:dict withKey:key] mutableCopy];
    }
}

#pragma mark - Add Objects after finding end tag

/**
 * Adds a finished child object to the current (parent) object.
 *
 * The first child for a key is stored directly under that key. When a second
 * child with the same key arrives, both are wrapped with their key and moved
 * into an array stored under kArray; once that array exists, every further
 * child is wrapped and appended to it.
 */
- (void)addObject:(id)dict forKey:(NSString *)key{
    //If there is no value, bailout
    if (!dict) return;
    //Check if the dict already has a value for key array.
    NSMutableArray *array = self.dict[kArray];
    //If array key is not found look for another object with same key
    if (array) {
        //Array found add current object after wrapping with key
        NSDictionary *currentDict = [self wrapObject:dict withKey:key];
        [array addObject:currentDict];
    }else{
        id prevObj = self.dict[key];
        if (prevObj) {
            /*
             There is a prev value for the same key. That means we need to wrap that object in a collection.
             1. Remove the object from dictionary,
             2. Wrap it with its key
             3. Add the prev and current value to array
             4. Save the array back to dict
             */
            [self.dict removeObjectForKey:key];
            NSDictionary *prevDict = [self wrapObject:prevObj withKey:key];
            NSDictionary *currentDict = [self wrapObject:dict withKey:key];
            self.dict[kArray] = [@[prevDict,currentDict] mutableCopy];
        }else{
            //Simply add object to dict
            self.dict[key] = dict;
        }
    }
}

/*Wraps Object with a key for the serializer to generate txf tag*/
- (NSDictionary *)wrapObject:(id)obj withKey:(NSString *)key{
    if (!key ||!obj) {
        return @{};
    }
    return @{key:obj};
}

@end
EDIT 2:
A sample TXF file with more than 1000 entries.
Have you considered using pull-style reads & recursive processing? That would eliminate reading the whole file into memory and also eliminate managing some own stack to keep track how deep you're parsing.
Below an example in Swift. The example works with your sample "txf", but not with the dropbox version; some of your "members" span over multiple lines. If this is a requirement, it can easily be implemented into switch/case "$" section. However, I don't see your own code handling this either. Also, the example doesn't follow the correct Swift error handling yet (the parse method would need an additional NSError parameter)
import Foundation
/// Int-indexed convenience helpers on String.
extension String
{
    /// Offset of the first occurrence of `char` from the start of the string,
    /// or nil when the character does not occur.
    public func indexOfCharacter(char: Character) -> Int? {
        let position = find(self, char)
        if position == nil {
            return nil
        }
        return distance(self.startIndex, position!)
    }

    /// The part of the string before the character at offset `index`.
    func substringToIndex(index:Int) -> String {
        let end = advance(self.startIndex, index)
        return self.substringToIndex(end)
    }

    /// The part of the string starting at offset `index`.
    func substringFromIndex(index:Int) -> String {
        let begin = advance(self.startIndex, index)
        return self.substringFromIndex(begin)
    }
}
/// Recursively parses one TXF object from `aStreamReader`.
///
/// Reads lines until the end tag matching `parentTagName` is found.
/// "$key=value" lines become string entries. Every "#Tag" line starts a nested
/// object, which is parsed recursively and appended to the array stored under
/// "Tag", so repeated tags accumulate in order.
///
/// :param: aStreamReader  Reader positioned just after the object's start tag.
/// :param: parentTagName  Tag name expected on the closing "/" line.
/// :returns: The members and nested objects of the parsed object.
func parse(aStreamReader:StreamReader, parentTagName:String) -> Dictionary<String,AnyObject> {
    var dict = Dictionary<String,AnyObject>()
    while let line = aStreamReader.nextLine() {
        // A blank line has no marker character; force-unwrapping first(line)
        // would crash on it.
        if line.isEmpty {
            continue
        }
        let firstChar = first(line)!
        let theRest = dropFirst(line)
        switch firstChar {
        case "$":
            if let idx = theRest.indexOfCharacter("=") {
                let key = theRest.substringToIndex(idx)
                let value = theRest.substringFromIndex(idx+1)
                dict[key] = value
            } else {
                println("no = sign")
            }
        case "#":
            let subDict = parse(aStreamReader,theRest)
            // Arrays are value types: the extended copy must be stored back,
            // otherwise every object after the first one with this tag is lost.
            if var list = dict[theRest] as? [Dictionary<String,AnyObject>] {
                list.append(subDict)
                dict[theRest] = list
            } else {
                dict[theRest] = [subDict]
            }
        case "/":
            if theRest != parentTagName {
                println("mismatch... [\(theRest)] != [\(parentTagName)]")
            } else {
                return dict
            }
        default:
            println("mismatch... [\(line)]")
        }
    }
    // Input ended before the closing tag of this object was seen.
    println("shouldn't be here...")
    return dict
}
// Driver: opens the sample file, takes the root tag from its first line, parses
// the rest and prints the result as JSON.
var data : Dictionary<String,AnyObject>?
if let aStreamReader = StreamReader(path: "/Users/taoufik/Desktop/QuickParser/QuickParser/file.txf") {
    if let line = aStreamReader.nextLine() {
        // An empty first line has no tag to strip the marker from.
        if !line.isEmpty {
            let tagName = line.substringFromIndex(advance(line.startIndex, 1))
            data = parse(aStreamReader, tagName)
        }
    }
    aStreamReader.close()
}
// `data` stays nil when the file could not be opened or was empty;
// force-unwrapping it would crash instead of reporting that.
if let parsed = data {
    println(JSON(parsed))
} else {
    println("nothing parsed")
}
And the StreamReader was borrowed from https://stackoverflow.com/a/24648951/95976
Edit
see full code https://github.com/tofi9/QuickParser
pull-style line-by-line read in objective-c: How to read data from NSFileHandle line by line?
Edit 2
I rewrote the above in C++11 and got it to run in less than 0.05 seconds (release mode) on a 2012 MBA I5 using the updated file on dropbox. I suspect NSDictionary and NSArray must have some penalty. The code below can be compiled into an objective-c project (file needs have extension .mm):
#include <chrono>
#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>
using namespace std;
// Minimal stopwatch built on std::chrono::high_resolution_clock.
class benchmark {
private:
    typedef std::chrono::high_resolution_clock clock;
    typedef std::chrono::milliseconds milliseconds;
    clock::time_point start;
public:
    // Starts timing immediately unless startCounting is false; in that case
    // call reset() before elapsed().
    benchmark(bool startCounting = true) : start() {
        if(startCounting)
            start = clock::now();
    }
    // Restarts the measurement from now.
    void reset() {
        start = clock::now();
    }
    // Seconds since construction or the last reset(), at the clock's full
    // resolution. Going through whole milliseconds first reported 0 for
    // anything faster than 1 ms.
    double elapsed() {
        std::chrono::duration<double> seconds = clock::now() - start;
        return seconds.count();
    }
};
// One parsed TXF object.
struct obj {
    // "$key=value" members of this object, keyed by name.
    std::map<std::string, std::string> properties;
    // Nested "#Tag" objects grouped by tag name, in file order per tag.
    std::map<std::string, std::vector<obj> > subObjects;
};
// Recursively parses one TXF object from `stream`.
//
// Reads lines until the end tag matching `parentTagName` is found.
// "$key=value" lines are stored in `properties`; every "#Tag" line starts a
// nested object that is parsed recursively and appended to `subObjects[Tag]`.
// Blank lines are skipped and a trailing '\r' (CRLF files) is dropped.
//
// Throws std::string describing the problem when a '$' line has no '=', an end
// tag does not match, a line starts with an unknown marker, or the input ends
// before the closing tag.
obj parse(ifstream& stream, string& parentTagName) {
    obj result;
    string line;
    while (getline(stream, line))
    {
        if (!line.empty() && line[line.size() - 1] == '\r') {
            line.erase(line.size() - 1);
        }
        // substr(1) on an empty line throws std::out_of_range.
        if (line.empty()) {
            continue;
        }

        const char marker = line[0];
        string rest = line.substr(1);
        switch (marker) {
            case '$': {
                const string::size_type idx = rest.find('=');
                if (idx == string::npos) {
                    ostringstream o;
                    o << "no = sign: " << line;
                    throw o.str();
                }
                result.properties[rest.substr(0, idx)] = rest.substr(idx + 1);
                break;
            }
            case '#': {
                result.subObjects[rest].push_back(parse(stream, rest));
                break;
            }
            case '/': {
                if (rest != parentTagName) {
                    ostringstream o;
                    o << "mismatch end of object " << rest << " != " << parentTagName;
                    throw o.str();
                }
                return result;
            }
            default: {
                ostringstream o;
                o << "mismatch line " << line;
                throw o.str();
            }
        }
    }
    // Thrown as std::string like every other error here, so a single
    // catch(string) handler sees it too.
    throw string("I don't know why I'm here. Probably because the file is missing an end of object marker");
}
// Prints an object tree to stdout: properties first as "name = value", then
// every nested object under a "tag: " heading, one tab deeper per level.
void visualise(obj& obj, int indent = 0) {
    const string pad(indent, '\t');

    for (auto member = obj.properties.begin(); member != obj.properties.end(); ++member) {
        cout << pad << member->first << " = " << member->second << endl;
    }

    for (auto group = obj.subObjects.begin(); group != obj.subObjects.end(); ++group) {
        for (auto child = group->second.begin(); child != group->second.end(); ++child) {
            cout << pad << group->first << ": " << endl;
            visualise(*child, indent + 1);
        }
    }
}
// Parses a TXF file, reports how long parsing took and dumps the result.
//
// The file is taken from the first command-line argument; without one the
// original sample path is used. Returns 0 on success and 1 when the file
// cannot be opened or parsing fails.
int main(int argc, const char * argv[]) {
    const char* path = (argc > 1)
        ? argv[1]
        : "/Users/taoufik/Desktop/QuickParser/QuickParser/Members.txf";
    try {
        obj result;
        benchmark b;
        ifstream stream(path);
        if (!stream) {
            cout << "error cannot open " << path << endl;
            return 1;
        }
        string line;
        // The first line names the root object; skip its one-character marker.
        if (getline(stream, line) && !line.empty())
        {
            string tagName = line.substr(1);
            result = parse(stream, tagName);
        }
        // elapsed() returns seconds, not milliseconds.
        cout << "elapsed " << b.elapsed() << " s" << endl;
        visualise(result);
    } catch (const string& s) {
        cout << "error " << s << endl;
        return 1;
    } catch (const char* s) {
        // Covers errors thrown as string literals.
        cout << "error " << s << endl;
        return 1;
    }
    return 0;
}
Edit 3
See link for full code C++: https://github.com/tofi9/TxfParser
I did some work on your GitHub source - with the following 2 changes I got an overall improvement of 30%, though the major improvement comes from "Optimisation 1".
Optimisation 1 - based on your data came with with following work.
/**
 * Finds the first occurrence of a character in a string.
 *
 * @param inString   String to search.
 * @param identifier UTF-16 code unit to look for.
 * @return Index of the first match, or -1 when there is none.
 */
+ (int)locate:(NSString*)inString check:(unichar) identifier
{
    int position = 0;
    while (position < inString.length) {
        if ([inString characterAtIndex:position] == identifier) {
            return position;
        }
        position++;
    }
    return -1;
}
/**
 * Stores one "key=value" member in the current object, splitting at the first
 * '='. Text without '=' is stored as a key with an empty value.
 */
- (void)didFindKeyValuePair:(NSString *)tag{
#if 0
    NSArray *components = [tag componentsSeparatedByString:@"="];
    NSString *key = [components firstObject];
    NSString *value = [components lastObject];
#else
    int locate = [TXFParser locate:tag check:'='];
    // locate is -1 when there is no '='; passing that to substringToIndex:
    // raises NSRangeException, so handle it explicitly.
    NSString *key = (locate < 0) ? tag : [tag substringToIndex:locate];
    NSString *value = (locate < 0) ? @"" : [tag substringFromIndex:locate+1];
#endif
    if (key.length) {
        self.dict[key] = value?:@"";
    }
}
Optimisation 2:
/**
 * Parses TXF text line by line and returns the resulting object.
 *
 * The active branch reads the marker character once per line instead of
 * running up to three hasPrefix: comparisons; the disabled branch keeps the
 * previous implementation for comparison.
 */
- (id)objectFromString:(NSString *)txfString{
    [txfString enumerateLinesUsingBlock:^(NSString *string, BOOL *stop) {
#if 0
        if ([string hasPrefix:@"#"]) {
            [self didStartParsingTag:[string substringFromIndex:1]];
        }else if([string hasPrefix:@"$"]){
            [self didFindKeyValuePair:[string substringFromIndex:1]];
        }else if([string hasPrefix:@"/"]){
            [self didEndParsingTag:[string substringFromIndex:1]];
        }else{
            //[self didFindBodyValue:string];
        }
#else
        // 0 stands in for "no marker" on an empty line.
        unichar identifier = ([string length]>0)?[string characterAtIndex:0]:0;
        if (identifier == '#') {
            [self didStartParsingTag:[string substringFromIndex:1]];
        }else if(identifier == '$'){
            [self didFindKeyValuePair:[string substringFromIndex:1]];
        }else if(identifier == '/'){
            [self didEndParsingTag:[string substringFromIndex:1]];
        }else{
            //[self didFindBodyValue:string];
        }
#endif
    }];
    return self.dict;
}
Hope it helps you.
I got a function which should return an intvalue.
/**
 * Maps a type name to its numeric code.
 *
 * @param input The type name.
 * @return 0 for "test1", 1 for "test2", 2 for "test3" and 3 for anything
 *         else, including nil.
 */
- (int)function:(NSString *)input
{
    // isEqualToString: compares contents; == only compares object addresses,
    // which match for identical literals but not for strings created at
    // run time (for example ones loaded from Core Data).
    if ([input isEqualToString:@"test1"])
    {
        return 0;
    }
    if ([input isEqualToString:@"test2"])
    {
        return 1;
    }
    if ([input isEqualToString:@"test3"])
    {
        return 2;
    }
    return 3;
}
Here I call the function:
[self function:self.detailItem.type]
The debugger shows input __NSCFString * 0x6b9a0b0 and returns any 29938idontknowvalue.
If I call [self function:#"test1"] everything works fine.
The detailItem is of type TV, which is an NSManagedObject with the attribute type defined as a string. Could this be a problem with the string types?
Thank you!
You should compare NSString like this :
if ([input isEqualToString:@"test1"])
{
    // Some code here
}
Check that self.detailItem.type is an NSString:
if ([self.detailItem.type isKindOfClass:[NSString class]])
before calling
[self function:self.detailItem.type]
and compare strings like this
[input isEqualToString:@"test1"];
you are currently comparing memory addresses, which in this case will be always different, you should be comparing the strings they contain.
I'm creating a Brainfuck parser (in a BASIC dialect), ultimately to create an interpreter, but I've realised it's not as straightforward as I first thought. My problem is that I need a way to accurately parse the matching loop operators within a Brainfuck program. This is an example program:
,>,>++++++++[<------<------>>-]
<<[>[>+>+<<-]>>[<<+>>-]<<<-]
>>>++++++[<++++++++>-],<.>.
'[' = start of loop
']' = end of loop
I need to record the start and end point of each matching loop operator so i can jump around the source as needed. Some loops are alone, some are nested.
What would be the best way to parse this? I was thinking maybe move through the source file creating a 2D array (or such like) recording the start and end positions of each matching operator, but this seems like a lot of 'to'ing and fro'ing' through the source. Is this the best way to do it?
More info: Brainfuck homepage
EDIT: Sample code in any language greatly appreciated.
Have you considered using a Stack data structure to record "jump points" (i.e. the location of the instruction pointer).
So basically, every time you encounter a "[" you push the current location of the instruction pointer on this stack. Whenever you encounter a "]" you reset the instruction pointer to the value that's currently on the top of the stack. When a loop is complete, you pop it off the stack.
Here is an example in C++ with 100 memory cells. The code handles nested loops recursively and although it is not refined it should illustrate the concepts..
char cells[100] = {0}; // define 100 memory cells
char* cell = cells; // set memory pointer to first cell
char* ip = 0; // define variable used as "instruction pointer"
void interpret(static char* program, int* stack, int sp)
{
int tmp;
if(ip == 0) // if the instruction pointer hasn't been initialized
ip = program; // now would be a good time
while(*ip) // this runs for as long as there is valid brainF**k 'code'
{
if(*ip == ',')
*cell = getch();
else if(*ip == '.')
putch(*cell);
else if(*ip == '>')
cell++;
else if(*ip == '<')
cell--;
else if(*ip == '+')
*cell = *cell + 1;
else if(*ip == '-')
*cell = *cell - 1;
else if(*ip == '[')
{
stack[sp+1] = ip - program;
*ip++;
while(*cell != 0)
{
interpret(program, stack, sp + 1);
}
tmp = sp + 1;
while((tmp >= (sp + 1)) || *ip != ']')
{
*ip++;
if(*ip == '[')
stack[++tmp] = ip - program;
else if(*ip == ']')
tmp--;
}
}
else if(*ip == ']')
{
ip = program + stack[sp] + 1;
break;
}
*ip++; // advance instruction
}
}
// Entry point: runs the sample program with a 100-level loop stack.
int _tmain(int argc, _TCHAR* argv[])
{
    // one slot per loop nesting level, modeled using a simple array
    int loopStarts[100] = {0};
    interpret(",>,>++++++++[<------<------>>-]<<[>[>+>+<<-]>>[<<+>>-]<<<-]>>>++++++[<++++++++>-],<.>.", loopStarts, 0);
    return 0;
}
EDIT
I just went over the code again and I realized there was a bug in the while loop that would 'skip' parsed loops if the value of the pointer is 0. This is where I made the change:
while((tmp >= (sp + 1)) || *ip != ']') // the bug was tmp > (sp + 1)
{
ip++;
if(*ip == '[')
stack[++tmp] = ip - program;
else if(*ip == ']')
tmp--;
}
Below is an implementation of the same parser but without using recursion:
char cells[100] = {0};

// Runs a Brainfuck program without recursion.
//
// program: NUL-terminated source text.
//
// Loops use an explicit stack of '[' offsets:
//   '[' with a non-zero cell pushes its own offset and enters the body;
//   '[' with a zero cell skips to the matching ']';
//   ']' with a non-zero cell jumps back to just after the '[' on top;
//   ']' with a zero cell pops the stack and continues.
// Interpretation stops early on unbalanced brackets or when loops nest deeper
// than the stack allows. The data pointer is not bounds-checked.
void interpret(const char* program)
{
    const int kMaxDepth = 100;
    int stack[kMaxDepth] = {0};      // offsets of the currently open '['
    int sp = 0;                      // number of entries in use
    const char* ip = program;        // instruction pointer
    char* cell = cells;              // pointer to the 'current' memory cell

    while(*ip)                       // as long as ip points to 'valid code'
    {
        if(*ip == ',')
            *cell = getch();
        else if(*ip == '.')
            putch(*cell);
        else if(*ip == '>')
            cell++;
        else if(*ip == '<')
            cell--;
        else if(*ip == '+')
            *cell = *cell + 1;
        else if(*ip == '-')
            *cell = *cell - 1;
        else if(*ip == '[')
        {
            if(*cell != 0)
            {
                if(sp == kMaxDepth)
                    return;          // nested too deeply for the stack
                stack[sp++] = ip - program;
            }
            else
            {
                // Skip to the matching ']'. Every character after the '[' is
                // examined, including the first one of the body.
                int cnt = 1;
                while(cnt > 0)
                {
                    ip++;
                    if(*ip == 0)
                        return;      // unbalanced '['
                    if(*ip == '[')
                        cnt++;
                    else if(*ip == ']')
                        cnt--;
                }
            }
        }
        else if(*ip == ']')
        {
            if(sp == 0)
                return;              // ']' without an open loop
            if(*cell != 0)
                ip = program + stack[sp - 1];   // the ip++ below enters the body
            else
                sp--;
        }
        ip++;
    }
}
// Entry point: interprets the built-in sample program.
int _tmain(int argc, _TCHAR* argv[])
{
    // Brainfuck source to run
    char *source = ",>++++++[<-------->-],[<+>-]<.";
    interpret(source);
    return 0;
}
Interesting enough, just a couple days ago, I was writing a brainf*ck interpreter in Java.
One of the issues I was having was that the explanation of the commands at the official page was insufficient, and did not mention the part about nested loops. The Wikipedia page on Brainf*ck has a Commands subsection which describes the correct behavior.
Basically to summarize the problem, the official page says when an instruction is a [ and the current memory location is 0, then jump to the next ]. The correct behavior is to jump to the corresponding ], not the next one.
One way to achieve this behavior is to keep track of the level of nesting. I ended up implementing this by having a counter which kept track of the nesting level.
The following is part of the interpreter's main loop:
do {
if (inst[pc] == '>') { ... }
else if (inst[pc] == '<') { ... }
else if (inst[pc] == '+') { ... }
else if (inst[pc] == '-') { ... }
else if (inst[pc] == '.') { ... }
else if (inst[pc] == ',') { ... }
else if (inst[pc] == '[') {
if (memory[p] == 0) {
int nesting = 0;
while (true) {
++pc;
if (inst[pc] == '[') {
++nesting;
continue;
} else if (nesting > 0 && inst[pc] == ']') {
--nesting;
continue;
} else if (inst[pc] == ']' && nesting == 0) {
break;
}
}
}
}
else if (inst[pc] == ']') {
if (memory[p] != 0) {
int nesting = 0;
while (true) {
--pc;
if (inst[pc] == ']') {
++nesting;
continue;
} else if (nesting > 0 && inst[pc] == '[') {
--nesting;
continue;
} else if (inst[pc] == '[' && nesting == 0) {
break;
}
}
}
}
} while (++pc < inst.length);
Here is the legend for the variable names:
memory -- the memory cells for the data.
p -- pointer to the current memory cell location.
inst -- an array holding the instructions.
pc -- program counter; points to the current instruction.
nesting -- level of the nesting of the current loop. nesting of 0 means that the current location is not in a nested loop.
Basically, when a loop opening [ is encountered, the current memory location is checked to see if the value is 0. If that is the case, a while loop is entered to jump to the corresponding ].
The way the nesting is handled is as follows:
If an [ is encountered while seeking for the corresponding loop closing ], then the nesting variable is incremented by 1 in order to indicate that we have entered a nested loop.
If an ] is encountered, and:
a. If the nesting variable is greater than 0, then the nesting variable is decremented by 1 to indicate that we've left a nested loop.
b. If the nesting variable is 0, then we know that the end of the loop has been encountered, so seeking the end of the loop in the while loop is terminated by executing a break statement.
Now, the next part is to handle the closing of the loop by ]. Similar to the opening of the loop, it will use the nesting counter in order to determine the current nesting level of the loop, and try to find the corresponding loop opening [.
This method may not be the most elegant way to do things, but it seems like it is resource-friendly because it only requires one extra variable to use as a counter for the current nesting level.
(Of course, "resource-friendly" is ignoring the fact that this interpreter was written in Java -- I just wanted to write some quick code and Java just happened to be what I wrote it in.)
The canonical method for parsing a context-free grammar is to use a stack. Anything else and you're working too hard and risking correctness.
You may want to use a parser generator like cup or yacc, as a lot of the dirty work is done for you, but with a language as simple as BF, it may be overkill.
Each time you find a '[', push the current position (or another "marker" token or a "context") on a stack. When you come accross a ']', you're at the end of the loop, and you can pop the marker token from the stack.
Since in BF the '[' already checks for a condition and may need jump past the ']', you may want to have a flag indicating that instructions shall be skipped in the current loop context.
Python 3.0 example of the stack algorithm described by the other posters:
program = """
,>,>++++++++[<------<------>>-]
<<[>[>+>+<<-]>>[<<+>>-]<<<-]
>>>++++++[<++++++++>-],<.>.
"""
def matching_brackets(program):
    """Yield ``(open_pos, close_pos)`` for every matched ``[``/``]`` pair.

    Positions are 1-based. Pairs are produced in the order their closing
    bracket appears, so inner loops come before the loops enclosing them.
    A ``]`` without an open ``[`` raises ``IndexError``; a ``[`` that is never
    closed is silently dropped.
    """
    open_positions = []
    position = 0
    for symbol in program:
        position += 1
        if symbol == ']':
            yield (open_positions.pop(), position)
        elif symbol == '[':
            open_positions.append(position)
# Remove all whitespace first so the reported positions refer to the bare program.
compact_program = ''.join(program.split())
print(list(matching_brackets(compact_program)))
(Well, to be honest, this only finds matching brackets. I don't know brainf*ck, so what to do next, I have no idea.)
And here's the same code I gave as an example earlier in C++, but ported to VB.NET. I decided to post it here since Gary mentioned he was trying to write his parser in a BASIC dialect.
Public cells(100) As Byte
''' <summary>
''' Runs a Brainfuck program against the shared <c>cells</c> memory.
''' </summary>
''' <param name="prog">Brainfuck source text.</param>
''' <remarks>
''' Cell values wrap around in the range 0..255. Loops use a stack of "[" positions:
''' "[" with a non-zero cell pushes its position, "[" with a zero cell skips to the
''' matching "]", "]" with a non-zero cell jumps back to the "[" on top of the stack
''' and "]" with a zero cell pops it. Interpretation stops early on unbalanced
''' brackets or when loops nest deeper than the stack allows.
''' </remarks>
Sub interpret(ByVal prog As String)
    Dim program() As Char = prog.ToCharArray() ' convert the input program into a Char array

    Dim stack(100) As Integer   ' positions of the currently open "[" (slots 1..100 are used)
    Dim sp As Integer = 0       ' stack pointer (number of open loops)
    Dim ip As Integer = 0       ' Instruction pointer (index of current instruction)
    Dim cell As Integer = 0     ' index of current memory

    While (ip < program.Length) ' loop over the program
        Select Case program(ip)
            Case ","c
                ' Keep only the low byte so characters above 255 cannot overflow the cell.
                cells(cell) = CByte(AscW(Console.ReadKey().KeyChar) And &HFF)
            Case "."c
                Console.Write("{0}", Chr(cells(cell)))
            Case ">"c
                cell = cell + 1
            Case "<"c
                cell = cell - 1
            Case "+"c
                ' Plain Byte arithmetic throws OverflowException at 255 + 1; wrap instead.
                cells(cell) = CByte((cells(cell) + 1) And &HFF)
            Case "-"c
                ' Adding 255 modulo 256 is the same as subtracting 1 with wrap-around.
                cells(cell) = CByte((cells(cell) + 255) And &HFF)
            Case "["c
                If (cells(cell) <> 0) Then
                    If (sp >= 100) Then
                        Exit Sub ' nested too deeply for the stack
                    End If
                    sp = sp + 1
                    stack(sp) = ip
                Else
                    ' Skip to the matching "]"; every character after the "[" is examined.
                    Dim depth As Integer = 1
                    While (depth > 0)
                        ip = ip + 1
                        If (ip >= program.Length) Then
                            Exit Sub ' unbalanced "["
                        End If
                        If (program(ip) = "["c) Then
                            depth = depth + 1
                        ElseIf (program(ip) = "]"c) Then
                            depth = depth - 1
                        End If
                    End While
                End If
            Case "]"c
                If (sp = 0) Then
                    Exit Sub ' "]" without an open loop
                End If
                If (cells(cell) <> 0) Then
                    ip = stack(sp) ' the increment below moves into the loop body
                Else
                    sp = sp - 1
                End If
        End Select
        ip = ip + 1
    End While
End Sub
' Entry point: runs the built-in sample program.
Sub Main()
    Dim source As String = ",>++++++[<-------->-],[<+>-]<."
    ' invoke the interpreter
    interpret(source)
End Sub
I don't have sample code, but.
I might try using a stack, along with an algorithm like this:
(executing instruction stream)
Encounter a [
If the pointer == 0, then keep reading until you encounter the ']', and don't execute any instructions until you reach it.. Goto step 1.
If the pointer !=0, then push that position onto a stack.
Continue executing instructions
If you encounter a ]
If pointer==0, pop the [ off of the stack, and proceed (goto step 1)
If pointer != 0, peek at the top of the stack, and go to that position. (goto step 5)
This question is a bit old, but I wanted to say that the answers here helped me decide the route to take when writing my own Brainf**k interpreter. Here's the final product:
#include <stdio.h>
/*
 * S: stack of pointers to the currently open '[' ; s: next free slot.
 * P: program text (read from stdin)             ; p: current instruction.
 * T: data tape                                  ; t: current cell.
 * c: current instruction character.
 * x: nesting depth while a loop body is being skipped (0 = executing).
 */
char *S[9999], P[9999], T[9999],
     **s=S, *p=P, *t=T, c, x;

/*
 * Reads a Brainfuck program from stdin and interprets it. ',' reads further
 * input from stdin as well.
 */
int main() {
    /* Leave the last byte untouched so the program text is always terminated. */
    fread(p, 1, sizeof P - 1, stdin);
    for (; (c = *p); ++p) {
        if (x) {
            /* Skipping a loop whose cell was zero: only track nesting and do
               not execute anything until the matching ']' brings x back to 0. */
            if (c == '[') ++x;
            else if (c == ']') --x;
            continue;
        }
        switch (c) {
        case '[':
            if (*t) *(s++) = p;     /* enter the loop, remember where it starts */
            else ++x;               /* cell is zero: skip the whole loop        */
            break;
        case ']':
            if (s == S) break;      /* stray ']' with no open loop: ignore      */
            if (*t) p = *(s-1);     /* repeat: ++p then moves into the body     */
            else --s;               /* loop finished                            */
            break;
        case '<': t--; break;
        case '>': t++; break;
        case '+': ++*t; break;
        case '-': --*t; break;
        case ',': *t = getchar(); break;
        case '.': putchar(*t); break;
        }
    }
    return 0;
}
package interpreter;
import java.awt.event.ActionListener;
import javax.swing.JTextPane;
public class Brainfuck {
final int tapeSize = 0xFFFF;
int tapePointer = 0;
int[] tape = new int[tapeSize];
int inputCounter = 0;
ActionListener onUpdateTape;
public Brainfuck(byte[] input, String code, boolean debugger,
JTextPane output, ActionListener onUpdate) {
onUpdateTape = onUpdate;
if (debugger) {
debuggerBF(input, code, output);
} else {
cleanBF(input, code, output);
}
}
private void debuggerBF(byte[] input, String code, JTextPane output) {
for (int i = 0; i < code.length(); i++) {
onUpdateTape.actionPerformed(null);
switch (code.charAt(i)) {
case '+': {
tape[tapePointer]++;
break;
}
case '-': {
tape[tapePointer]--;
break;
}
case '<': {
tapePointer--;
break;
}
case '>': {
tapePointer++;
break;
}
case '[': {
if (tape[tapePointer] == 0) {
int nesting = 0;
while (true) {
++i;
if (code.charAt(i) == '[') {
++nesting;
continue;
} else if (nesting > 0 && code.charAt(i) == ']') {
--nesting;
continue;
} else if (code.charAt(i) == ']' && nesting == 0) {
break;
}
}
}
break;
}
case ']': {
if (tape[tapePointer] != 0) {
int nesting = 0;
while (true) {
--i;
if (code.charAt(i) == ']') {
++nesting;
continue;
} else if (nesting > 0 && code.charAt(i) == '[') {
--nesting;
continue;
} else if (code.charAt(i) == '[' && nesting == 0) {
break;
}
}
}
break;
}
case '.': {
output.setText(output.getText() + (char) (tape[tapePointer]));
break;
}
case ',': {
tape[tapePointer] = input[inputCounter];
inputCounter++;
break;
}
}
}
}
private void cleanBF(byte[] input, String code, JTextPane output) {
for (int i = 0; i < code.length(); i++) {
onUpdateTape.actionPerformed(null);
switch (code.charAt(i)) {
case '+':{
tape[tapePointer]++;
break;
}
case '-':{
tape[tapePointer]--;
break;
}
case '<':{
tapePointer--;
break;
}
case '>':{
tapePointer++;
break;
}
case '[': {
if (tape[tapePointer] == 0) {
int nesting = 0;
while (true) {
++i;
if (code.charAt(i) == '[') {
++nesting;
continue;
} else if (nesting > 0 && code.charAt(i) == ']') {
--nesting;
continue;
} else if (code.charAt(i) == ']' && nesting == 0) {
break;
}
}
}
break;
}
case ']': {
if (tape[tapePointer] != 0) {
int nesting = 0;
while (true) {
--i;
if (code.charAt(i) == ']') {
++nesting;
continue;
} else if (nesting > 0 && code.charAt(i) == '[') {
--nesting;
continue;
} else if (code.charAt(i) == '[' && nesting == 0) {
break;
}
}
}
break;
}
case '.':{
output.setText(output.getText()+(char)(tape[tapePointer]));
break;
}
case ',':{
tape[tapePointer] = input[inputCounter];
inputCounter++;
break;
}
}
}
}
public int[] getTape() {
return tape;
}
public void setTape(int[] tape) {
this.tape = tape;
}
public void editTapeValue(int counter, int value) {
this.tape[counter] = value;
}
}
This should work. You need to modify it somewhat.
That is actually standard example how brainfuck interpreter works. I modified it to use in my app, brackets are handled there:
case '[': {
if (tape[tapePointer] == 0) {
int nesting = 0;
while (true) {
++i;
if (code.charAt(i) == '[') {
++nesting;
continue;
}
else if (nesting > 0 && code.charAt(i) == ']') {
--nesting;
continue;
}
else if (code.charAt(i) == ']' && nesting == 0) {
break;
}
}
}
break;
}
case ']': {
if (tape[tapePointer] != 0) {
int nesting = 0;
while (true) {
--i;
if (code.charAt(i) == ']') {
++nesting;
continue;
}
else if (nesting > 0 && code.charAt(i) == '[') {
--nesting;
continue;
}
else if (code.charAt(i) == '[' && nesting == 0) {
break;
}
}
}
break;
}
It looks like this question has become a "post your bf interpreter" poll.
So here's mine that I just got working:
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
/* Writes "Error: <msg>" followed by a newline to stderr; does not exit. */
void error(char *msg) {
    fputs("Error: ", stderr);
    fputs(msg, stderr);
    fputc('\n', stderr);
}
/* Number of bytes allocated for the data tape. */
enum { MEMSIZE = 30000 };
/* Start of the data tape; allocated in init() and released in cleanup(). */
char *mem;
/* Current data cell; moved by '>' and '<' in run(). */
char *ptr;
/* Program text; init() maps the program file here. */
char *prog;
/* Length of the program text in bytes. */
size_t progsize;
/*
 * Allocates the zeroed data tape and maps the program file read-only.
 *
 * progname: path of the Brainfuck source file.
 *
 * Returns 0 on success. On failure a message is printed to stderr, -1 is
 * returned, and prog/progsize are left as NULL/0, so a later run() does
 * nothing. An empty file is not an error; it yields an empty program.
 */
int init(char *progname) {
    int f;
    struct stat fs;

    prog = NULL;
    progsize = 0;

    ptr = mem = calloc(MEMSIZE, 1);
    if (mem == NULL) {
        perror("calloc");
        return -1;
    }
    if (progname == NULL) {
        fprintf(stderr, "Error: no program file given\n");
        return -1;
    }

    f = open(progname, O_RDONLY);
    if (f == -1) {
        perror(progname);
        return -1;
    }
    if (fstat(f, &fs) != 0) {
        perror(progname);
        close(f);
        return -1;
    }

    /* mmap() rejects a zero length, so only map files that have content. */
    if (fs.st_size > 0) {
        /* Failure is reported as MAP_FAILED, not NULL. */
        void *map = mmap(NULL, (size_t)fs.st_size, PROT_READ, MAP_PRIVATE, f, 0);
        if (map == MAP_FAILED) {
            perror("mmap");
            close(f);
            return -1;
        }
        prog = map;
        progsize = (size_t)fs.st_size;
    }

    /* The mapping stays valid after the descriptor is closed. */
    close(f);
    return 0;
}
int findmatch(int ip, char src){
char *p="[]";
int dir[]= { 1, -1 };
int i;
int defer;
i = strchr(p,src)-p;
ip+=dir[i];
for (defer=dir[i]; defer!=0; ip+=dir[i]) {
if (ip<0||ip>=progsize) error("mismatch");
char *q = strchr(p,prog[ip]);
if (q) {
int j = q-p;
defer+=dir[j];
}
}
return ip;
}
/*
 * Executes the loaded program from its first byte to its end. Bytes that are
 * not Brainfuck commands are ignored. Always returns 0.
 */
int run() {
    int ip = 0;

    while (ip>=0 && ip<progsize) {
        const char op = prog[ip];

        if (op == '>') {
            ++ptr;
        } else if (op == '<') {
            --ptr;
        } else if (op == '+') {
            ++*ptr;
        } else if (op == '-') {
            --*ptr;
        } else if (op == '.') {
            putchar(*ptr);
        } else if (op == ',') {
            *ptr=getchar();
        } else if (op == '[') {         /* while(*ptr){ */
            if (!*ptr) ip=findmatch(ip,'[');
        } else if (op == ']') {         /* } */
            if (*ptr) ip=findmatch(ip,']');
        }
        ip++;
    }
    return 0;
}
int cleanup() {
free(mem);
ptr = NULL;
return 0;
}
/*
 * Usage: <program> file.bf
 * Loads the given Brainfuck file, runs it and releases all resources.
 * Returns 1 when no file is given or it cannot be loaded, otherwise 0.
 */
int main(int argc, char *argv[]) {
    if (argc < 2) {
        /* Without a path there is nothing to open; do not hand NULL to init(). */
        fprintf(stderr, "usage: %s program.bf\n", argc > 0 ? argv[0] : "bf");
        return 1;
    }
    if (init(argv[1]) != 0) {
        cleanup();
        return 1;
    }
    run();
    cleanup();
    return 0;
}