Parsing an url in c - parsing

I'm trying to understand how the following code works, Im trying to parse an URL to get protocol, hostname and port:
void parse_url(char* url, char** hostname, char** port, char** path) {
//url in this example http://www.example.com:1234/res/page1.php?user=bob#account
char* p;
p = strstr(url, "://");
char* protocol = 0;
if (p) {
protocol = url;
*p = 0; //How does this statment make protocal = 'http'?
p += 3;
}
else {
p = url;
}
*hostname = p;
while (*p && *p != ':' && *p != '/' && *p != '#') ++p;
//as above am I corect that p will include everything up until the condition becomes false which in the case when it reaches ':'?
*port = "80";
if (*p == ':') {
*p++ = 0;
*port = p;
}
while (*p && *p != '/' && *p != '#') ++p;
printf("hostname: %s\n", *hostname);
printf("port: %s\n", *port);
part of the confusion is the way the while is is also used for example
while(condtion){
//do something
}
this is the format Im used too however using pointers to change value is previous statments confuses me a little.

Related

CS50 pset5 Speller [2022] - " :( program is free of memory errors"

I get error ":( program is free of memory errors valgrind tests failed; see log for more information."
Here is my code:
// Implements a dictionary's functionality
#include <ctype.h>
#include <stdbool.h>
#include <string.h>
#include <strings.h>
#include <stdlib.h>
#include <stdio.h>
#include "dictionary.h"
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
// TODO: Choose number of buckets in hash table
const unsigned int N = 26;
// Hash table
node *table[N];
//Declare variables
unsigned int word_count;
unsigned int hash_value;
// Returns true if word is in dictionary, else false
bool check(const char *word)
{
// TODO
hash_value = hash(word);
node *cursor = table[hash_value];
// Go in link list
while (cursor != 0)
{
if (strcasecmp(word, cursor->word) == 0)
{
return true;
}
cursor = cursor->next;
}
return false;
}
// Hashes word to a number
unsigned int hash(const char *word)
{
// TODO: Improve this hash function
unsigned long total = 0;
for (int i = 0; i < strlen(word); i++)
{
total += tolower(word[i]);
}
return total % N;
}
// Loads dictionary into memory, returning true if successful, else false
bool load(const char *dictionary)
{
// Open dictionary
FILE *file = fopen(dictionary, "r");
// it would be null if cant be open
if (file == NULL)
{
printf("Unable to open %s\n", dictionary);
return false;
}
// Declare variable words
char word[LENGTH + 1];
//Scan dictionary for strings up until EOF
while (fscanf(file, "%s", word) != EOF)
{
node *n = malloc(sizeof(node));
if (n == NULL)
{
return false;
}
//copy wordds into node
strcpy(n->word, word);
hash_value = hash(word);
n->next = table[hash_value];
table[hash_value] = n;
word_count++;
}
fclose(file);
return true;
}
// Returns number of words in dictionary if loaded, else 0 if not yet loaded
unsigned int size(void)
{
if (word_count > 0)
{
return word_count;
}
return 0;
}
// Unloads dictionary from memory, returning true if successful, else false
bool unload(void)
{
for (int i = 0; i < N; i++)
{
node *cursor = table[i];
while (cursor)
{
node *tmp = cursor;
cursor = cursor->next;
free(tmp);
}
if (cursor == NULL)
{
return true;
}
}
return false;
}
Here are the errors in valgrind check50:
program is free of memory errors valgrind tests failed; see log for more information.
Here is ERR log:
56 bytes in 1 blocks are still reachable in loss record 1 of 1: (file: dictionary.c, line: 80)
And 80th line code is:
while (fscanf(file, "%s", word) != EOF)
{
node *n = malloc(sizeof(node));
if (n == NULL)
{
return false;
}
unload will free one index and return to speller because of this if (cursor == NULL) block. The last node in an index should set cursor to NULL, so function is done. That conditional should be eliminated. There is really no condition in unload that should return false.

wxWidgets serial Commuication

I have absolutely no experience in programming serial communication and since I'm stuck with my code I'd really appreciate your help! Thank you already!
So now to my problem:
I got a generator on which are several different sensors who communicate over CAN with a microcontroller. This mc itself communicates with a device from USBTin again over CAN. On the USBTin, a little board, is mainly a CAN controller and a microcontroller which are precoded from its developer.
So my task now is to open my COM Port, send the right messages to the USBTin (those are "S5" for the baudrate and 'O' for Open CAN) and then receive the data.
First of all the functions and my problem:
The problem is that in my output textfield stands something like "PPPPPPPPPP,Râö". There are always these 10 P's and some random characters. I have no idea where the P's or these additional "Râö" comes from. The actual output string shoud be something like "T1E18001F8". I tested that with hTerm, which is a terminal programm for serial communication.
OPEN:
long Serial::Open()
{
if (IsOpened()) return 0;
#ifdef UNICODE
wstring wtext(port.begin(),port.end());
#else
string wtext = port;
#endif
hComm = CreateFile(wtext.c_str(),
GENERIC_READ | GENERIC_WRITE,
0,
0,
OPEN_EXISTING,
FILE_ATTRIBUTE_NORMAL | FILE_FLAG_OVERLAPPED,
0);
if (hComm == INVALID_HANDLE_VALUE) {return 1;}
if (PurgeComm(hComm, PURGE_TXABORT | PURGE_RXABORT | PURGE_TXCLEAR |
PURGE_RXCLEAR) == 0) {return 2;}//purge
//get initial state
DCB dcbOri;
bool fSuccess;
fSuccess = GetCommState(hComm, &dcbOri);
if (!fSuccess) {return 3;}
DCB dcb1 = dcbOri;
dcb1.BaudRate = baud;
if (parity == 'E') dcb1.Parity = EVENPARITY;
else if (parity == 'O') dcb1.Parity = ODDPARITY;
else if (parity == 'M') dcb1.Parity = MARKPARITY;
else if (parity == 'S') dcb1.Parity = SPACEPARITY;
else if (parity == 'N') dcb1.Parity = NOPARITY;
dcb1.ByteSize = (BYTE)dsize;
if(stopbits==2) dcb1.StopBits = TWOSTOPBITS;
else if (stopbits == 1.5) dcb1.StopBits = ONE5STOPBITS;
else if (stopbits == 1) dcb1.StopBits = ONE5STOPBITS;
dcb1.fOutxCtsFlow = false;
dcb1.fOutxDsrFlow = false;
dcb1.fOutX = false;
dcb1.fDtrControl = DTR_CONTROL_DISABLE;
dcb1.fRtsControl = RTS_CONTROL_DISABLE;
fSuccess = SetCommState(hComm, &dcb1);
delay(60);
if (!fSuccess) {return 4;}
fSuccess = GetCommState(hComm, &dcb1);
if (!fSuccess) {return 5;}
osReader = { 0 };// Create the overlapped event. Must be closed before
exiting to avoid a handle leak.
osReader.hEvent = CreateEvent(NULL, TRUE, FALSE, NULL);
if (osReader.hEvent == NULL) {return 6;}// Error creating overlapped event;
abort.
fWaitingOnRead = FALSE;
osWrite = { 0 };
osWrite.hEvent = CreateEvent(NULL, TRUE, FALSE, NULL);
if (osWrite.hEvent == NULL) {return 7;}
if (!GetCommTimeouts(hComm, &timeouts_ori)) { return 8; } // Error getting
time-outs.
COMMTIMEOUTS timeouts;
timeouts.ReadIntervalTimeout = 20;
timeouts.ReadTotalTimeoutMultiplier = 15;
timeouts.ReadTotalTimeoutConstant = 100;
timeouts.WriteTotalTimeoutMultiplier = 15;
timeouts.WriteTotalTimeoutConstant = 100;
if (!SetCommTimeouts(hComm, &timeouts)) { return 9;} // Error setting time-
outs.
return 0;
}
WRITE:
bool Serial::Write(char *data)
{
if (!IsOpened()) {
return false;
}
BOOL fRes;
DWORD dwWritten;
long n = strlen(data);
if (n < 0) n = 0;
else if(n > 1024) n = 1024;
// Issue write.
if (!WriteFile(hComm, data, n, &dwWritten, &osWrite)) {
if (GetLastError() != ERROR_IO_PENDING) {fRes = FALSE;}// WriteFile
failed, but it isn't delayed. Report error and abort.
else {// Write is pending.
if (!GetOverlappedResult(hComm, &osWrite, &dwWritten, TRUE))
fRes = FALSE;
else fRes = TRUE;// Write operation completed successfully.
}
}
else fRes = TRUE;// WriteFile completed immediately.
return fRes;
}
READCHAR:
char Serial::ReadChar(bool& success)
{
success = false;
if (!IsOpened()) {return 0;}
DWORD dwRead;
DWORD length=1;
BYTE* data = (BYTE*)(&rxchar);
//the creation of the overlapped read operation
if (!fWaitingOnRead) {
// Issue read operation.
if (!ReadFile(hComm, data, length, &dwRead, &osReader)) {
if (GetLastError() != ERROR_IO_PENDING) { /*Error*/}
else { fWaitingOnRead = TRUE; /*Waiting*/}
}
else {if(dwRead==length) success = true;}//success
}
//detection of the completion of an overlapped read operation
DWORD dwRes;
if (fWaitingOnRead) {
dwRes = WaitForSingleObject(osReader.hEvent, READ_TIMEOUT);
switch (dwRes)
{
// Read completed.
case WAIT_OBJECT_0:
if (!GetOverlappedResult(hComm, &osReader, &dwRead, FALSE))
{/*Error*/ }
else {
if (dwRead == length) success = true;
fWaitingOnRead = FALSE;// Reset flag so that another
opertion
can be issued.
}// Read completed successfully.
break;
case WAIT_TIMEOUT:
// Operation isn't complete yet.
break;
default:
// Error in the WaitForSingleObject;
break;
}
}
return rxchar;
}
And Finally the excerpt of the main in wxWidgets to display the received data:
void GUI_1_2Frame::OnConnectButtonClick(wxCommandEvent& (event))
{
char tempString[10] = {0};
bool ReadChar_success = true;
char temp_Char;
/* Preset Serial Port setting */
Serial com(com_x, 115200, 8, NOPARITY, 1);
char* buffer;
if(connection_flag)
{
/* Port was connected, Disconnect Button unsed*/
com.Close();
wxMessageBox(_("Port closed"),_("Info!"),wxICON_INFORMATION);
connection_flag = 0;
ConnectButton->SetLabel("Connect");
TextCtrl1->SetValue("");
}
else
{
/* If open() == true -> INVALID HANDLE */
if(com.Open())
{
wxMessageBox(_("Port not available"),_("ERROR!"),wxICON_ERROR);
}
else /* Port Opened */
{
TextCtrl1->SetValue(com.GetPort());
ConnectButton->SetLabel("Disconnect");
connection_flag = 1;
}
if(com.Write("S5"))
{
TextCtrl1->SetValue("Baudrate sent!\n");
delay(100);
if(com.WriteChar('O'))
{
TextCtrl1->SetValue("Baudrate & Open Command sent!");
int i =0;
while(i<10)
{
temp_Char = com.ReadChar(ReadChar_success);
tempString[i] = temp_Char;
i++;
}
com.WriteChar('C');
com.Close();
//com.readSerialPort(data, MAX_DATA_LENGTH);
TextCtrl2->SetValue(tempString);
//wxMessageOutput::Get()->Printf("%s", tempString);
}
else
{
TextCtrl1->SetValue("Open Command Error!"); }
}
else
{
TextCtrl1->SetValue("Error!");
}
}
}
Since I am not native speaking englisch I say sorry for my language mistakes.
Thank everybody a lot and again I really appreciate every single hint!
Greetings,
MSol

Memory Leak in C and C++ Code

I am trying to return a pointer from a function and use the return in a different function but I am getting memory leak.
The test code which I wrote and detected with memory leak by CPPCheck.
########################################################################
# include < stdio.h >
# include < malloc.h >
# include < string.h >
char* replace ( char* st, char* word, char *replaceWith );
int main ( void )
{
char str[] = "Hello how are ## and what are ## doing ?";
char word[]="##";
char replaceWith[]="you";
printf("%s",replace(str,word,replaceWith));
getchar();
return 0;
}
char* replace(char* st,char* word,char *replaceWith)
{
int i = 0;
char *sr,*s,*ret;
int oldlen;
int count = 0;
int newlen;
int stlen;
s=(char *)malloc(strlen(st) + 1);
strcpy(s, st);
oldlen=strlen(word);
newlen=strlen(replaceWith);
for (i = 0; s[i]! = '\0'; )
{
if( memcmp( &s[i], word, oldlen ) == 0)
{
count++;
i+=oldlen;
}
else
{
i++;
}
}
sr= (char *) malloc (i+1+count*(newlen-oldlen));
ret = (char *) malloc (i+1+count*(newlen-oldlen));
ret=sr;
while(*s)
{
if(memcmp( s, word, oldlen) == 0)
{
memcpy(sr, replaceWith, newlen);
s+ = oldlen;
sr+ = newlen;
}
else
{
*sr++ = *s++;
}
}
*sr = '\0';
return ret;
}
Try this
#include<stdio.h>
#include<malloc.h>
#include<string.h>
char* replace ( char* st, char* word, char *replaceWith );
int main ( void )
{
char str[] = "Hello how are ## and what are ## doing ?";
char word[]="##";
char replaceWith[]="you";
char * ret = replace(str,word,replaceWith);
printf("%s",ret);
free(ret); //freeing the allocated memory
getchar();
return 0;
}
char* replace(char* st,char* word,char *replaceWith)
{
int i = 0;
char *sr,*s,*ret, *temps;
int oldlen;
int count = 0;
int newlen;
int stlen;
s=(char *)malloc(strlen(st) + 1);
temps = s; // storing the address of s in a temp location
strcpy(s, st);
oldlen=strlen(word);
newlen=strlen(replaceWith);
for (i = 0; s[i]!= '\0';)
{
if( memcmp( &s[i], word, oldlen ) == 0)
{
count++;
i+=oldlen;
}
else
{
i++;
}
}
sr= (char *) malloc (i+1+count*(newlen-oldlen));
ret=sr;
while(*s)
{
if(memcmp( s, word, oldlen) == 0)
{
memcpy(sr, replaceWith, newlen);
s += oldlen;
sr += newlen;
}
else
{
*sr++ = *s++;
}
}
*sr = '\0';
free(temps); // freeing the memory allocated for s
return ret;
}
Always free same count with malloc.
free s, sr at end of replace,
use return value of replace instead of direct use on printf
and free return value (return of ret from replace) when not needed.
I have doing lots of experimenting with the memory leak and meanwhile I wrote the following code. Please comment about the pros and cons side of it.
#include <stdio.h>
#include <string.h>
#include <malloc.h>
// Prototype declaration of replaceAll function
static char* replaceAll(char *pSource, char *pWord, char*pWith);
/////////////////////////////////////////////////////////////////////////////
//
// NAME : main
//
// DESCRIPTION : Implementation of main which invokes the replaceAll
// function and displays the output
//
// PARAMETERS : void
//
// RETURNED VALUE : int
//
/////////////////////////////////////////////////////////////////////////////
int main( void )
{
char *finalString = NULL; // To save the base returned address
char srcString[] = "Hello how r you"; // Actual String
char pWord[] = "r"; // Word to be replaced
char pWith[] = "are"; // Word to be replaced with
printf("\n Before Calling the replaceAll function:");
printf("%s",srcString);
printf("\n");
finalString = replaceAll(srcString, pWord, pWith); //calling the replaceAll function
printf("\n After Calling the replaceAll function:");
// Checking if NULL is returned
if( finalString != NULL )
{
//printing the string
printf("%s", finalString);
}
else
{
printf("\n Error: Blank String returned ");
}
return 0;
}
/////////////////////////////////////////////////////////////////////////////
//
// NAME : replaceAll
//
// DESCRIPTION : Implementation of replaceAll function which replaces
// a word in given string with another word
//
// PARAMETERS : char *
//
// RETURNED VALUE : char *
//
/////////////////////////////////////////////////////////////////////////////
static char* replaceAll(char *pSource, char *pWord, char*pWith)
{
char *pSt = NULL; // Pointer to the source String to avoid modifying the pSource
char *pTarget = NULL; // Target pointer to be malloced
char *pTg = NULL; // Pointer to the target string
int count; // Counter
int nWord = strlen (pWord); // length of the word which needs to be replaced
int nWith = strlen (pWith); // length of the word with which the word needs to be replaced
static const char nullP = '\0'; // null character
int szTarget = 0;
// Assigning the base address of the pSource to a temporary and iterate through
for ( pSt = pSource, count = 0; *pSt != nullP; pSt++ )
{
// Count number of occurances of the Word in the String to calculate the length of the final string
if( memcmp( pSt, pWord, nWord ) == 0)
{
count++;
pSt += nWord-1;
}
}
// Calculate the required target Size
szTarget = strlen (pSource) + count * (nWith - nWord) + sizeof (nullP);
// Allocate memory for the target string
pTarget = (char *)malloc(szTarget);
// Check if the malloc function returns sucessfully
if ( pTarget != NULL)
{
// Copying the string with replacement
for (pTg = pTarget, pSt = pSource; *pSt != nullP; )
{
if( memcmp (pSt, pWord, nWord) == 0)
{
memcpy (pTg,pWith,nWith);
pSt += nWord;
pTg += nWith;
}
else
{
*pTg++ = *pSt++;
}
}
// Assigning NULL Character to the target string after copying
*pTg = '\0';
}
return pTarget;
}

can anyone explain what this opencv c++ code means

string getFilename(string s) {
char sep = '/';
char sepExt='.';
#ifdef _WIN32
sep = '\\';
#endif
size_t i = s.rfind(sep, s.length( ));
if (i != string::npos) {
string fn= (s.substr(i+1, s.length( ) - i));
size_t j = fn.rfind(sepExt, fn.length( ));
if (i != string::npos) {
return fn.substr(0,j);
}else{
return fn;
}
}else{
return "";
}
}
a=getFilename(filename); // filename is an image
It looks like It extracts file's name without it's extension and a path to it:
"/home/user/Documents/someimage.jpg" -> "someimage"
size_t i = s.rfind(sep, s.length( )); // find location of the "/"
if (i != string::npos) {
string fn= (s.substr(i+1, s.length( ) - i)); // extract filename with extension -> "someimage.jpg"
size_t j = fn.rfind(sepExt, fn.length( )); // find location of the extension by looking for "."
if (i != string::npos) {
return fn.substr(0,j); // extract filename -> "someimage"
}else{
return fn;
}
}else{
return "";
}

Creating a Brainfuck parser, whats the best method of parsing loop operators?

I'm creating a Brainfuck parser (in a BASIC dialect) ultimately to create an interpreter but i've realise it's not as straight forward as i first thought. My problem is that i need a way to accurately parse the matching loop operators within a Brainfuck program. This is an example program:
,>,>++++++++[<------<------>>-]
<<[>[>+>+<<-]>>[<<+>>-]<<<-]
>>>++++++[<++++++++>-],<.>.
'[' = start of loop
']' = end of loop
I need to record the start and end point of each matching loop operator so i can jump around the source as needed. Some loops are alone, some are nested.
What would be the best way to parse this? I was thinking maybe move through the source file creating a 2D array (or such like) recording the start and end positions of each matching operator, but this seems like a lot of 'to'ing and fro'ing' through the source. Is this the best way to do it?
More info: Brainfuck homepage
EDIT: Sample code in any language greatly appreciated.
Have you considered using a Stack data structure to record "jump points" (i.e. the location of the instruction pointer).
So basically, every time you encounter a "[" you push the current location of the instruction pointer on this stack. Whenever you encounter a "]" you reset the instruction pointer to the value that's currently on the top of the stack. When a loop is complete, you pop it off the stack.
Here is an example in C++ with 100 memory cells. The code handles nested loops recursively and although it is not refined it should illustrate the concepts..
char cells[100] = {0}; // define 100 memory cells
char* cell = cells; // set memory pointer to first cell
char* ip = 0; // define variable used as "instruction pointer"
void interpret(static char* program, int* stack, int sp)
{
int tmp;
if(ip == 0) // if the instruction pointer hasn't been initialized
ip = program; // now would be a good time
while(*ip) // this runs for as long as there is valid brainF**k 'code'
{
if(*ip == ',')
*cell = getch();
else if(*ip == '.')
putch(*cell);
else if(*ip == '>')
cell++;
else if(*ip == '<')
cell--;
else if(*ip == '+')
*cell = *cell + 1;
else if(*ip == '-')
*cell = *cell - 1;
else if(*ip == '[')
{
stack[sp+1] = ip - program;
*ip++;
while(*cell != 0)
{
interpret(program, stack, sp + 1);
}
tmp = sp + 1;
while((tmp >= (sp + 1)) || *ip != ']')
{
*ip++;
if(*ip == '[')
stack[++tmp] = ip - program;
else if(*ip == ']')
tmp--;
}
}
else if(*ip == ']')
{
ip = program + stack[sp] + 1;
break;
}
*ip++; // advance instruction
}
}
int _tmain(int argc, _TCHAR* argv[])
{
int stack[100] = {0}; // use a stack of 100 levels, modeled using a simple array
interpret(",>,>++++++++[<------<------>>-]<<[>[>+>+<<-]>>[<<+>>-]<<<-]>>>++++++[<++++++++>-],<.>.", stack, 0);
return 0;
}
EDIT
I just went over the code again and I realized there was a bug in the while loop that would 'skip' parsed loops if the value of the pointer is 0. This is where I made the change:
while((tmp >= (sp + 1)) || *ip != ']') // the bug was tmp > (sp + 1)
{
ip++;
if(*ip == '[')
stack[++tmp] = ip - program;
else if(*ip == ']')
tmp--;
}
Below is an implementation of the same parser but without using recursion:
char cells[100] = {0};
void interpret(static char* program)
{
int cnt; // cnt is a counter that is going to be used
// only when parsing 0-loops
int stack[100] = {0}; // create a stack, 100 levels deep - modeled
// using a simple array - and initialized to 0
int sp = 0; // sp is going to be used as a 'stack pointer'
char* ip = program; // ip is going to be used as instruction pointer
// and it is initialized at the beginning or program
char* cell = cells; // cell is the pointer to the 'current' memory cell
// and as such, it is initialized to the first
// memory cell
while(*ip) // as long as ip point to 'valid code' keep going
{
if(*ip == ',')
*cell = getch();
else if(*ip == '.')
putch(*cell);
else if(*ip == '>')
cell++;
else if(*ip == '<')
cell--;
else if(*ip == '+')
*cell = *cell + 1;
else if(*ip == '-')
*cell = *cell - 1;
else if(*ip == '[')
{
if(stack[sp] != ip - program)
stack[++sp] = ip - program;
*ip++;
if(*cell != 0)
continue;
else
{
cnt = 1;
while((cnt > 0) || *ip != ']')
{
*ip++;
if(*ip == '[')
cnt++;
else if(*ip == ']')
cnt--;
}
sp--;
}
}else if(*ip == ']')
{
ip = program + stack[sp];
continue;
}
*ip++;
}
}
int _tmain(int argc, _TCHAR* argv[])
{
// define our program code here..
char *prg = ",>++++++[<-------->-],[<+>-]<.";
interpret(prg);
return 0;
}
Interesting enough, just a couple days ago, I was writing a brainf*ck interpreter in Java.
One of the issues I was having was that the explanation of the commands at the official page was insufficient, and did not mention the part about nested loops. The Wikipedia page on Brainf*ck has a Commands subsection which describes the correct behavior.
Basically to summarize the problem, the official page says when an instruction is a [ and the current memory location is 0, then jump to the next ]. The correct behavior is to jump to the corresponding ], not the next one.
One way to achieve this behavior is to keep track of the level of nesting. I ended up implementing this by having a counter which kept track of the nesting level.
The following is part of the interpreter's main loop:
do {
if (inst[pc] == '>') { ... }
else if (inst[pc] == '<') { ... }
else if (inst[pc] == '+') { ... }
else if (inst[pc] == '-') { ... }
else if (inst[pc] == '.') { ... }
else if (inst[pc] == ',') { ... }
else if (inst[pc] == '[') {
if (memory[p] == 0) {
int nesting = 0;
while (true) {
++pc;
if (inst[pc] == '[') {
++nesting;
continue;
} else if (nesting > 0 && inst[pc] == ']') {
--nesting;
continue;
} else if (inst[pc] == ']' && nesting == 0) {
break;
}
}
}
}
else if (inst[pc] == ']') {
if (memory[p] != 0) {
int nesting = 0;
while (true) {
--pc;
if (inst[pc] == ']') {
++nesting;
continue;
} else if (nesting > 0 && inst[pc] == '[') {
--nesting;
continue;
} else if (inst[pc] == '[' && nesting == 0) {
break;
}
}
}
}
} while (++pc < inst.length);
Here is the legend for the variable names:
memory -- the memory cells for the data.
p -- pointer to the current memory cell location.
inst -- an array holding the instructions.
pc -- program counter; points to the current instruction.
nesting -- level of the nesting of the current loop. nesting of 0 means that the current location is not in a nested loop.
Basically, when a loop opening [ is encountered, the current memory location is checked to see if the value is 0. If that is the case, a while loop is entered to jump to the corresponding ].
The way the nesting is handled is as follows:
If an [ is encountered while seeking for the corresponding loop closing ], then the nesting variable is incremented by 1 in order to indicate that we have entered a nested loop.
If an ] is encountered, and:
a. If the nesting variable is greater than 0, then the nesting variable is decremented by 1 to indicate that we've left a nested loop.
b. If the nesting variable is 0, then we know that the end of the loop has been encountered, so seeking the end of the loop in the while loop is terminated by executing a break statement.
Now, the next part is to handle the closing of the loop by ]. Similar to the opening of the loop, it will use the nesting counter in order to determine the current nesting level of the loop, and try to find the corresponding loop opening [.
This method may not be the most elegant way to do things, but it seems like it is resource-friendly because it only requires one extra variable to use as a counter for the current nesting level.
(Of course, "resource-friendly" is ignoring the fact that this interpreter was written in Java -- I just wanted to write some quick code and Java just happened to be what I wrote it in.)
The canonical method for parsing a context-free grammar is to use a stack. Anything else and you're working too hard and risking correctness.
You may want to use a parser generator like cup or yacc, as a lot of the dirty work is done for you, but with a language as simple as BF, it may be overkill.
Each time you find a '[', push the current position (or another "marker" token or a "context") on a stack. When you come accross a ']', you're at the end of the loop, and you can pop the marker token from the stack.
Since in BF the '[' already checks for a condition and may need jump past the ']', you may want to have a flag indicating that instructions shall be skipped in the current loop context.
Python 3.0 example of the stack algorithm described by the other posters:
program = """
,>,>++++++++[<------<------>>-]
<<[>[>+>+<<-]>>[<<+>>-]<<<-]
>>>++++++[<++++++++>-],<.>.
"""
def matching_brackets(program):
stack = []
for p, c in enumerate(program, start=1):
if c == '[':
stack.append(p)
elif c == ']':
yield (stack.pop(), p)
print(list(matching_brackets(''.join(program.split()))))
(Well, to be honest, this only finds matching brackets. I don't know brainf*ck, so what to do next, I have no idea.)
And here's the same code I gave as an example earlier in C++, but ported to VB.NET. I decided to post it here since Gary mentioned he was trying to write his parser in a BASIC dialect.
Public cells(100) As Byte
Sub interpret(ByVal prog As String)
Dim program() As Char
program = prog.ToCharArray() ' convert the input program into a Char array
Dim cnt As Integer = 0 ' a counter to be used when skipping over 0-loops
Dim stack(100) As Integer ' a simple array to be used as stack
Dim sp As Integer = 0 ' stack pointer (current stack level)
Dim ip As Integer = 0 ' Instruction pointer (index of current instruction)
Dim cell As Integer = 0 ' index of current memory
While (ip < program.Length) ' loop over the program
If (program(ip) = ",") Then
cells(cell) = CByte(AscW(Console.ReadKey().KeyChar))
ElseIf (program(ip) = ".") Then
Console.Write("{0}", Chr(cells(cell)))
ElseIf (program(ip) = ">") Then
cell = cell + 1
ElseIf (program(ip) = "<") Then
cell = cell - 1
ElseIf (program(ip) = "+") Then
cells(cell) = cells(cell) + 1
ElseIf (program(ip) = "-") Then
cells(cell) = cells(cell) - 1
ElseIf (program(ip) = "[") Then
If (stack(sp) <> ip) Then
sp = sp + 1
stack(sp) = ip
End If
ip = ip + 1
If (cells(cell) <> 0) Then
Continue While
Else
cnt = 1
While ((cnt > 0) Or (program(ip) <> "]"))
ip = ip + 1
If (program(ip) = "[") Then
cnt = cnt + 1
ElseIf (program(ip) = "]") Then
cnt = cnt - 1
End If
End While
sp = sp - 1
End If
ElseIf (program(ip) = "]") Then
ip = stack(sp)
Continue While
End If
ip = ip + 1
End While
End Sub
Sub Main()
' invoke the interpreter
interpret(",>++++++[<-------->-],[<+>-]<.")
End Sub
I don't have sample code, but.
I might try using a stack, along with an algorithm like this:
(executing instruction stream)
Encounter a [
If the pointer == 0, then keep reading until you encounter the ']', and don't execute any instructions until you reach it.. Goto step 1.
If the pointer !=0, then push that position onto a stack.
Continue executing instructions
If you encounter a ]
If pointer==0, pop the [ off of the stack, and proceed (goto step 1)
If pointer != 0, peek at the top of the stack, and go to that position. (goto step 5)
This question is a bit old, but I wanted to say that the answers here helped me decide the route to take when writing my own Brainf**k interpreter. Here's the final product:
#include <stdio.h>
char *S[9999], P[9999], T[9999],
**s=S, *p=P, *t=T, c, x;
int main() {
fread(p, 1, 9999, stdin);
for (; c=*p; ++p) {
if (c == ']') {
if (!x)
if (*t) p = *(s-1);
else --s;
else --x;
} else if (!x) {
if (c == '[')
if (*t) *(s++) = p;
else ++x;
}
if (c == '<') t--;
if (c == '>') t++;
if (c == '+') ++*t;
if (c == '-') --*t;
if (c == ',') *t = getchar();
if (c == '.') putchar(*t);
}
}
}
package interpreter;
import java.awt.event.ActionListener;
import javax.swing.JTextPane;
public class Brainfuck {
final int tapeSize = 0xFFFF;
int tapePointer = 0;
int[] tape = new int[tapeSize];
int inputCounter = 0;
ActionListener onUpdateTape;
public Brainfuck(byte[] input, String code, boolean debugger,
JTextPane output, ActionListener onUpdate) {
onUpdateTape = onUpdate;
if (debugger) {
debuggerBF(input, code, output);
} else {
cleanBF(input, code, output);
}
}
private void debuggerBF(byte[] input, String code, JTextPane output) {
for (int i = 0; i < code.length(); i++) {
onUpdateTape.actionPerformed(null);
switch (code.charAt(i)) {
case '+': {
tape[tapePointer]++;
break;
}
case '-': {
tape[tapePointer]--;
break;
}
case '<': {
tapePointer--;
break;
}
case '>': {
tapePointer++;
break;
}
case '[': {
if (tape[tapePointer] == 0) {
int nesting = 0;
while (true) {
++i;
if (code.charAt(i) == '[') {
++nesting;
continue;
} else if (nesting > 0 && code.charAt(i) == ']') {
--nesting;
continue;
} else if (code.charAt(i) == ']' && nesting == 0) {
break;
}
}
}
break;
}
case ']': {
if (tape[tapePointer] != 0) {
int nesting = 0;
while (true) {
--i;
if (code.charAt(i) == ']') {
++nesting;
continue;
} else if (nesting > 0 && code.charAt(i) == '[') {
--nesting;
continue;
} else if (code.charAt(i) == '[' && nesting == 0) {
break;
}
}
}
break;
}
case '.': {
output.setText(output.getText() + (char) (tape[tapePointer]));
break;
}
case ',': {
tape[tapePointer] = input[inputCounter];
inputCounter++;
break;
}
}
}
}
private void cleanBF(byte[] input, String code, JTextPane output) {
for (int i = 0; i < code.length(); i++) {
onUpdateTape.actionPerformed(null);
switch (code.charAt(i)) {
case '+':{
tape[tapePointer]++;
break;
}
case '-':{
tape[tapePointer]--;
break;
}
case '<':{
tapePointer--;
break;
}
case '>':{
tapePointer++;
break;
}
case '[': {
if (tape[tapePointer] == 0) {
int nesting = 0;
while (true) {
++i;
if (code.charAt(i) == '[') {
++nesting;
continue;
} else if (nesting > 0 && code.charAt(i) == ']') {
--nesting;
continue;
} else if (code.charAt(i) == ']' && nesting == 0) {
break;
}
}
}
break;
}
case ']': {
if (tape[tapePointer] != 0) {
int nesting = 0;
while (true) {
--i;
if (code.charAt(i) == ']') {
++nesting;
continue;
} else if (nesting > 0 && code.charAt(i) == '[') {
--nesting;
continue;
} else if (code.charAt(i) == '[' && nesting == 0) {
break;
}
}
}
break;
}
case '.':{
output.setText(output.getText()+(char)(tape[tapePointer]));
break;
}
case ',':{
tape[tapePointer] = input[inputCounter];
inputCounter++;
break;
}
}
}
}
public int[] getTape() {
return tape;
}
public void setTape(int[] tape) {
this.tape = tape;
}
public void editTapeValue(int counter, int value) {
this.tape[counter] = value;
}
}
This should work. You need to modify it somewhat.
That is actually standard example how brainfuck interpreter works. I modified it to use in my app, brackets are handled there:
case '[': {
if (tape[tapePointer] == 0) {
int nesting = 0;
while (true) {
++i;
if (code.charAt(i) == '[') {
++nesting;
continue;
}
else if (nesting > 0 && code.charAt(i) == ']') {
--nesting;
continue;
}
else if (code.charAt(i) == ']' && nesting == 0) {
break;
}
}
}
break;
}
case ']': {
if (tape[tapePointer] != 0) {
int nesting = 0;
while (true) {
--i;
if (code.charAt(i) == ']') {
++nesting;
continue;
}
else if (nesting > 0 && code.charAt(i) == '[') {
--nesting;
continue;
}
else if (code.charAt(i) == '[' && nesting == 0) {
break;
}
}
}
break;
}
It looks like this question has become a "post your bf interpreter" poll.
So here's mine that I just got working:
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
void error(char *msg) {
fprintf(stderr, "Error: %s\n", msg);
}
enum { MEMSIZE = 30000 };
char *mem;
char *ptr;
char *prog;
size_t progsize;
int init(char *progname) {
int f,r;
struct stat fs;
ptr = mem = calloc(MEMSIZE, 1);
f = open(progname, O_RDONLY);
assert(f != -1);
r = fstat(f, &fs);
assert(r == 0);
prog = mmap(NULL, progsize = fs.st_size, PROT_READ, MAP_PRIVATE, f, 0);
assert(prog != NULL);
return 0;
}
int findmatch(int ip, char src){
char *p="[]";
int dir[]= { 1, -1 };
int i;
int defer;
i = strchr(p,src)-p;
ip+=dir[i];
for (defer=dir[i]; defer!=0; ip+=dir[i]) {
if (ip<0||ip>=progsize) error("mismatch");
char *q = strchr(p,prog[ip]);
if (q) {
int j = q-p;
defer+=dir[j];
}
}
return ip;
}
int run() {
int ip;
for(ip = 0; ip>=0 && ip<progsize; ip++)
switch(prog[ip]){
case '>': ++ptr; break;
case '<': --ptr; break;
case '+': ++*ptr; break;
case '-': --*ptr; break;
case '.': putchar(*ptr); break;
case ',': *ptr=getchar(); break;
case '[': /*while(*ptr){*/
if (!*ptr) ip=findmatch(ip,'[');
break;
case ']': /*}*/
if (*ptr) ip=findmatch(ip,']');
break;
}
return 0;
}
int cleanup() {
free(mem);
ptr = NULL;
return 0;
}
int main(int argc, char *argv[]) {
init(argc > 1? argv[1]: NULL);
run();
cleanup();
return 0;
}

Resources