Line break support in parser (TCL) - parsing

So, I have a parser, written in TCL. There are many commands in the parsing file. Now, I need to add support for line breaks.
For ex.
my_command \
arg1 \
arg2 \
arg3
I have something like this.
while { ! [eof $currentFileDescriptor] } {
set line [gets $currentFileDescriptor]
set lst [lindex [regexp -all -inline {^(\s*(\S*)\s*)*(\{(.*)\})?(\s*(\S*)\s*)*$} $line] 0]
set tok [string toupper [lindex $lst 0]]
switch -glob $tok {
"\#*" { }
"MY_COMMAND_1" { parseMyCommand1 $handler $lst }
.....#other commands }
}
incr lnum
}
I am looking for an optimal and effective solution.

It looks like you have defined a domain specific language (DSL) with the parsing implemented in Tcl. You may as well use the Tcl parsing itself to deal with things like line continuation and quote handling. The method to do this is to create a safe interpreter and in the safe interpreter only provide the commands required for your DSL. You then interpret your config file in the safe child interpreter. The wiki page has some examples.
The advantage of this method is that the parsing is handled by the normal Tcl parser. However you can be in complete control of what commands are exposed in the safe interpreter. You can also control the amount of resources it can use (stack and memory) and limit it's visibility of the filesystem or network.
If you don't want to get into this then you just need to implement recognition of backslashed newlines and buffer such lines until you have a complete line. Something like the following (untested):
set linenum 0
set buffer ""
while {[gets $input line] != -1} {
incr linenum
if {[regexp {\\$} $line]} {
append buffer [string range $line 0 end-1]
continue
} else {
append buffer $line
}
ParseCompleteLine $linenum $buffer
set buffer ""
}

Related

How evaluate external command to a Nix value?

I want to parse a file to a Nix list value inside flake.nix.
I have a shell script which does that
perl -007 -nE 'say for m{[(]use-package \s* ([a-z-0-9]+) \s* (?!:nodep)}xsgm' init.el
How can I execute external command while evaluating flake.nix?
programs.emacs = {
enable = true;
extraConfig = builtins.readFile ./init.el;
extraPackages = elpa: (shellCommandToParseFile ./init.el); # Runs shell script
};
You can run ./init.el by the same way you perform any other impure step in Nix: With a derivation.
This might look something vaguely like:
programs.emacs = {
enable = true;
extraConfig = ../init.el;
extraPackages = elpa:
let
packageListNix =
pkgs.runCommand "init-packages.nix" { input = ../init.el; } ''
${pkgs.perl}/bin/perl -007 -nE '
BEGIN {
say "{elpa, ...}: with elpa; [";
say "use-package";
};
END { say "]" };
while (m{[(]use-package \s* ([a-z-0-9]+) \s* (;\S+)?}xsgm) {
next if $2 eq ";builtin";
say $1;
}' "$input" >"$out"
'';
in (import "${packageListNix}" { inherit elpa; });
};
...assuming that, given the contents of your ./init.el, the contents of your resulting el-pkgs.nix is actually valid nix source code.
That said, note that like any other derivation (that isn't either fixed-output or explicitly impure), this happens inside a sandbox with no network access. If the goal of init.el is to connect to a network resource, you should be committing its output to your repository. A major design goal of flakes is to remove impurities; they're not suitable for impure derivations.

Passing strings to .wasm module

I've been stuck on this for a while now and I cannot seem to find good resources to my problem. I am coming from and "only C" background, so most of the web dev stuff is completely new for me.
I wrote a C function float editDistance(char *str1, char *str2) that returns the edit distance of 2 char arrays. Right now the goal is to successfully call this function from a JS environment.
After ensuring that the code works with the recommended Emscipten ccall method, I decided to move on. Now
I use Emscripten to compile the C code with flags -O3, -s WASM=1, -s EXPORTED_FUNCTIONS="['_editDistance']", and -s SIDE_MODULE=1 -s to Wasm. The JS code I'm trying to wrap around my WebAssembly is:
// Allocate memory for the wasm module to run in. (65536*256 bit)
let wasmMemory = new WebAssembly.Memory({
initial: 256
});
let info = {
env: {
abort: function() {},
memoryBase: 0,
tableBase: 0,
memory: wasmMemory,
table: new WebAssembly.Table({initial: 2, element: 'anyfunc'}),
}
}
// Define the strings
let str1 = "abcd";
let str2 = "abcd";
// Allocate memory on the wasm partition for the HEAPU8
let HEAPU8 = new Uint8Array(wasmMemory.buffer);
// Create the char arrays on the heap from the strings
let stackPtr = 0;
let str1Ptr = stackPtr;
stackPtr = stringToASCIIArray(str1, HEAPU8, stackPtr);
let str2Ptr = stackPtr;
stackPtr = stringToASCIIArray(str2, HEAPU8, stackPtr);
// Read the wasm file and instantiate it with the above environment setup. Then
// call the exported function with the string pointers.
let wasmBinaryFile = 'bin/edit_distanceW.wasm';
fetch(wasmBinaryFile, {credentials:"same-origin"})
.then((response) => response.arrayBuffer())
.then((binary) => WebAssembly.instantiate(binary,info))
.then((wa) => alert(wa.instance.exports._editDistance(str1Ptr, str2Ptr)));
// Converts a string to an ASCII byte array on the specified memory
function stringToASCIIArray(str, outU8Array, idx){
let length = str.length + 1;
let i;
for(i=0; i<length; i++){
outU8Array[idx+i] = str.charCodeAt(i);
}
outU8Array[idx+i]=0;
return (idx + length);
}
The generated wasm file when converted to wat demands these imports:
(import "env" "abort" (func (;0;) (type 0)))
(import "env" "memoryBase" (global (;0;) i32))
(import "env" "tableBase" (global (;1;) i32))
(import "env" "memory" (memory (;0;) 256))
(import "env" "table" (table (;0;) 2 anyfunc))
.. and exports these:
(export "__post_instantiate" (func 7))
(export "_editDistance" (func 9))
(export "runPostSets" (func 6))
(elem (;0;) (get_global 1) 8 1))
Now, when I test the code the strings are passed to the C module without a problem. A few function calls are even made on them (strLen) before things go south. In the C function there is this nasty nested loop that does the main computation, iterating thru a 2D array while reading the characters from the strings (C code just been ported from a paper with an ugly pseudo code, so pardon me the variable names):
do{
for(p=0; p<editDistance; p++){
// Do stuff
}
// Do more stuff
editDistance++;
} while(fkp[len2*2-len1][editDistance] != len1);
Before the function enters the for() loop, the module still has the strings on memory str1Ptr=0x00 and str2Ptr=0x05 with the correct length and content. On the contrary, immediately after entering the for() loop the memory gets overwritten by garbage (mostly 0s), corrupting the end result. I suspect some stack saving and restoration problems on the scope change, as the exact same code compiled to my PC using gcc works like a charm.
Any idea what setup I'm missing that hinders the correct completion of the C function?
If you are starting out you probably want to use the emscripten-generated JS glue. That is, don't use SIDE_MODULE=1 and instead output to a files calle .js. The emscripten compiler will then generate both a .js and a .wasm file. You can then include the .js file in your project and it will handle all the loading and setup for you.
If you try to load the wasm file yourself, you will need to do a lot of work to replicate the emscripten environment, which will require a lot of internal details of emscripten. Also, those internal details of subject to change when you update to the new version of emscripten so you are creating more work for yourself.

How to stop parsing and reset yacc?

I am writing a job control shell. I use Yacc and Lex for parsing. The top rule in my grammar is pipeline_list, which is a list of pipelines separated by a comma. Thus, examples of pipelinelists are as follows:
cmd1 | cmd2; cmd3; cmd4 | cmd5 <newline>
cmd1 <newline>
<nothing> <newline>
I represent a pipeline with the pipeline rule (showed below). within that rule, I do the following:
1. call execute_pipeline() to execute the pipeline. execute_pipeline() returns -1 if anything went wrong in the execution of the pipeline.
2. Check the return value of execute_pipeline() and if it is -1, then STOP parsing the rest of the input, AND make sure YACC starts fresh when called again in the main function (shown below). The rationale for doing this is this:
Take, for example, the following pipelinelist: cd ..; ls -al. My intent here would be to move one directory up, and then list its content. However, if execution of the first pipeline (i.e., "cd ..") in the pipelinelist fails, then carrying on to execute the second pipeline (i.e. " ls -al") would list of the contents of the current directory (not the parent), which is wrong! For this reason, when parsing a pipelinelist of length n, if executing of some pipeline k > n fails, then I want to discard the rest of the pipelinelist (i.e., pipelines k+1..n), AND make sure the next invocation of yyparse() starts brand new (i.e. recieve new input from readline() -- see code below).
if tried the following, but it does not work:
pipeline:
simple_command_list redirection_list background pipeline_terminator // ignore these
{
if (execute_pipeline() == -1)
{
// do some stuff
// then call YYABORT, YYACCEPT, or YYERROR, but none of them works
}
}
int main()
{
while(1)
{
char *buffer = readline("> ");
if (buffer)
{
struct yy_buffer_state *bp;
bp = yy_scan_string(buffer);
yy_switch_to_buffer(bp);
yyparse()
yy_delete_buffer(bp);
free(buffer);
} // end if
} // end while
return 0;
} // end main()
You can use YYABORT; in an action to abort the current parse and immediately return from yyparse with a failure. You can use YYACCEPT; to immediately return success from yyparse.
Both of these macros can only be used directly in an action in the grammar -- they can't be used in some other function called by the action.

Env not modify when loading module in modulefile

I would like to load a module into a modulefile (to resolve dependencies).
MyModule:
#%Module########################################
##
## Modulefile
#
proc ModulesHelp { } {
puts stderr "Env for MyProg"
}
proc addPath {var val} {
prepend-path $var $val
}
module load MyOtherModule
addPath PATH /opt/MyModule/bin
MyOtherModule:
#%Module########################################
##
## Modulefile
#
proc ModulesHelp { } {
puts stderr "Env for MyOtherProg"
}
proc addPath {var val} {
prepend-path $var $val
}
addPath PATH /opt/MyOtherModule/bin
When I run module load MyModule, both modules seem to be loaded but environment is not right :
$module list
Currently Loaded Modulefiles:
1) MyModule 2) MyOtherModule
$echo $PATH
/opt/MyModule/bin:/usr/bin:/bin
If I add the line foreach p [array names env] { set tmp $env($p) } or at least set tmp $env(PATH) in the MyModule after the module load MyOtherModule line, the environment is correctly modified. It also work fine if I don't use my function addPath but I use the prepend-path command directly, which is a bit annoying because I would like to do more things in the addPath function of course.
Anyone as an idea on what is going on and what I am missing ?
The prepend-path is probably doing some “clever” stuff to manage a variable; what exactly it is is something I don't know and don't need to know, because we can solve it all using generic Tcl. To make your wrapping of it work, use uplevel to evaluate the code in the proper scope, though you need to consider whether to use the global scope (name #0) or the caller's scope (1, which is the default); they're the same when your procedure addPath is called from the global level, but otherwise can be quite different, and I don't know what other oddness is going on with the modules system processing.
To demonstrate, try this addPath:
proc addPath {var val} {
puts stderr "BEFORE..."
uplevel 1 [list prepend-path $var $val]
puts stderr "AFTER..."
}
We use list to construct the thing to evaluate in the caller's scope, as it is guaranteed to generate substitution-free single-command scripts. (And valid lists too.) This is the whole secret to doing code generation in Tcl: keep it simple, use list to do any quoting required, call a helper procedure (with suitable arguments) when things get complicated, and use uplevel to control evaluation scope.
(NB: upvar can also be useful — it binds local variables to variables in another scope — but isn't what you're recommended to use here. I mention it because it's likely to be useful if you do anything more complex…)

Expand tilde to home directory

I have a program that accepts a destination folder where files will be created. My program should be able to handle absolute paths as well as relative paths. My problem is that I don't know how to expand ~ to the home directory.
My function to expand the destination looks like this. If the path given is absolute it does nothing otherwise it joins the relative path with the current working directory.
import "path"
import "os"
// var destination *String is the user input
func expandPath() {
if path.IsAbs(*destination) {
return
}
cwd, err := os.Getwd()
checkError(err)
*destination = path.Join(cwd, *destination)
}
Since path.Join doesn't expand ~ it doesn't work if the user passes something like ~/Downloads as the destination.
How should I solve this in a cross platform way?
Go provides the package os/user, which allows you to get the current user, and for any user, their home directory:
usr, _ := user.Current()
dir := usr.HomeDir
Then, use path/filepath to combine both strings to a valid path:
if path == "~" {
// In case of "~", which won't be caught by the "else if"
path = dir
} else if strings.HasPrefix(path, "~/") {
// Use strings.HasPrefix so we don't match paths like
// "/something/~/something/"
path = filepath.Join(dir, path[2:])
}
(Note that user.Current() is not implemented in the go playground (likely for security reasons), so I can't give an easily runnable example).
In general the ~ is expanded by your shell before it gets to your program. But there are some limitations.
In general is ill-advised to do it manually in Go.
I had the same problem in a program of mine and what I have understood is that if I use the flag format as --flag=~/myfile, it is not expanded. But if you run --flag ~/myfile it is expanded by the shell (the = is missing and the filename appears as a separate "word").
Normally, the ~ is expanded by the shell before your program sees it.
Adjust how your program acquires its arguments from the command line in a way compatible with the shell expansion mechanism.
One of the possible problems is using exec.Command like this:
cmd := exec.Command("some-binary", someArg) // say 'someArg' is "~/foo"
which will not get expanded. You can, for example use instead:
cmd := exec.Command("sh", "-c", fmt.Sprintf("'some-binary %q'", someArg))
which will get the standard ~ expansion from the shell.
EDIT: fixed the 'sh -c' example.
If you are expanding tilde '~' for use with exec.Command() you should use the users local shell for expansion.
// 'sh', 'bash' and 'zsh' all respect the '-c' argument
cmd := exec.Command(os.Getenv("SHELL"), "-c", "cat ~/.myrc")
cmd.Stdout = os.Stdout
if err := cmd.Run(); err != nil {
fmt.Fprintln(os.Stderr, err)
}
However; when loading application config files such as ~./myrc this solution is not acceptable. The following has worked well for me across multiple platforms
import "os/user"
import "path/filepath"
func expand(path string) (string, error) {
if len(path) == 0 || path[0] != '~' {
return path, nil
}
usr, err := user.Current()
if err != nil {
return "", err
}
return filepath.Join(usr.HomeDir, path[1:]), nil
}
NOTE: usr.HomeDir does not respect $HOME instead determines the home directory by reading the /etc/passwd file via the getpwuid_r syscall on (osx/linux). On windows it uses the OpenCurrentProcessToken syscall to determine the users home directory.
I know this is an old question but there is another option now. You can use go-homedir to expand the tidle to the user's homedir:
myPath := "~/.ssh"
fmt.Printf("path: %s; with expansion: %s", myPath, homedir.Expand(myPath))
This works on go >= 1.12:
if strings.HasPrefix(path, "~/") {
home, _ := os.UserHomeDir()
path = filepath.Join(home, path[2:])
}

Resources