mpi & number of processor - visual-studio-2019

Hello, I'm using Windows and I use both Code::Blocks and Visual Studio 2019 to program with MPI.
But after I run this code
/* Minimal MPI "hello world": every process reports its rank and the size of
 * MPI_COMM_WORLD.  Launched directly (without mpiexec) this runs as a
 * singleton, so size is 1. */
int main(int argc, char *argv[])
{
    int my_rank;
    int world_size;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);
    printf("Hello world! I am %d of %d\n", my_rank, world_size);
    MPI_Finalize();
    return 0;
}
it reports 1 processor, while if I use echo %NUMBER_OF_PROCESSORS%
in a normal cmd window it reports 4.
I was using MPI_Send & MPI_Recv and I got a fatal error after running this code:
#include <stdio.h>
#include "mpi.h"
/* Rank 0 reads an integer and sends it to rank 1 (tag 10); rank 1 receives
 * it and queries the element count of the message.
 *
 * Fixes:
 *  - The reported crash ("Invalid rank has value 1 but must be nonnegative
 *    and less than 1") happens because the program was started without
 *    mpiexec, so MPI_COMM_WORLD contains a single process and rank 1 does
 *    not exist.  We now check nproc and bail out with a hint.
 *  - MPI_Get_count was called on every rank, but 'info' is only filled in
 *    by MPI_Recv; on rank 0 it was read uninitialized.  It is now called
 *    only in the receiving branch. */
int main(int argc, char* argv[])
{
    int menum, nproc;
    int n, tag, num;
    MPI_Status info;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &menum);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);
    printf("menum:%d nproc :%d", menum, nproc);

    if (nproc < 2) {
        fprintf(stderr, "\nAt least 2 MPI processes are required; "
                        "launch with: mpiexec -n 2 <program>\n");
        MPI_Finalize();
        return 1;
    }

    tag = 10;
    if (menum == 0) {
        printf("\ninsert number: ");
        scanf_s(" %d", &n);
        MPI_Send(&n, 1, MPI_INT, 1, tag, MPI_COMM_WORLD);
    }
    else {
        MPI_Recv(&n, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &info);
        /* 'info' is only meaningful after a successful receive. */
        MPI_Get_count(&info, MPI_INT, &num);
    }

    MPI_Finalize();
    return 0;
}
I searched online and, as far as I can tell, MPI can't find another process to send the value to.
How can I fix this?
Is there a way I could use MPI with all my processors from Visual Studio or Code::Blocks?
job aborted:
[ranks] message
[0] fatal error
Fatal error in MPI_Send: Invalid rank, error stack:
MPI_Send(buf=0x0056F9F0, count=1, MPI_INT, dest=1, tag=10, MPI_COMM_WORLD) failed
Invalid rank has value 1 but must be nonnegative and less than 1

If you write an MPI program and just execute it directly you get what's called singleton init -- The MPI library will start itself up and you get one process in MPI_COMM_WORLD.
To launch a parallel MPI program on Windows, you'll need to consult your MPI implementation. For MS-MPI, you will use the HPC Job Manager (https://learn.microsoft.com/en-us/powershell/high-performance-computing/overview?view=hpc19-ps) .
For Intel-MPI you go through a similar process constructing a host file and registering your windows credentials with each machine (https://software.intel.com/content/www/us/en/develop/documentation/mpi-developer-guide-windows/top.htm)
I don't have any experience with MPI on Windows machines so this is as much help as I can give you. Hope it gets you pointed in the right direction.

Related

is it safe to have two lua thread run parallel on the same lua state without concurrent execution?

we are developing game server using lua.
the server is single threaded, we'll call lua from c++.
every c++ service will create a lua thread from a global lua state which is shared by all service.
the lua script executed by lua thread will call a c api which will make a rpc call to remote server.
then the lua thread is suspended, because its C function never returns.
when the rpc response gets back, we'll continue the c code, which will return to the lua script.
so, we will have multiple lua threads executing in parallel on the same global lua state, but they will never run concurrently. and the suspension is not caused by the lua yield function, but comes from the c side.
is it safe to do something like this?
#include <stdio.h>
#include <stdlib.h>
#include <ucontext.h>
#include "lua/lua.hpp"
static ucontext_t uctx_main, uctx_func1, uctx_func2;
lua_State* gLvm;
int gCallCnt = 0;
/* Lua-callable C function "proc": prints its string argument, then swaps to
 * the main ucontext mid-call (simulating a blocking RPC that suspends from
 * the C side).  When main later swaps this context back in, execution
 * resumes right after swapcontext and the function returns to Lua normally.
 * NOTE(review): the suspended C frame keeps a live lua_State pointer; this
 * relies on no other code resuming this Lua thread in the meantime. */
static int proc(lua_State *L) {
int iID = atoi(lua_tostring(L, -1));
printf("begin proc, %s\n", lua_tostring(L, -1));
if(iID == 1)
{
/* caller #1 parks here until main swaps uctx_func1 back in */
swapcontext(&uctx_func1, &uctx_main);
}
else
{
/* caller #2 parks here until main swaps uctx_func2 back in */
swapcontext(&uctx_func2, &uctx_main);
}
printf("end proc, %s\n", lua_tostring(L, -1));
return 0;
}
/* Coroutine body #1: creates a fresh Lua thread on the shared global state,
 * pushes the global "proc" and the call counter (as a string) as its single
 * argument, and resumes it.  proc() swaps back to main mid-call, so
 * lua_resume does not return until this context is swapped in again. */
static void func1(void)
{
    gCallCnt++;
    printf("hello, func1\n");
    lua_State *thread = lua_newthread (gLvm);
    lua_getglobal(thread, "proc");
    char szTmp[20];
    /* snprintf instead of sprintf: bounded write, identical output here. */
    snprintf(szTmp, sizeof szTmp, "%d", gCallCnt);
    lua_pushstring(thread, szTmp);
    int iRet = lua_resume(thread, gLvm, 1);
    printf("lua_resume return:%d\n", iRet);
}
/* Coroutine body #2: identical to func1 — spawns a Lua thread, calls the
 * global "proc" with the current call count, and resumes it. */
static void func2(void)
{
    gCallCnt++;
    printf("hello, func2\n");
    lua_State *thread = lua_newthread (gLvm);
    lua_getglobal(thread, "proc");
    char szTmp[20];
    /* snprintf instead of sprintf: bounded write, identical output here. */
    snprintf(szTmp, sizeof szTmp, "%d", gCallCnt);
    lua_pushstring(thread, szTmp);
    int iRet = lua_resume(thread, gLvm, 1);
    printf("lua_resume return:%d\n", iRet);
}
/* Driver: registers proc() with Lua, prepares two ucontexts with private
 * stacks, then ping-pongs between them.  Each funcN calls proc via
 * lua_resume; proc swaps back to main halfway through, and the second pair
 * of swapcontext calls lets both suspended proc() invocations finish.
 * NOTE(review): 16 KiB per context stack is small for running a Lua VM —
 * confirm it is sufficient before reusing this pattern. */
int main(int argc, char *argv[]){
int iRet = 0;
gLvm = luaL_newstate();
luaL_openlibs(gLvm);
lua_pushcfunction(gLvm, proc);
lua_setglobal(gLvm, "proc");
/* dedicated stacks for the two coroutine contexts (live until main ends) */
char func1_stack[16384];
char func2_stack[16384];
getcontext(&uctx_func1);
uctx_func1.uc_stack.ss_sp = func1_stack;
uctx_func1.uc_stack.ss_size = sizeof(func1_stack);
uctx_func1.uc_link = &uctx_main;
makecontext(&uctx_func1, func1, 0);
getcontext(&uctx_func2);
uctx_func2.uc_stack.ss_sp = func2_stack;
uctx_func2.uc_stack.ss_size = sizeof(func2_stack);
uctx_func2.uc_link = &uctx_main;
makecontext(&uctx_func2, func2, 0);
/* round 1: start both coroutines; each parks inside proc() */
swapcontext(&uctx_main, &uctx_func1);
swapcontext(&uctx_main, &uctx_func2);
/* round 2: resume both so the suspended proc() calls run to completion */
swapcontext(&uctx_main, &uctx_func1);
swapcontext(&uctx_main, &uctx_func2);
printf("hello, main\n");
return 0;
}

Memory leak in a Tcl wrapper

I read all I could find about memory management in the Tcl API, but haven't been able to solve my problem so far. I wrote a Tcl extension to access an existing application. It works, except for a serious issue: memory leak.
I tried to reproduce the problem with minimal code, which you can find at the end of the post. The extension defines a new command, recordings, in namespace vtcl. The recordings command creates a list of 10000 elements, each element being a new command. Each command has data attached to it, which is the name of a recording. The name subcommand of each command returns the name of the recording.
I run the following Tcl code with tclsh to reproduce the problem:
load libvtcl.so
for {set ii 0} {$ii < 1000} {incr ii} {
set recs [vtcl::recordings]
foreach r $recs {rename $r ""}
}
The line foreach r $recs {rename $r ""} deletes all the commands at each iteration, which frees the memory of the piece of data attached to each command (I can see that in gdb). I can also see in gdb that the reference count of variable recs goes to 0 at each iteration so that the contents of the list is freed. Nonetheless, I see the memory of the process running tclsh going up at each iteration.
I have no more idea what else I could try. Help will be greatly appreciated.
#include <stdio.h>
#include <string.h>
#include <tcl.h>
static void DecrementRefCount(ClientData cd);
static int ListRecordingsCmd(ClientData cd, Tcl_Interp *interp, int objc,
Tcl_Obj *CONST objv[]);
static int RecordingCmd(ClientData cd, Tcl_Interp *interp, int objc,
Tcl_Obj *CONST objv[]);
/* Tcl_CmdDeleteProc: release the command's hold on the Tcl_Obj that stores
 * the recording name, allowing Tcl to free it once unreferenced. */
static void
DecrementRefCount(ClientData cd)
{
    Tcl_DecrRefCount((Tcl_Obj *) cd);
}
/*
 * vtcl::recordings — create 10000 "recN" commands, each carrying a recording
 * name ("DMnnnn") as ClientData, and return the list of command names.
 *
 * Fix (matches the leak diagnosed later in this thread): Tcl_NewIntObj()
 * returns an object with refcount 0, and Tcl_AppendObjToObj() copies its
 * string representation without taking ownership, so the temporary was
 * never freed.  We now drop our reference after the append.
 */
static int
ListRecordingsCmd(ClientData cd, Tcl_Interp *interp, int objc,
                  Tcl_Obj *CONST objv[])
{
    char name_buf[20];
    Tcl_Obj *rec_list = Tcl_NewListObj(0, NULL);

    for (int ii = 0; ii < 10000; ii++)
    {
        static int obj_id = 0;
        Tcl_Obj *cmd;
        Tcl_Obj *rec_name;
        Tcl_Obj *id_obj;

        cmd = Tcl_NewStringObj ("rec", -1);
        id_obj = Tcl_NewIntObj (obj_id++);
        Tcl_IncrRefCount(id_obj);
        Tcl_AppendObjToObj (cmd, id_obj);
        Tcl_DecrRefCount(id_obj);       /* was leaked in the original */

        rec_name = Tcl_NewStringObj ("DM", -1);
        snprintf(name_buf, sizeof(name_buf), "%04d", ii);
        Tcl_AppendStringsToObj(rec_name, name_buf, (char *) NULL);
        Tcl_IncrRefCount(rec_name);     /* released by DecrementRefCount */
        Tcl_CreateObjCommand (interp, Tcl_GetString (cmd), RecordingCmd,
                              (ClientData) rec_name, DecrementRefCount);
        Tcl_ListObjAppendElement (interp, rec_list, cmd);
    }
    Tcl_SetObjResult (interp, rec_list);
    return TCL_OK;
}
/*
 * Per-recording command: "name" returns the recording name stored in the
 * command's ClientData; anything else is an error.
 *
 * Fix: the original indexed objv[1] unconditionally, reading past the
 * argument array when the command was invoked with no subcommand.
 */
static int
RecordingCmd(ClientData cd, Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[])
{
    Tcl_Obj *rec_name = (Tcl_Obj *)cd;
    char *subcmd;

    if (objc < 2)
    {
        Tcl_SetObjResult (interp,
            Tcl_NewStringObj ("wrong # args: should be \"cmd subcommand\"", -1));
        return TCL_ERROR;
    }

    subcmd = Tcl_GetString (objv[1]);
    if (strcmp (subcmd, "name") == 0)
    {
        Tcl_SetObjResult (interp, rec_name);
    }
    else
    {
        Tcl_Obj *result = Tcl_NewStringObj ("", 0);
        Tcl_AppendStringsToObj (result,
                                "bad command \"",
                                Tcl_GetString (objv[1]),
                                "\"",
                                (char *) NULL);
        Tcl_SetObjResult (interp, result);
        return TCL_ERROR;
    }
    return TCL_OK;
}
/* Package entry point invoked by "load libvtcl.so": provides the vtcl
 * package, creates the ::vtcl namespace, and registers the recordings
 * command. */
int
Vtcl_Init(Tcl_Interp *interp)
{
#ifdef USE_TCL_STUBS
    if (Tcl_InitStubs(interp, "8.5", 0) == NULL) {
        return TCL_ERROR;
    }
#endif
    if (Tcl_PkgProvide(interp, "vtcl", "0.0.1") != TCL_OK) {
        return TCL_ERROR;
    }

    Tcl_CreateNamespace(interp, "vtcl", (ClientData) NULL,
                        (Tcl_NamespaceDeleteProc *) NULL);
    Tcl_CreateObjCommand(interp, "::vtcl::recordings", ListRecordingsCmd,
                         (ClientData) NULL, (Tcl_CmdDeleteProc *) NULL);
    return TCL_OK;
}
The management of the Tcl_Obj * reference counts looks absolutely correct, but I do wonder whether you're freeing all the other resources associated with a particular instance in your real code. It might also be something else entirely; your code is not the only thing in Tcl that allocates memory! Furthermore, the default memory allocator in Tcl does not actually return memory to the OS, but instead holds onto it until the process ends. Figuring out what is wrong can be tricky.
You can try doing a build of Tcl with the --enable-symbols=mem passed to configure. That makes Tcl build in an extra command, memory, which allows more extensive checking of memory management behaviour (it also does things like ensure that memory is never written to after it is freed). It's not enabled by default because it has a substantial performance hit, but it could well help you track down what's going on. (The memory info subcommand is where to get started.)
You could also try adding -DPURIFY to the CFLAGS when building; it completely disables the Tcl memory allocator (so memory checking tools like — commercial — Purify and — OSS — Electric Fence can get accurate information, instead of getting very confused by Tcl's high-performance thread-aware allocator) and may allow you to figure out what is going on.
I found where the leak is. In function ListRecordingsCmd, I replaced line
Tcl_AppendObjToObj (cmd, Tcl_NewIntObj (obj_id++));
with
Tcl_Obj *obj = Tcl_NewIntObj (obj_id++);
Tcl_AppendObjToObj (cmd, obj);
Tcl_DecrRefCount(obj);
The memory allocated to store the object id was not released. The memory used by the tclsh process is now stable.

Calling MPI functions from multiple threads

I want to implement the following thing using MPI and Pthreads but facing some error:
Each processor will have 2 threads. Each processor's one thread will be sending data to other processors and the other thread will be receiving data from other processors. When I am implementing it, it is giving segmentation fault error with some messages like "current bytes -40, total bytes 0, remote id 5".
Just for testing purpose, when I am using only one thread per processor and that is either sending or receiving data, then the errors do NOT occur.
I found the info "In general, there may be problems if multiple threads make MPI calls. The program may fail or behave unexpectedly. If MPI calls must be made from within a thread, they should be made only by one thread." in the following link: https://computing.llnl.gov/tutorials/pthreads/
I want to use two threads per processor, where one thread will use the MPI_Send function to send some data and the other thread will use the MPI_Recv function to receive data, without using any locking mechanism. Does anyone have any idea how to implement this, or how to use multiple threads to call MPI functions without using a mutex or locking mechanism?
Here is the code:
int rank, size, msg_num;
// thread function for sending messages
// Sender thread: fires msg_num integers (the sender's rank) at randomly
// chosen peer ranks with tag 0, then notifies every other rank that it is
// finished by sending a tag-128 marker message.
void *Send_Func_For_Thread(void *arg)
{
    int payload = rank;

    for (int i = 0; i < msg_num; i++)
    {
        int dest = rand() % size;
        if (dest != rank)
            MPI_Send(&payload, 1, MPI_INT, dest, 0, MPI_COMM_WORLD);
    }

    // Completion markers: tag 128 tells each peer this rank is done sending.
    for (int dest = 0; dest < size; dest++)
    {
        if (dest != rank)
            MPI_Send(&payload, 1, MPI_INT, dest, 128, MPI_COMM_WORLD);
    }

    pthread_exit((void *)NULL);
}
// thread function for receiving messages
// Receiver thread: drains incoming integers from any source until the
// tag-128 completion marker has arrived from every other rank.
void *Recv_Func_For_Thread(void *arg)
{
    MPI_Status status;
    int incoming;
    int done_peers = 0;

    while (done_peers < size - 1)
    {
        MPI_Recv(&incoming, 1, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG,
                 MPI_COMM_WORLD, &status);
        if (status.MPI_TAG == 128)
            done_peers++;
    }

    pthread_exit((void *)NULL);
}
int main(int argc, char **argv)
{
void *stat;
pthread_attr_t attr;
pthread_t thread[2];
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank); // rank -> rank of this processor
MPI_Comm_size(MPI_COMM_WORLD, &size); // size -> total number of processors
srand((unsigned)time(NULL));
msg_num = atoi(argv[1]);
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
// thread 0 will be sending messages
pthread_create(&thread[0], &attr, Send_Func_For_Thread, (void *)0);
// thread 1 will be receiving messages
pthread_create(&thread[1], &attr, Recv_Func_For_Thread, (void *)1);
pthread_attr_destroy(&attr);
pthread_join(thread[0], &stat);
pthread_join(thread[1], &stat);
cout << "Finished : Proc " << rank << "\n";
MPI_Finalize();
pthread_exit((void *)NULL);
return 0;
}
Compile:
========
module load mvapich2/gcc; mpicxx -lpthread -o demo demo.cpp
Run:
====
mpiexec -comm mpich2-pmi demo 10000000
I ran this program with 3 processors and got segmentation fault.
(Since you haven't provided an example, the following is just speculation.)
You must initialize MPI using MPI_Init_thread() instead of MPI_Init(). If I understand your explanation correctly, the "required" argument must have the value MPI_THREAD_MULTIPLE. If MPI_Init_thread() then returns a lower level thread support in the "provided" argument, it means that your MPI implementation doesn't support MPI_THREAD_MULTIPLE; in that case you must do something else. See http://www.mpi-forum.org/docs/mpi-20-html/node165.htm .
It worked with only one line change with MPICH2.
Instead of using MPI_Init, use the following line:
int provided;
MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
Thanks all of you for your help and prompt replies!

My lua script load .so library, how can I write the host program with Lua 5.2?

I searched and tried for days. The problem is this:
I wrote a script which loads a shared library, locker.so. It runs well with the Lua interpreter, but I cannot write the correct host program.
My lua script load_so.lua is very simple:
locker = require("locker")
print(type(locker))
for k, v in pairs(locker) do
print(k, v)
end
My host program is:
/* Host program: create a Lua state, open the standard libraries (including
 * 'package', whose C loader resolves require("locker") to locker.so), and
 * run the script.  On failure, print the error message Lua left on the
 * stack and pop it. */
int main(int argc, const char *argv[])
{
    lua_State *L = luaL_newstate();
    luaL_openlibs(L);

    int status = luaL_dofile(L, "load_so.lua");
    if (status != 0) {
        fprintf(stderr, "luaL_dofile error: %s\n", lua_tostring(L, -1));
        lua_pop(L, 1);
    }

    lua_close(L);
    return 0;
}
When I run my host program, error print out:
luaL_dofile error: error loading module 'locker' from file './locker.so':
./locker.so: undefined symbol: lua_pushstring
And the locker.c:
static int elock_get(lua_State * L) {...}
static int elock_set(lua_State * L) {...}
/* Registration table mapping Lua-visible names to C implementations;
 * the {NULL, NULL} pair terminates the array. */
static const struct luaL_Reg lockerlib[] = {
{"get", elock_get},
{"set", elock_set},
{NULL, NULL}
};
/* Module entry point called by Lua's require("locker").
 * Lua 5.2 style: build the module table and leave it on the stack as the
 * single return value (5.1's luaL_register, which also set a global, is
 * gone in 5.2). */
int luaopen_locker(lua_State *L)
{
//luaL_newlib(L, lockerlib);
//lua_pushvalue(L, -1);
//lua_setglobal(L, LOCKER_LIBNAME);
//set_info(L);
luaL_newlibtable(L, lockerlib);
luaL_setfuncs(L, lockerlib, 0);
return 1;
}
Most articles, books, questions shows how to do it in Lua 5.1, and yes, the program runs correctly in Lua 5.1. But how can I make it support Lua 5.2, and why?
P.S: I don't want to load the library in my C host program like luaL_requiref(L, "locker", luaopen_locker, 1), because I don't know which .so library will load in Lua script.
Thanks.
In Linux, if you're linking liblua.a statically into your main program, you need to use -Wl,-E when linking to export the Lua API symbols; this is how the standard command line interpreter is built.

Forcing a Lua script to exit

How do you end a long running Lua script?
I have two threads, one runs the main program and the other controls a user supplied Lua script. I need to kill the thread that's running Lua, but first I need the script to exit.
Is there a way to force a script to exit?
I have read that the suggested approach is to raise a Lua error. However, it's not guaranteed that the user's script will ever call an API function (it could be in a tight busy loop). Further, the user could prevent errors from causing his script to exit by using a pcall.
You could use setjmp and longjump, just like the Lua library does internally. That will get you out of pcalls and stuff just fine without need to continuously error, preventing the script from attempting to handle your bogus errors and still getting you out of execution. (I have no idea how well this plays with threads though.)
#include <stdio.h>
#include <setjmp.h>
#include "lua.h"
#include "lualib.h"
#include "lauxlib.h"
jmp_buf place;
/* Debug hook installed with LUA_MASKCOUNT: invoked every 100 VM
 * instructions.  Lets the script survive 10 hook firings, then longjmps
 * straight out of the Lua VM back to the setjmp in main — escaping even
 * scripts that wrap everything in pcall.
 * NOTE(review): longjmp out of a hook bypasses Lua's internal unwinding;
 * the state must only be closed afterwards, as main does here. */
void hook(lua_State* L, lua_Debug *ar)
{
static int countdown = 10;
if (countdown > 0)
{
--countdown;
printf("countdown: %d!\n", countdown);
}
else
{
/* non-local exit: control reappears at setjmp(place) in main */
longjmp(place, 1);
}
}
/* Demo driver: installs a count hook, then runs a script that recurses
 * through nested pcalls.  The hook eventually longjmps back here, so
 * control lands at the setjmp test and falls through to shutdown. */
int main(int argc, const char *argv[])
{
    lua_State *state = luaL_newstate();
    luaL_openlibs(state);

    /* Fire the hook after every 100 VM instructions. */
    lua_sethook(state, hook, LUA_MASKCOUNT, 100);

    if (setjmp(place) == 0) {
        luaL_dostring(state, "function test() pcall(test) print 'recursing' end pcall(test)");
    }

    lua_close(state);
    printf("Done!");
    return 0;
}
You could set a variable somewhere in your program and call it something like forceQuitLuaScript. Then, you use a hook, described here to run every n instructions. After n instructions, it'll run your hook which just checks if forceQuitLuaScript is set, and if it is do any clean up you need to do and kill the thread.
Edit: Here's a cheap example of how it could work, only this is single threaded. This is just to illustrate how you might handle pcall and such:
#include <stdlib.h>
#include "lauxlib.h"
/* Debug hook installed with LUA_MASKCOUNT: invoked every 100 VM
 * instructions.  After 10 firings it switches itself to a per-line hook and
 * starts raising errors on every executed line, so even scripts that catch
 * errors with pcall keep erroring until the stack fully unwinds. */
void hook(lua_State* L, lua_Debug *ar)
{
static int countdown = 10;
if (countdown > 0)
{
--countdown;
printf("countdown: %d!\n", countdown);
}
else
{
// From now on, as soon as a line is executed, error
// keep erroring until your script reaches the top
lua_sethook(L, hook, LUA_MASKLINE, 0);
luaL_error(L, "");
}
}
/* Demo driver: runs a script that recurses through nested pcalls while the
 * count hook (every 100 instructions) repeatedly raises errors to force it
 * to unwind. */
int main(int argc, const char *argv[])
{
    lua_State *state = luaL_newstate();
    luaL_openlibs(state);

    /* Fire the hook after every 100 VM instructions. */
    lua_sethook(state, hook, LUA_MASKCOUNT, 100);

    /* Infinitely recurse into pcalls */
    luaL_dostring(state, "function test() pcall(test) print 'recursing' end pcall(test)");

    lua_close(state);
    printf("Done!");
    return 0;
}
The way to end a script is to raise an error by calling error. However, if the user has called the script via pcall then this error will be caught.
It seems like you could terminate the thread externally (from your main thread) since the lua script is user supplied and you can't signal it to exit.
If that isn't an option, you could try the debug API. You could use lua_sethook to enable you to regain control assuming you have a way to gracefully terminate your thread in the hook.
I haven't found a way to cleanly kill a thread that is executing a long running lua script without relying on some intervention from the script itself. Here are some approaches I have taken in the past:
If the script is long running it is most likely in some loop. The script can check the value of some global variable on each iteration. By setting this variable from outside of the script you can then terminate the thread.
You can start the thread by using lua_resume. The script can then exit by using yield().
You could provide your own implementation of pcall that checks for a specific type of error. The script could then call error() with a custom error type that your version of pcall could watch for:
-- Example from the answer above: signal termination to a cooperating pcall
-- wrapper by raising a table as the error value; the wrapper can then
-- distinguish this deliberate abort (code == 900) from ordinary errors.
function()
local there_is_an_error = do_something()
if (there_is_an_error) then
-- error() accepts any value, not just strings; a table carries structure.
error({code = 900, msg = "Custom error"})
end
end
possibly useless, but in the lua I use (luaplayer or PGELua), I exit with
os.exit()
or
pge.exit()
If you're using coroutines to start the threads, you could maybe use coroutine.yield() to stop it.
You might wanna take look at
https://github.com/amilamad/preemptive-task-scheduler-for-lua
project. its preemptive scheduler for lua.
It uses a lua_yield call inside the hook, so you can suspend your lua thread. It also uses longjmp internally, but it is much safer.
session:destroy();
Use this single line of code at the point where you want to destroy the Lua script.
lua_KFunction cont(lua_State* L);
/* Suspend the running coroutine, handing 'res' stack values back to the
 * lua_resume that started it.
 * NOTE(review): this expression looks incorrect — lua_yield(L, res) is
 * evaluated eagerly to produce the ctx argument, and cont(L) is *called*
 * here rather than passed as the continuation function.  Confirm against
 * the signature quoted in the comment below before reuse. */
int my_yield_with_res(lua_State* L, int res) {
cout << " my_yield_with_res \n" << endl;
return lua_yieldk(L, 0, lua_yield(L, res), cont(L));/* int lua_yieldk(lua_State * L, int res, lua_KContext ctx, lua_KFunction k);
Suspends the execution of a coroutine (thread).  When a C function calls
lua_yieldk, the running coroutine suspends its execution, and the call to
lua_resume that started this coroutine returns.  The parameter res is the
number of values from the stack that are passed as results to lua_resume.
When the coroutine is resumed again, Lua calls the given continuation
function k to continue the execution of the suspended C function (see §4.7). */
};
/* Count hook: fires once the instruction budget set by lua_sethook is
 * exhausted, then suspends the coroutine via my_yield_with_res. */
int hookFunc(lua_State* L, lua_Debug* ar) {
cout << " hookFunc \n" << endl;
return my_yield_with_res(L, 0);// the hook
};
/* Continuation function: runs when the coroutine is resumed and removes
 * the hook so execution can proceed uninterrupted.
 * NOTE(review): the declared return type lua_KFunction with "return 0"
 * yields a null function pointer — likely a mistake; verify intent. */
lua_KFunction cont(lua_State* L) {// continuation function
cout << " hooh off \n" << endl;
lua_sethook(L, (lua_Hook)hookFunc, LUA_MASKCOUNT, 0);// disable the hook
return 0;
};
/* Helper that runs a named global Lua function as a coroutine with a
 * per-call instruction budget: the count hook suspends the function once
 * the budget expires, and Update() re-arms it.
 * NOTE(review): func_block pushes arguments with lua_pushvalue(L, i),
 * copying existing stack slots rather than fresh values — this presumes
 * the caller pre-pushed the arguments (see pushlua in main); confirm. */
struct Func_resume {
Func_resume(lua_State* L, const char* funcrun, unsigned int Args) : m_L(L), m_funcrun(funcrun), m_Args(Args) {}
//function name and argument count.
private:
void func_block(lua_State* L, const char* functionName, unsigned int Count, unsigned int m_Args) {
lua_sethook(m_L, (lua_Hook)hookFunc, LUA_MASKCOUNT, Count); //call the function with the given pause (instruction budget).
if (m_Args == 0) {
lua_getglobal(L, functionName);// fetch the function by name.
lua_resume(L, L, m_Args);
}
if (m_Args != 0) {
int size = m_Args + 1;
lua_getglobal(L, functionName);
for (int i = 1; i < size; i++) {
lua_pushvalue(L, i);
}
lua_resume(L, L, m_Args);
}
};
public:
void Update(float dt) {
unsigned int Count = dt * 100.0;// thread's running time, converted to an instruction budget.
func_block(m_L, m_funcrun, Count, m_Args);
};
~Func_resume() {}
private:
lua_State* m_L;
const char* m_funcrun; // function name.
unsigned int m_Count;// iteration count.
unsigned int m_Args;
};
/* Embedded test script: main(y) just counts from 1 to y, giving the hook
 * plenty of instructions to interrupt. */
const char* LUA = R"(
function main(y)
--print(" func main arg, a = ".. a.." y = ".. y)
for i = 1, y do
print(" func main count = ".. i)
end
end
)";
/* Demo driver: loads the embedded script, pushes the arguments for main(),
 * and runs it through Func_resume with an instruction budget.
 * NOTE(review): pushlua is not defined anywhere in this snippet — presumably
 * a helper that pushes a number onto the stack; confirm before compiling. */
int main(int argc, char* argv[]) {
lua_State* L = luaL_newstate();/* Creates a new Lua state. */
luaL_openlibs(L);
luaL_dostring(L, LUA);
//..pushlua(L, 12);
pushlua(L, 32);
//do {
Func_resume func_resume(L, "main", 2);
func_resume.Update(1.7);
lua_close(L);
// } while (LUA_OK != lua_status(L)); // Until the thread has finished.
return 0;
};

Resources