I would like to create a code that will:
create recursive threads without using loops, the threads has to execute certain routine. I am using Pthread_create on ubuntu
#include <pthread.h>
#include <stdio.h>
#define NUM_THREADS 8
void *PrintHello(void *threadid)
{
printf("\n%d: Hello World!\n", threadid);
pthread_exit(NULL);
}
int main (int argc, char *argv[])
{
pthread_t threads[NUM_THREADS];
int rc, t;
for(t=0; t<NUM_THREADS; t++)
{
printf("Creating thread %d\n", t);
rc = pthread_create(&threads[t], NULL, PrintHello, (void *)t);
if (rc)
{
printf("ERROR; return code from pthread_create() is %d\n", rc);
exit(-1);
}
}
pthread_exit(NULL);
}
Yes you can create thread without using for loop , I have modified your code and used function recursion to create pthread..
#include <pthread.h>
#include <stdio.h>
#define NUM_THREADS 8
pthread_t threads[NUM_THREADS];
void *PrintHello(void *threadid)
{
printf("\n%d: Hello World!\n", threadid);
pthread_exit(NULL);
}
void create_thread(int n){
if (n > 0 ){
//Create thread
printf("Creating thread %d\n", ((NUM_THREADS - n) + 1) );
//NUM_THREADS - n to start index from 0
int rc = pthread_create(&threads[NUM_THREADS - n], NULL, PrintHello, (void *)(NUM_THREADS - n));
if (rc)
{
printf("ERROR; return code from pthread_create() is %d\n", rc);
exit(-1);
}
n--;
create_thread(n);
}
return;
}
int main(int argc, char *argv[])
{
int rc, t;
create_thread( NUM_THREADS );
//Wait to finish all thread
for (t = 0; t < NUM_THREADS; t++)
{
pthread_join(threads[t],NULL);
}
pthread_exit(NULL);
return 0;
}
Hope this will help you.
Related
#include <stdlib.h>
#include <stdio.h>
#include <strings.h>
#include <sys/mman.h>
#include <time.h>
#define HUGEPAGE 2048*1024
void *normal_malloc(int len)
{
void *ptr = malloc(len);
bzero(ptr, len);
return ptr;
}
void *trans_malloc(int len)
{
void *ptr = NULL;
int ret = posix_memalign(&ptr, HUGEPAGE, len);
if(ret) perror("posix_memalign");
ret = madvise(ptr, len, MADV_HUGEPAGE);
bzero(ptr, len);
return ptr;
}
void *mmap_malloc(int len)
{
void *ptr = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB, -1,0);
return ptr;
}
int main(int argc, char **argv)
{
char *ptr = NULL;
int len = HUGEPAGE*256;
srand(time(NULL));
switch(argc){
case 1: ptr = normal_malloc(len);break;
case 2: ptr = trans_malloc(len);break;
case 3: ptr = mmap_malloc(len); break;
}
long j = 0;
for(int i=0;i<len;i++){
j += ptr[rand()%len];
}
return 0;
}
I use normal malloc and posix_memalign and mmap to test performance.
My test result is :
malloc cost about 29.7s, posix_memalign cost about 23.5s, and mmap is very near with malloc.
Both posix_memalign and mmap uses hugepages. Why one has obvious improvement, the other not? Do I use mmap in the wrong way?
I don't do bzero for mmap since the man pages says "its contents are initialized to zero".
i am trying to create a ring of mpi processes where each MPI process will launch a pthread and the threads will perform the ring, i used pthread so i can use the MPI processes to do another task. It seems that i can't use MPI_send or MPI_Recv inside a pthread, i have no compilation error but i do have a run time error.
i compile using this command
mpicc -lpthread threaded_ring.c
this is the runtime error
a.out:28372 terminated with signal 11 at PC=2aaaaaae312d SP=2aaab0771860. Backtrace:
/usr/lib64/libpsm_infinipath.so.1(psmi_mpool_get+0xd)[0x2aaaaaae312d]
a.out:28366 terminated with signal 11 at PC=333c00c110 SP=2aaab02d9698. Backtrace:
/lib64/libpthread.so.0(pthread_spin_lock+0x0)[0x333c00c110]
/usr/lib64/libpsm_infinipath.so.1(psmi_amsh_short_request+0x180)[0x2aaaaaad31b0]
/usr/lib64/libpsm_infinipath.so.1(+0xd9f6)[0x2aaaaaad49f6]
/usr/lib64/libpsm_infinipath.so.1(psm_mq_send+0x41)[0x2aaaaaaf5d51]
/usr/local/mpi/mvapich2/intel12/1.8.1/lib/libmpich.so.3(psm_send_pkt+0xb1)[0x2aaaaae0af21]
/usr/local/mpi/mvapich2/intel12/1.8.1/lib/libmpich.so.3(psm_istartmsgv+0x130)[0x2aaaaae0a010]
/usr/local/mpi/mvapich2/intel12/1.8.1/lib/libmpich.so.3(MPIDI_CH3_iStartMsgv+0x6)[0x2aaaaaddf1e6]
/usr/local/mpi/mvapich2/intel12/1.8.1/lib/libmpich.so.3(MPIDI_CH3_EagerContigSend+0x89)[0x2aaaaada6e39]
/usr/local/mpi/mvapich2/intel12/1.8.1/lib/libmpich.so.3(MPID_Send+0x116)[0x2aaaaade3136]
/usr/local/mpi/mvapich2/intel12/1.8.1/lib/libmpich.so.3(MPI_Send+0xf8)[0x2aaaaae2a408]
./a.out[0x4022ba]
/lib64/libpthread.so.0[0x333c0077f1]
/lib64/libc.so.6(clone+0x6d)[0x333bce570d]
a.out:28373 terminated with signal 11 at PC=333bf9d428 SP=2aaab0771838. Backtrace:
a.out:28370 terminated with signal 11 at PC=2aaaaaae312d SP=2aaab0771860. Backtrace:
here is my code
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <time.h>
void *ring_func(void *p)
{
int token=1;
int world_rank;
MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
int world_size;
MPI_Comm_size(MPI_COMM_WORLD, &world_size);
if (world_rank==0){
MPI_Send(&token, 1, MPI_INT, (world_rank + 1) % world_size, 0,
MPI_COMM_WORLD);
}
if (world_rank != 0) {
MPI_Recv(&token, 1, MPI_INT, world_rank - 1, 0, MPI_COMM_WORLD,
MPI_STATUS_IGNORE);
printf("Process %d received token %d from process %d\n", world_rank, token,
world_rank - 1);
}
pthread_exit(NULL);
}
int main(int argc, char** argv) {
// Initialize the MPI threaded environment
int provided;
MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE , &provided);
if (provided < MPI_THREAD_MULTIPLE)
{
printf("Error: the MPI library doesn't provide the required thread level\n");
MPI_Abort(MPI_COMM_WORLD, 0);
}
pthread_t ring ;
pthread_create (&ring, NULL, ring_func, NULL) ;
MPI_Barrier(MPI_COMM_WORLD);
MPI_Finalize();
}
Thanks to Hristo lliev i was able to solve the problem, the problem was that the main thread was finsishing before my pthread but when i added pthread_join the main thread waited for the pthread to join before calling MPI_Finalize(). Here is the new code
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <time.h>
void *ring_func(void *p)
{
int token;
// Receive from the lower process and send to the higher process. Take care
// of the special case when you are the first process to prevent deadlock.
int world_rank;
MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
int world_size;
MPI_Comm_size(MPI_COMM_WORLD, &world_size);
if (world_rank != 0) {
MPI_Recv(&token, 1, MPI_INT, world_rank - 1, 0, MPI_COMM_WORLD,
MPI_STATUS_IGNORE);
printf("Process %d received token %d from process %d\n", world_rank, token,
world_rank - 1);
} else {
// Set the token's value if you are process
token = -1;
}
MPI_Send(&token, 1, MPI_INT, (world_rank + 1) % world_size, 0,
MPI_COMM_WORLD);
if (world_rank == 0) {
// sleep(20);
MPI_Recv(&token, 1, MPI_INT, world_size - 1, 0, MPI_COMM_WORLD,
MPI_STATUS_IGNORE);
printf("Process %d received token %d from process %d\n", world_rank, token,
world_size - 1);
}
pthread_exit(NULL);
}
int main(int argc, char** argv) {
// Initialize the MPI threaded environment
int provided;
MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE , &provided);
if (provided != MPI_THREAD_MULTIPLE)
{
printf("Error: the MPI library doesn't provide the required thread level\n");
MPI_Abort(MPI_COMM_WORLD, 0);
}
pthread_t ring ;
pthread_create (&ring, NULL, ring_func, NULL) ;
pthread_join(ring,NULL);
MPI_Barrier(MPI_COMM_WORLD);
MPI_Finalize();
}
My speedup-example.cpp source code is shown below
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <assert.h>
#include "tern/user.h"
#define N 8
#define M 10000
int nwait = 0;
int nexit = 0;
volatile long long sum;
long loops = 6e3;
pthread_mutex_t mutex;
pthread_cond_t cond;
pthread_barrier_t bar;
void set_affinity(int core_id) {
cpu_set_t cpuset;
CPU_ZERO(&cpuset);
CPU_SET(core_id, &cpuset);
assert(pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset) ==0);
}
void* thread_func(void *arg) {
set_affinity((int)(long)arg);
for (int j = 0; j < M; j++) {
pthread_mutex_lock(&mutex);
nwait++;
for (long i = 0; i < loops; i++) // This is the key of speedup for parrot: the mutex needs to be a little bit congested.
sum += i;
pthread_cond_wait(&cond, &mutex);
pthread_mutex_unlock(&mutex);
soba_wait(0);
pthread_barrier_wait(&bar);
for (long i = 0; i < loops; i++)
sum += i*i*i*i*i*i;
//fprintf(stderr, "compute thread %u %d\n", (unsigned)thread, sched_getcpu());
}
}
int main(int argc, char *argv[]) {
set_affinity(23);
soba_init(0, N, 20);
pthread_t th[N];
int ret;
pthread_cond_init(&cond, NULL);
pthread_barrier_init(&bar, NULL, N);
for(unsigned i=0; i<N; ++i) {
ret = pthread_create(&th[i], NULL, thread_func, (void*)i);
assert(!ret && "pthread_create() failed!");
}
for (int j = 0; j < M; j++) {
while (nwait < N) {
sched_yield();
}
pthread_mutex_lock(&mutex);
nwait = 0;
//fprintf(stderr, "broadcast %u %d\n", (unsigned)pthread_self(), sched_getcpu());
pthread_cond_broadcast(&cond);
pthread_mutex_unlock(&mutex);
}
for(unsigned i=0; i<N; ++i)
pthread_join(th[i], NULL);
exit(0);
}
I already succeeded wrote the mk of speedup-example.cpp
gcc speedup-example.cpp -o speedup-example -O2 -g \-I$XTERN_ROOT/include -L$XTERN_ROOT/dync_hook -Wl,--rpath,$XTERN_ROOT/dync_hook -lxtern-annot \-lpthread
But when I want to run it, problems occur.
For example
$ time ./speedup-example
It informs me that
speedup-example.cpp:23: void set_affinity(int): Assertion `pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset) ==0' failed.
Can someone help me solve this problem? Many thanks.
David Shwartz helped me alot and now it kinda works...
do you have any idea for more elgant way to parse the input, if the input consists more than 2 numbers to add which need to be processed by the child ? I want the child to get only two integers so that's why I created the shared memory so the father will send the child the result(shared memory) + another integer.
Thank you all.
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <string.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
volatile int *shared=0;
int shmid;
int main()
{
char line[256];
int readByte;
int fd[2]; //pipe to son, who processes addition
int pid;
shmid=shmget ( IPC_PRIVATE, sizeof(int) , 0600 );
shared=shmat ( shmid, 0 , 0);
if ( pipe(fd) )
{
perror("pipe");
exit(-1);
}
pid=fork();
if (pid!=0) // father
{
close (fd[0]);
readByte=read(0, line, 256);
line[readByte-1]='\0';
printf("%d",readByte);
int arr[2];
int i=0;
int j=0;
int flag=0;
char num[10];
while (i<readByte)
{
if (line[i]=='+' )
{
i++;
j=0;
flag=1;
}
while (line[i]!='+' && line[i]!='\0')
{
num[j]=line[i];
i++;
j++;
}
num[j]='\0';
if (flag==0)
arr[0]=atoi(num);
else
{
arr[1]=atoi(num);
i++;
}
}
printf("first %d\n",arr[0]);
printf("sec %d\n",arr[1]);
write(fd[1], &arr, sizeof(arr));
wait(NULL);
printf ( "%d\n" , *shared );
}
else
// son
{
int arr[2];
int sum;
readByte = read(fd[0], &arr, sizeof(arr));
printf("son printing: %d\n",arr[0]);
printf("son printing: %d\n",arr[1]);
sum =arr[0]+arr[1];
*shared=sum;
close (fd[0]);
shmdt ( (const void *) shared );
}
shmdt ( (const void *) shared );
shmctl ( shmid , IPC_RMID , 0 );
close(fd[1]);
return 0;
}
You throw away the return value of shmat. And you expect shared to be shared, but it's just a regular variable. Also, you need to prevent the compiler from optimizing away accesses to the shared memory. Here it is with all the fatal bugs fixed:
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <string.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
volatile int *shared;
int shmid;
int main()
{
int s,i;
shmid=shmget ( IPC_PRIVATE, sizeof(int), 0600 );
shared=shmat ( shmid, 0 , 0);
*shared=100;
printf ( "%d\n" , *shared);
if ( fork()==0 ) // son
{
*shared=1000;
shmdt ( (const void *) shared );
}
else // father
{
wait ( &s );
printf ( "%d\n" , *shared);
shmdt ( (const void *) shared );
shmctl ( shmid , IPC_RMID , 0 );
}
return 0;
}
According to https://github.com/signal11/hidapi/issues/72 HIDAPI ought to be thread safe on Linux machines. However, I can't get it working at all. This is what I do:
#ifdef WIN32
#include <windows.h>
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>
#include <stdlib.h>
#include <assert.h>
#include "hidapi.h"
hid_device *handle;
static void *TaskCode(void *argument)
{
int res;
//hid_device *handle;
unsigned char buf[64];
// res = hid_init();
// if( res == -1 )
// {
// return (void*)1;
// }
//
// handle = hid_open(0x0911, 0x251c, NULL);
// if( handle == NULL )
// {
// return (void*)2;
// }
printf( "while 2\n");
while( 1 )
{
memset( buf, 64, 0 );
res = hid_read(handle, buf, 0);
if( res == -1 )
{
return (void*)3;
}
printf( "received %d bytes\n", res);
for (int i = 0; i < res; i++)
printf("Byte %d: %02x ", i+1, buf[i]);
//printf( "%02x ", buf[0]);
fflush(stdout);
}
return (void*)0;
}
int main(int argc, char* argv[])
{
int res;
//hid_device *handle;
unsigned char buf[65];
res = hid_init();
if( res == -1 )
{
return 1;
}
handle = hid_open(0x0911, 0x251c, NULL);
if( handle == NULL )
{
return 2;
}
hid_set_nonblocking( handle, 0 );
pthread_t thread;
int rc = pthread_create(&thread, NULL, TaskCode, NULL);
printf( "while 1\n");
while(1)
{
int a = getchar();
if( a == 'a')
{
// Get Device Type (cmd 0x82). The first byte is the report number (0x0).
buf[0] = 0x0;
buf[1] = 0x82;
res = hid_write(handle, buf, 65);
if( res != -1 )
printf( "write ok, transferred %d bytes\n", res );
else
{
printf( "write error\n" );
char* str = hid_error(handle);
printf( "error: %s\n", str );
return 1;
}
}
else if( a== 'b')
break;
}
void* trc;
rc = pthread_join(thread, &trc);
printf( "rc code: %d\n", (int)trc );
// Finalize the hidapi library
res = hid_exit();
return 0;
}
If I don't use the global handle, I get 'write error' every time. If I do, as in the example, formally everything works but hid_read always returns 0 bytes... Of course, if I do simple hid_write() followed by hid_read(), I'll get the correct reply to the command 0x82 as intended. I'm really lost here, am I overlooking something?
EDIT: to clarify, zero bytes return also for everything, incl. buttons on mouse etc. So it seems to work but the data buffer is always zero bytes.
Shame on me, a dumb mistake. The code should be:
memset( buf, 0, 64 );
res = hid_read(handle, buf, 64);
and then it works. Should sleep more and write less!