Understanding of CUDA's cudaGetSymbolAddress

Understanding of CUDA's cudaGetSymbolAddress - memory

I am new to CUDA, now I'm trying to understand how cudaGetSymbolAddress works. I get an unexpected Segmentation Fault in a really simple code. What I do is the following:
I declare a global device variable(device_int)
In main() I ensure that the definition was correct by setting its value in a kernel
I create a pointer (host_pointer_to_device_int) in host memory and make it point to device_int via cudaGetSymbolAddress
I create one more pointer (host_pointer_to_host_int) and try to cudaMemcpy the value from host_pointer_to_device_int to host_pointer_to_host_int
All these operations finish with no errors, but I get Segmentation Fault when trying to print the host_pointer_to_host_int's value. Here is the code:
#include <iostream>
#include <cassert>
using namespace std;
__device__ int device_int;
__global__ void kernel()
{
device_int = 1000;
}
int main()
{
kernel<<<1, 1>>>();
assert(cudaGetLastError() == cudaSuccess); // The above operation executed successfully
int *host_pointer_to_device_int;
cudaGetSymbolAddress((void **)&host_pointer_to_device_int, device_int);
assert(cudaGetLastError() == cudaSuccess); // The above operation executed successfully
int *host_pointer_to_host_int;
// Copy the device_int's value
cudaMemcpy((void **)&host_pointer_to_host_int, host_pointer_to_device_int,
sizeof(int), cudaMemcpyDeviceToHost);
assert(cudaGetLastError() == cudaSuccess); // The above operation executed successfully
cout << *host_pointer_to_host_int << endl; // Segmentation fault
}

My mistake was not in misunderstanding how cudaGetSymbolAddress works, but in using cudaMemcpy with wrong parameters types: I expected cudaMemcpy to allocate memory for me, so I've cast my variables to the wrong types.
The corrected code is:
#include <iostream>
#include <cassert>
using namespace std;
__device__ int device_int;
__global__ void kernel()
{
device_int = 1000;
}
int main()
{
kernel<<<1, 1>>>();
assert(cudaGetLastError() == cudaSuccess);
int *host_pointer_to_device_int;
/* Get a pointer to device_int. After this, I won't be able to access it,
* but I'm going to copy its value with cudaMemcpy */
cudaGetSymbolAddress((void **)&host_pointer_to_device_int, device_int);
assert(cudaGetLastError() == cudaSuccess); // The above operation executed successfully
int host_int;
// Copy the device_int's value
cudaMemcpy(&host_int, host_pointer_to_device_int,
sizeof(int), cudaMemcpyDeviceToHost);
assert(cudaGetLastError() == cudaSuccess); // The above operation executed successfully
cout << host_int << endl; // Everything's fine!
}
Thanks to #talonmies for helping me in figuring it out.

Related

Why get_env(US_VHOST_DATA) is NULL on first invocation of main() in handlers/main.c

I'm using G-Wan v4.12.31.
Does anybody know why get_env(US_VHOST_DATA) returns NULL on first invocation of main() in handlers/main.c?
int init(int argc, char *argv[])
{
u32 *states = (u32 *)get_env(argv, US_HANDLER_STATES);
*states = (1 << HDL_AFTER_READ);
return 0;
}
int main(int argc, char *argv[])
{
void **vhost = (void **)get_env(argv, US_VHOST_DATA);
printf(%p\n", vhost); //NULL first time, non-NULL all other times
return 255;
}

The get_env(US_VHOST_DATA) call returns the virtual host information.
There is no host information available for G-WAN as long as the client request has not been parsed.
This is why no meaningfull host can be returned before the HDL_AFTER_PARSE handler state and why G-WAN returns NULL.

shared object in pthread

I want to write a sample program in which 16 threads have access to a shared object with huge size like 10gb. I know that I can use pthread_mutex_t to get the lock on the object, but how can I make it efficient so that two or more thread can modify disjoint part of the shared object simultaneously?

Maybe you can create an array of 10 pthread_mutex_t's, one for each 1gb range, and lock the appropriate mutex for the range you'll be modifying?

What about using a sempahore. You can initialize semaphore with number of threads that shares the resources.
/* Includes */
#include <unistd.h> /* Symbolic Constants */
#include <sys/types.h> /* Primitive System Data Types */
#include <errno.h> /* Errors */
#include <stdio.h> /* Input/Output */
#include <stdlib.h> /* General Utilities */
#include <pthread.h> /* POSIX Threads */
#include <string.h> /* String handling */
#include <semaphore.h> /* Semaphore */
void semhandler ( void *ptr );
sem_t mutex;
int cntr=0; /* shared variable */
int main()
{
int arg[2];
pthread_t thread1;
pthread_t thread2;
arg[0] = 0;
arg[1] = 1;
/* initialize mutex to 2 to share resource with two threads*/
/* Seconds Argumnet "0" makes the semaphore local to the process */
sem_init(&mutex, 0, 2);
pthread_create (&thread1, NULL, (void *) &semhandler, (void *) &arg[0]);
pthread_create (&thread2, NULL, (void *) &semhandler, (void *) &arg[1]);
pthread_join(thread1, NULL);
pthread_join(thread2, NULL);
sem_destroy(&mutex);
exit(0);
} /* main() */
void semhandler ( void *ptr )
{
int x;
x = *((int *) ptr);
printf("Thrd %d: Waiting to enter critical region...\n", x);
sem_wait(&mutex); /* down semaphore */
if( x == 1 )
cntr++;
/* START CRITICAL REGION */
printf("Thrd %d: Now in critical region...\n", x);
printf("Thrd %d: New Counter Value: %d\n", x, cntr);
printf("Thrd %d: Exiting critical region...\n", x);
/* END CRITICAL REGION */
sem_post(&mutex); /* up semaphore */
pthread_exit(0); /* exit thread */
}

BCB 6.0 "raised exception class EAccessViolation with message 'Access violation at address'"

I'm newer to C++. I have written some code, but when i run it, there's always this:
raised exception class
EAccessViolation with message 'Access
violation at address'
i don't understand this. Would you like to help me solve it? It's important to me. Really, really thank you!
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <math.h>
#include <conio.h>
#define k 2
#define minoffset 0.5
using namespace std;
struct Point
{
double X;
double Y;
};
vector<Point> dataprocess();
void k_means(vector<Point> points,int N);
double getdistance(Point p1,Point p2)
{ double distance;
distance=sqrt((p1.X-p2.X)*(p1.X-p2.X)+(p1.Y-p2.Y)*(p1.Y-p2.Y));
return distance;
}
int getmindis(Point p,Point means[])
{
int i;
int c;
double dis=getdistance(p,means[0]);
for(i=1;i<k;i++)
{
double term=getdistance(p,means[i]);
if(term<dis)
{
c=i;
dis=term;
}
}
return c;
}
Point getmeans(vector<Point> points)
{
int i;
double sumX,sumY;
Point p;
int M=points.size();
for(i=0;i<M;i++)
{
sumX=points[i].X;
sumY=points[i].Y;
}
p.X=sumX/M;
p.Y=sumY/M;
return p;
}
int main()
{ int N;
vector<Point> stars;
stars=dataprocess();
N=stars.size();
cout<<"the size is:"<<N<<endl;
k_means(stars,N);
getch();
}
vector<Point> dataprocess()
{
int i;
int N;
double x,y;
vector<Point> points;
Point p;
string import_file;
cout<<"input the filename:"<<endl;
cin>>import_file;
ifstream infile(import_file.c_str());
if(!infile)
{
cout<<"read error!"<<endl;
}
else
{
while(infile>>x>>y)
{
p.X=x;
p.Y=y;
points.push_back(p);
}
}
N=points.size();
cout<<"output the file data:"<<endl;
for(i=0;i<N;i++)
{
cout<<"the point"<<i+1<<"is:X="<<points[i].X<<" Y="<<points[i].Y<<endl;
}
return points;
}
void k_means(vector<Point> points,int N)
{
int i;
int j;
int index;
vector<Point> clusters[k];
Point means[k];
Point newmeans[k];
double d,offset=0;
bool flag=1;
cout<<"there will be"<<k<<"clusters,input the original means:"<<endl;
for(i=0;i<k;i++)
{
cout<<"k"<<i+1<<":"<<endl;
cin>>means[i].X>>means[i].Y;
}
while(flag)
{
for(i=0;i<N;i++)
{
index=getmindis(points[i],means);
clusters[index].push_back(points[i]);
}
for(j=0;j<k;j++)
{
newmeans[j]=getmeans(clusters[j]);
offset=getdistance(newmeans[j],means[j]);
}
if(offset>d)
{
d=offset;
}
flag=(minoffset<d)?true:false;
for(i=0;i<k;i++)
{
means[i]=newmeans[i];
clusters[i].clear();
}
}
for(i=0;i<k;i++)
{
cout<<"N"<<i+1<<"="<<clusters[i].size()<<endl;
cout<<"the center of k"<<i+1<<"is:"<<means[i].X<<" "<<means[i].Y<< endl;
}
}

You surely have some algo errors in you code. It is difficult to deal with code without input data, that caused an error, but let's try:
First, lets look at function Point getmeans(vector<Point> points)
it is supposed to evaluate mean coordinates for cluster of points: if you pass an empty cluster to this function it will cause an error:
look here -
int M=points.size()
and here -
for(i=0;i<M;i++)
{
sumX=points[i].X;
sumY=points[i].Y;
}
if your cluster is empty than M will be zero and you loop will iterate 2^31 times (until 32 bit integer overflow) and each time you will try to read values of nonexistent vector items
So, You have to test if you vector is not empty before running main function loop and you have to decide which mean values should be assigned for zero cluster (May be you need an additional flag for empty cluster which will be checked before dealing with cluster's mean values)
Then lets examine function int getmindis(Point p,Point means[]) and, also, a place, where we call it:
index=getmindis(points[i],means); clusters[index].push_back(points[i]);
This function assings points to clusters. cluster number is ruled by c variable. If input point doesn't fit to any cluster, function will return uninitialized variable (holding any possible value) which. then is used as vector index of nonexisting element - possible access violation error
You probably have to initialize c to zero in declaration
Tell us when you will be ready with errors described above and also show us a sample input file (one which causes errors, if all datasets cause errors, show us the smallest one)

how one thread can be killed in another thread

Actually,the main scenerio is that : from main thread there are two thread running.By using conditional variable,two threads will be running and sleeping and then it will return to main thread.I mean I dont want different output pattern.just one pattern:from main->thread1->thread2->main.
I have written a code for C thread.It shows the result I want sometimes and sometimes not.as for example,the output is:
I am in thread 1
before conditional wait
I am in thread 2
before conditional release
i am again in thread 2
i am again in thread 1
main exits here
The problem is sometimes "main exits here" does not execute.Please help me.It is to be noted that I cant use pthread_join().my code is given below
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <semaphore.h>
pthread_mutex_t gLock;
pthread_cond_t gCondition;
pthread_mutex_t mLock;
pthread_cond_t mCondition;
void initialize()
{
pthread_mutex_init(&gLock, NULL);
pthread_cond_init (&gCondition, NULL);
pthread_mutex_init(&mLock, NULL);
pthread_cond_init (&mCondition, NULL);
return;
}
void * threadOne(void * msg)
{
printf("%s \n",(char*) msg);
printf("before conditional wait\n");
pthread_mutex_lock(&gLock);
pthread_cond_wait(&gCondition,&gLock);
pthread_mutex_unlock(&gLock);
printf("i am again in thread 1\n");
pthread_mutex_lock(&mLock);
pthread_cond_signal(&mCondition);
pthread_mutex_unlock(&mLock);
}
void * threadTwo(void * msg)
{
printf("%s\n",(char*)msg);
printf("before conditional release\n");
pthread_mutex_lock(&gLock);
pthread_cond_signal(&gCondition);
pthread_mutex_unlock(&gLock);
printf("i am again in thread 2\n");
}
int main()
{
pthread_t thread1;
pthread_t thread2;
char * msg1="I am in thread 1";
char * msg2="I am in thread 2";
initialize();
pthread_create(&thread1,NULL,threadOne,(void*) msg1);
pthread_create(&thread2,NULL,threadTwo,(void*) msg2);
pthread_mutex_lock(&mLock);
pthread_cond_wait(&mCondition,&mLock);
pthread_mutex_unlock(&mLock);
printf("main exits here");
return 0;
}

The problem is that you are using the condition variable incorrectly. A condition variable is just a notification mechanism, not a flag. It has no internal state other than the list of threads currently waiting. Consequently, if main() has not actually executed as far as the pthread_cond_wait() call when the other threads call pthread_cond_signal() then the signal is lost, and main() will wait forever.
You need to use a separate flag associated with the condition variable. main() can then check this flag, and only wait if the flag is not set. Also, it must check this flag in a loop, to ensure that "spurious wakeups" are handled, where pthread_cond_wait() returns without a corresponding signal. The same applies to the notification between threadOne and threadTwo.
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <semaphore.h>
pthread_mutex_t gLock;
pthread_cond_t gCondition;
int gFlag=0;
pthread_mutex_t mLock;
pthread_cond_t mCondition;
int mFlag=0;
void initialize()
{
pthread_mutex_init(&gLock, NULL);
pthread_cond_init (&gCondition, NULL);
pthread_mutex_init(&mLock, NULL);
pthread_cond_init (&mCondition, NULL);
}
void * threadOne(void * msg)
{
printf("%s \n",(char*) msg);
printf("before conditional wait\n");
pthread_mutex_lock(&gLock);
while(!gFlag)
{
pthread_cond_wait(&gCondition,&gLock);
}
pthread_mutex_unlock(&gLock);
printf("i am again in thread 1\n");
pthread_mutex_lock(&mLock);
mFlag=1;
pthread_cond_signal(&mCondition);
pthread_mutex_unlock(&mLock);
}
void * threadTwo(void * msg)
{
printf("%s\n",(char*)msg);
printf("before conditional release\n");
pthread_mutex_lock(&gLock);
gFlag=1;
pthread_cond_signal(&gCondition);
pthread_mutex_unlock(&gLock);
printf("i am again in thread 2\n");
}
int main()
{
pthread_t thread1;
pthread_t thread2;
char * msg1="I am in thread 1";
char * msg2="I am in thread 2";
initialize();
pthread_create(&thread1,NULL,threadOne,(void*) msg1);
pthread_create(&thread2,NULL,threadTwo,(void*) msg2);
pthread_mutex_lock(&mLock);
while(!mFlag)
{
pthread_cond_wait(&mCondition,&mLock);
}
pthread_mutex_unlock(&mLock);
printf("main exits here");
return 0;
}

Modifying PC in jmp_buf to go to another function

For a user-lever thread library, I need to figure out jumping to a function by modifying PC value stored in jmp_buf.
This is what I have written:
jmp_buf env;
void print (void) {
printf("\nHello World!");
}
static int ptr_mangle(int p) {
unsigned int ret;
asm(" movl %1, %%eax;\n"
" xorl %%gs:0x18, %%eax;"
" roll $0x9, %%eax;"
" movl %%eax, %0;"
: "=r"(ret)
: "r"(p)
: "%eax"
);
return ret;
}
int main() {
int i = setjmp(env);
env[0].__jmpbuf[5] = ptr_mangle(print);
longjmp(env, 2);
return 0;
}
I am trying to modify PC in jmp_buf by setting it to the address of the function I am trying to jump to.
I am getting a segmentation fault.
I am unable to figure out what exactly needs to be done. Do I need to modify SP as well?
Any help would be very much appreciated.

What are you trying to do? Are you not checking for the return value of setjmp? I don't think you are doing this correctly. Have a look at the sample code below to see what would be the output be:
#include <stdio.h>
#include <setjmp.h>
#include <stdlib.h>
void subroutine(jmp_buf);
int main(void)
{
int value;
jmp_buf jumper;
value = setjmp(jumper);
if (value != 0)
{
printf("Longjmp with value %d\n", value);
exit(value);
}
printf("About to call subroutine ... \n");
subroutine(jumper);
return 0;
}
void subroutine(jmp_buf jumper)
{
longjmp(jumper,1);
}
The output would be:
About to call subroutine...
Longjmp with a value of 1.
Which begs the question - why are you trying to modify the IP? It sounds like you overwrote something or the code 'jumped' off into the woods and trampled something and came back with a hard landing i.e. segfault.
The variable env is specifically a struct, do not use an array subscript as you have done. I suspect that is why you got a segfault...
Hope this helps,
Best regards,
Tom.

Develop Reference

ios ruby-on-rails asp.net-mvc docker delphi jenkins grails google-sheets machine-learning dart

Understanding of CUDA's cudaGetSymbolAddress - memory

Related

Why get_env(US_VHOST_DATA) is NULL on first invocation of main() in handlers/main.c

shared object in pthread

BCB 6.0 "raised exception class EAccessViolation with message 'Access violation at address'"

how one thread can be killed in another thread

Modifying PC in jmp_buf to go to another function

Categories

Resources