how can I free this malloc from function in main?
void insert(int data){
struct node* temp = (struct node*)malloc(sizeof(struct node));
temp->data = data;
temp->next = head;
head = temp;
}
here is the main function, I did try to free the head but still the memory still increasing every time add some inputs
int main(){
head = NULL;
int n, data;
srand(time(NULL));
again:
printf("how many numbers to be entered in linked list? ");
scanf("%d", &n);
for(int i=0; i<n; i++){
data = rand() % 100;
insert(data);
print();
}
goto again;
free(head);
}
Related
#include<stdio.h>
#include<stdlib.h>
struct node{
char data;
struct node*next;
};
struct node* top =NULL;
void push(char x){
struct node* temp =(struct node*)malloc(sizeof(struct node));
if(temp==NULL){
printf("Stacks Overflow\n");
return;
}
else{
temp->data = x;
temp->next = top;
top = temp;
}
}
char pop(){
if(top==NULL){
printf("Stacks Underflow\n");
return -1;
}
else{
char x;
struct node* p = top;
top = top->next;
x = p->data;
free(p);
return x;
}
}
char peek(int pos){
struct node*p = top;
int i;
for(i=0;p!=NULL && i<pos-1;i++)
p = p->next;
return (p!=NULL)?p->data:-1;
}
int main(){
char n, x;
printf("Enter the Number of Character to be pushed\n");
scanf("%d", &n);
for(char i=0;i<n;i++){
printf("Enter the Character\n");
x = getchar();
push(x);
}
while(top){
printf("%d,", pop());
}
}
why this code not work with char(n, x) but work fine with int(x, n).
I tried to implement char stack using linked list.
It works fine with integer but doesnot work with character.
When I use to insert char, one input get skipped but this doesnot happen in int .. Why?
I've produced this kernel
struct csrFormat {
int M, N;
int *IRP;
int *JA;
double *AS;
};
struct vector {
int dim;
double *val;
};
double csrSIMDReduction(struct csrFormat *csr, struct vector *vec, struct vector *res, int nz){
int M = csr->M;
int nonzerosPerRow = nz/M;
int chunk_size = 10000/nonzerosPerRow;
double *val = res->val;
struct timeval start,end;
gettimeofday(&start, NULL);
//IRP is the row_pointer array
//JA is the column indices array
//AS is the values array
int i,j,tmp;
#pragma omp parallel
{
#pragma omp for private(i, j, tmp) schedule(dynamic, chunk_size)
for (i=0; i<M; i++)
{
double result = 0.0;
#pragma omp simd reduction(+ : result)
for (j = csr->IRP[i]; j < csr->IRP[i+1]; j++)
{
tmp = csr->JA[j];
result += csr->AS[j] * vec->val[tmp];
}
val[i] = result;
}
}
gettimeofday(&end, NULL);
res->dim = M;
//printf("%ld.%06ld\n", start.tv_sec, start.tv_usec);
//printf("%ld.%06ld\n", end.tv_sec, end.tv_usec);
long t = (end.tv_sec - start.tv_sec)*1000000.0 + end.tv_usec - start.tv_usec;
return (double) t;
}
but i'm having a doubt: how does the vectorization works?
I mean, from what i've understood, the inner cicle is runned running multiple iterations together, but how can a single thread do this?
EDIT: code updated.
I get _CrtIsValidHeapPointer(pUserData) error when running the code above.
Sometimes the code works perfectly, and sometimes this message appears. So I guess the problem is related to the memory allocation. But I've gone through the code many times and the numbers make sence to me (and also when debugging).
I noticed it happens in line "free(str_temp)" at the debugging.
The relevant code is here:
int main(){
int n;
int len;
char *str;
char command[3];
printf("Enter your string:\n");
scanf("%d", &n);
str = malloc(n+1);
scanf("%s", str);
while (1){
printf(">");
scanf("%s", command);
if (compare(command, "ml")) {
int k;
scanf("%d", &k);
multiply(str, n, k);
printf("Current string is %s\n", str);
n = ln(str);
continue;
}
free(str);
return 0;
}
void multiply(char *str, int n, int k) {
char *str_temp = malloc(n+1);
int i;
int j;
int q;
for (i = 0; i < n; i++){
str_temp[i] = str[i];
}
str_temp[n] = '\0';
free(str);
*str = malloc(n*k+1);
for (i = 0; i < k; i++){
for (j = 0; j < n; j++){
str[i*n + j] = str_temp[j];
}
}
str[n*k] = '\0';
free(str_temp);
}
Try to use message defination
void multiply(char **str, int n, int k)//Use **str(double pointer) instead of *str.
And call it like
multiply(&str, n, k);
I am runnig the follwoing code using shared memory:
__global__ void computeAddShared(int *in , int *out, int sizeInput){
//not made parameters gidata and godata to emphasize that parameters get copy of address and are different from pointers in host code
extern __shared__ float temp[];
int tid = blockIdx.x * blockDim.x + threadIdx.x;
int ltid = threadIdx.x;
temp[ltid] = 0;
while(tid < sizeInput){
temp[ltid] += in[tid];
tid+=gridDim.x * blockDim.x; // to handle array of any size
}
__syncthreads();
int offset = 1;
while(offset < blockDim.x){
if(ltid % (offset * 2) == 0){
temp[ltid] = temp[ltid] + temp[ltid + offset];
}
__syncthreads();
offset*=2;
}
if(ltid == 0){
out[blockIdx.x] = temp[0];
}
}
int main(){
int size = 16; // size of present input array. Changes after every loop iteration
int cidata[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
/*FILE *f;
f = fopen("invertedList.txt" , "w");
a[0] = 1 + (rand() % 8);
fprintf(f, "%d,",a[0]);
for( int i = 1 ; i< N; i++){
a[i] = a[i-1] + (rand() % 8) + 1;
fprintf(f, "%d,",a[i]);
}
fclose(f);*/
int* gidata;
int* godata;
cudaMalloc((void**)&gidata, size* sizeof(int));
cudaMemcpy(gidata,cidata, size * sizeof(int), cudaMemcpyHostToDevice);
int TPB = 4;
int blocks = 10; //to get things kicked off
cudaEvent_t start, stop;
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord(start, 0);
while(blocks != 1 ){
if(size < TPB){
TPB = size; // size is 2^sth
}
blocks = (size+ TPB -1 ) / TPB;
cudaMalloc((void**)&godata, blocks * sizeof(int));
computeAddShared<<<blocks, TPB,TPB>>>(gidata, godata,size);
cudaFree(gidata);
gidata = godata;
size = blocks;
}
//printf("The error by cuda is %s",cudaGetErrorString(cudaGetLastError()));
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
float elapsedTime;
cudaEventElapsedTime(&elapsedTime , start, stop);
printf("time is %f ms", elapsedTime);
int *output = (int*)malloc(sizeof(int));
cudaMemcpy(output, gidata, sizeof(int), cudaMemcpyDeviceToHost);
//Cant free either earlier as both point to same location
cudaError_t chk = cudaFree(godata);
if(chk!=0){
printf("First chk also printed error. Maybe error in my logic\n");
}
printf("The error by threadsyn is %s", cudaGetErrorString(cudaGetLastError()));
printf("The sum of the array is %d\n", output[0]);
getchar();
return 0;
}
Clearly, the first while loop in computeAddShared is causing out of bounds error because I am allocating 4 bytes to shared memory. Why does cudamemcheck not catch this. Below is the output of cuda-memcheck
========= CUDA-MEMCHECK
time is 12.334816 msThe error by threadsyn is no errorThe sum of the array is 13
6
========= ERROR SUMMARY: 0 errors
Shared memory allocation granularity. The Hardware undoubtedly has a page size for allocations (probably the same as the L1 cache line side). With only 4 threads per block, there will "accidentally" be enough shared memory in a single page to let you code work. If you used a sensible number of threads block (ie. a round multiple of the warp size) the error would be detected because there would not be enough allocated memory.
My program is supposed to fork three processes. Each of these processes will create three threads and fork two additional processes. These two additional processes will create three threads.
Here is my code. I've tried to keep things simple with nested loops. I think at some point I might be forking more processes or creating more threads.
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
void *printme(void* Array){
int *Arr = (int *) Array;
int len = sizeof(Arr) / sizeof(int);
if (len == 1){
printf("I'm thread %d.%d",Arr[0],Arr[1]);
}
else if (len == 2){
printf("I'm thread %d.%d.%d",Arr[0],Arr[1],Arr[2]);
}
printf("\n");
pthread_exit(NULL);
}
int main(void){
int i, j, k, l;
int threadLevel1[2];
int threadLevel2[3];
printf("\n");
for (i = 1 ; i < 4 ; i++){ // Loop to fork the three main processes.
if (fork() != 0){
sleep(4);
}
else{
//The newly forked process will create three threads and fork two additional processes.
for (j = 1 ; j < 4 ; j++){
pthread_t t;
threadLevel1[0] = i;
threadLevel1[1] = j;
if (pthread_create(&t, NULL, printme, (void*) threadLevel1) != 0){
perror("pthread_create");
exit(1);
}
}
for (k = 1; k < 3 ; k++){
pid_t a = fork();
if (a != 0){
sleep(2);
}
else if (a == -1){
perror("fork"); /* display error message */
exit(0);
}
else{
for (l = 1 ; l < 4 ; l++){
pthread_t t;
threadLevel2[0] = i;
threadLevel2[1] = k;
threadLevel2[2] = l;
if (pthread_create(&t, NULL, printme, (void*) threadLevel2)!=0) {
perror("pthread_create");
exit(1);
}
}
}
}
}
}
return 0;
}
You have a problem in your code here:
void *printme(void* Array){
int *Arr = (int *) Array;
int len = sizeof(Arr) / sizeof(int);
The value len will always be the same no matter what is passed in to printme. That's because C passes arrays as pointers, not as objects with embedded lengths.