hugepages allocated by mmap is slower than posix_memalign - huge-pages

#include <stdlib.h>
#include <stdio.h>
#include <strings.h>
#include <sys/mman.h>
#include <time.h>
#define HUGEPAGE 2048*1024
void *normal_malloc(int len)
{
void *ptr = malloc(len);
bzero(ptr, len);
return ptr;
}
void *trans_malloc(int len)
{
void *ptr = NULL;
int ret = posix_memalign(&ptr, HUGEPAGE, len);
if(ret) perror("posix_memalign");
ret = madvise(ptr, len, MADV_HUGEPAGE);
bzero(ptr, len);
return ptr;
}
void *mmap_malloc(int len)
{
void *ptr = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB, -1,0);
return ptr;
}
int main(int argc, char **argv)
{
char *ptr = NULL;
int len = HUGEPAGE*256;
srand(time(NULL));
switch(argc){
case 1: ptr = normal_malloc(len);break;
case 2: ptr = trans_malloc(len);break;
case 3: ptr = mmap_malloc(len); break;
}
long j = 0;
for(int i=0;i<len;i++){
j += ptr[rand()%len];
}
return 0;
}
I use normal malloc and posix_memalign and mmap to test performance.
My test result is :
malloc cost about 29.7s, posix_memalign cost about 23.5s, and mmap is very near with malloc.
Both posix_memalign and mmap uses hugepages. Why one has obvious improvement, the other not? Do I use mmap in the wrong way?
I don't do bzero for mmap since the man pages says "its contents are initialized to zero".

Related

How to discover physical address corresponding to PCIe device memory?

I'm trying to access a PCIe device memory from a user space program. I open the file: /sys/bus/pci/devices/0000:3b:00.0/resource0 and then I call mmap that will return a virtual address.
When writing at this virtual address (VA) the MMU will translate it to a physical address (PA), the memory controller will convert the write to the PA into a TLP to request a write to the PCIe device. (AFAIU)
How can I get the physical address that is being used? I had a look to /proc//maps and I see that there is an address that coincides with the PCIe bar0 address (0xa0000000).
But this address seems too low, it overlaps with DDR memory.
I also tried this program to convert VA to PA but it doesn't seem to give sensible results for such mapping:
virt2phys$ cat v2p.c
#define _XOPEN_SOURCE 700
#include <fcntl.h> /* open */
#include <stdint.h> /* uint64_t */
#include <stdio.h> /* printf */
#include <stdlib.h> /* size_t */
#include <unistd.h> /* pread, sysconf */
typedef struct {
uint64_t pfn : 55;
unsigned int soft_dirty : 1;
unsigned int file_page : 1;
unsigned int swapped : 1;
unsigned int present : 1;
} PagemapEntry;
/* Parse the pagemap entry for the given virtual address.
*
* #param[out] entry the parsed entry
* #param[in] pagemap_fd file descriptor to an open /proc/pid/pagemap file
* #param[in] vaddr virtual address to get entry for
* #return 0 for success, 1 for failure
*/
int pagemap_get_entry(PagemapEntry *entry, int pagemap_fd, uintptr_t vaddr)
{
size_t nread;
ssize_t ret;
uint64_t data;
uintptr_t vpn;
vpn = vaddr / sysconf(_SC_PAGE_SIZE);
nread = 0;
while (nread < sizeof(data)) {
ret = pread(pagemap_fd, ((uint8_t*)&data) + nread, sizeof(data) - nread,
vpn * sizeof(data) + nread);
nread += ret;
if (ret <= 0) {
return 1;
}
}
entry->pfn = data & (((uint64_t)1 << 55) - 1);
entry->soft_dirty = (data >> 55) & 1;
entry->file_page = (data >> 61) & 1;
entry->swapped = (data >> 62) & 1;
entry->present = (data >> 63) & 1;
return 0;
}
/* Convert the given virtual address to physical using /proc/PID/pagemap.
*
* #param[out] paddr physical address
* #param[in] pid process to convert for
* #param[in] vaddr virtual address to get entry for
* #return 0 for success, 1 for failure
*/
int virt_to_phys_user(uintptr_t *paddr, pid_t pid, uintptr_t vaddr)
{
char pagemap_file[BUFSIZ];
int pagemap_fd;
snprintf(pagemap_file, sizeof(pagemap_file), "/proc/%ju/pagemap", (uintmax_t)pid);
pagemap_fd = open(pagemap_file, O_RDONLY);
if (pagemap_fd < 0) {
return 1;
}
PagemapEntry entry;
if (pagemap_get_entry(&entry, pagemap_fd, vaddr)) {
return 1;
}
close(pagemap_fd);
*paddr = (entry.pfn * sysconf(_SC_PAGE_SIZE)) + (vaddr % sysconf(_SC_PAGE_SIZE));
return 0;
}
int main(int argc, char **argv)
{
pid_t pid;
uintptr_t vaddr, paddr = 0;
if (argc < 3) {
printf("Usage: %s pid vaddr(in hex)\n", argv[0]);
return EXIT_FAILURE;
}
pid = strtoull(argv[1], NULL, 0);
vaddr = strtoull(argv[2], NULL, 16);
if (virt_to_phys_user(&paddr, pid, vaddr)) {
fprintf(stderr, "error: virt_to_phys_user\n");
return EXIT_FAILURE;
};
printf("0x%jx\n", (uintmax_t)paddr);
return EXIT_SUCCESS;
}

How to Create threads recursively without Loops

I would like to create a code that will:
create recursive threads without using loops, the threads has to execute certain routine. I am using Pthread_create on ubuntu
#include <pthread.h>
#include <stdio.h>
#define NUM_THREADS 8
void *PrintHello(void *threadid)
{
printf("\n%d: Hello World!\n", threadid);
pthread_exit(NULL);
}
int main (int argc, char *argv[])
{
pthread_t threads[NUM_THREADS];
int rc, t;
for(t=0; t<NUM_THREADS; t++)
{
printf("Creating thread %d\n", t);
rc = pthread_create(&threads[t], NULL, PrintHello, (void *)t);
if (rc)
{
printf("ERROR; return code from pthread_create() is %d\n", rc);
exit(-1);
}
}
pthread_exit(NULL);
}
Yes you can create thread without using for loop , I have modified your code and used function recursion to create pthread..
#include <pthread.h>
#include <stdio.h>
#define NUM_THREADS 8
pthread_t threads[NUM_THREADS];
void *PrintHello(void *threadid)
{
printf("\n%d: Hello World!\n", threadid);
pthread_exit(NULL);
}
void create_thread(int n){
if (n > 0 ){
//Create thread
printf("Creating thread %d\n", ((NUM_THREADS - n) + 1) );
//NUM_THREADS - n to start index from 0
int rc = pthread_create(&threads[NUM_THREADS - n], NULL, PrintHello, (void *)(NUM_THREADS - n));
if (rc)
{
printf("ERROR; return code from pthread_create() is %d\n", rc);
exit(-1);
}
n--;
create_thread(n);
}
return;
}
int main(int argc, char *argv[])
{
int rc, t;
create_thread( NUM_THREADS );
//Wait to finish all thread
for (t = 0; t < NUM_THREADS; t++)
{
pthread_join(threads[t],NULL);
}
pthread_exit(NULL);
return 0;
}
Hope this will help you.

set CPU affinity of a particular pthread failure

My speedup-example.cpp source code is shown below
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <assert.h>
#include "tern/user.h"
#define N 8
#define M 10000
int nwait = 0;
int nexit = 0;
volatile long long sum;
long loops = 6e3;
pthread_mutex_t mutex;
pthread_cond_t cond;
pthread_barrier_t bar;
void set_affinity(int core_id) {
cpu_set_t cpuset;
CPU_ZERO(&cpuset);
CPU_SET(core_id, &cpuset);
assert(pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset) ==0);
}
void* thread_func(void *arg) {
set_affinity((int)(long)arg);
for (int j = 0; j < M; j++) {
pthread_mutex_lock(&mutex);
nwait++;
for (long i = 0; i < loops; i++) // This is the key of speedup for parrot: the mutex needs to be a little bit congested.
sum += i;
pthread_cond_wait(&cond, &mutex);
pthread_mutex_unlock(&mutex);
soba_wait(0);
pthread_barrier_wait(&bar);
for (long i = 0; i < loops; i++)
sum += i*i*i*i*i*i;
//fprintf(stderr, "compute thread %u %d\n", (unsigned)thread, sched_getcpu());
}
}
int main(int argc, char *argv[]) {
set_affinity(23);
soba_init(0, N, 20);
pthread_t th[N];
int ret;
pthread_cond_init(&cond, NULL);
pthread_barrier_init(&bar, NULL, N);
for(unsigned i=0; i<N; ++i) {
ret = pthread_create(&th[i], NULL, thread_func, (void*)i);
assert(!ret && "pthread_create() failed!");
}
for (int j = 0; j < M; j++) {
while (nwait < N) {
sched_yield();
}
pthread_mutex_lock(&mutex);
nwait = 0;
//fprintf(stderr, "broadcast %u %d\n", (unsigned)pthread_self(), sched_getcpu());
pthread_cond_broadcast(&cond);
pthread_mutex_unlock(&mutex);
}
for(unsigned i=0; i<N; ++i)
pthread_join(th[i], NULL);
exit(0);
}
I already succeeded wrote the mk of speedup-example.cpp
gcc speedup-example.cpp -o speedup-example -O2 -g \-I$XTERN_ROOT/include -L$XTERN_ROOT/dync_hook -Wl,--rpath,$XTERN_ROOT/dync_hook -lxtern-annot \-lpthread
But when I want to run it, problems occur.
For example
$ time ./speedup-example
It informs me that
speedup-example.cpp:23: void set_affinity(int): Assertion `pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset) ==0' failed.
Can someone help me solve this problem? Many thanks.

SCTP Multistreaming: infinite loop

I have a simple client-server application on SCTP! The client connects to server opening 3 streams and the server sends a file per stream. The problem is that I don't know how to control the 3 streams, to know when sctp_rcvmsg() from a stream i 0 it would mean that the file transmission is ended for that stream...but it seems that sctp_recvmsg() never stops. here's my code.
CLIENT
#include <sys/types.h>
#include <sys/socket.h>
#include <signal.h>
#include <netinet/in.h>
#include <netinet/sctp.h>
#include <arpa/inet.h>
#include <string.h>
#include <unistd.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <stdlib.h>
#include <time.h>
#define BUFFERSIZE 1024
int main(int argc, char** argv) {
int i, sockCliSCTP, flags, res;
/* Server netwrok informations */
struct sockaddr_in servAddr;
/* To get which stream it has received data from */
struct sctp_sndrcvinfo sndrcvinfo;
/* Init message to setup number of streams */
struct sctp_initmsg initmsg;
/* Catching events */
struct sctp_event_subscribe events;
/* Buffer to receive files */
char buffer[BUFFERSIZE];
/* Remove previous recently used files */
remove("first.txt");
remove("second.txt");
remove("third.txt");
char ipServ[32] = "127.0.0.1";
short int servPort = 29008;
/* BEGIN SCTP PART */
/* Creating client socket for SCTP protocol */
sockCliSCTP = socket( AF_INET, SOCK_STREAM, IPPROTO_SCTP );
/* Specify that a maximum of 3 streams will be available per socket */
memset( &initmsg, 0, sizeof(initmsg) );
initmsg.sinit_num_ostreams = 3; /* output streams */
initmsg.sinit_max_instreams = 3; /* input streams */
initmsg.sinit_max_attempts = 2;
setsockopt(sockCliSCTP, IPPROTO_SCTP, SCTP_INITMSG, &initmsg, sizeof(initmsg) );
/* Initializing server network data structs */
bzero( (void *)&servAddr, sizeof(servAddr) );
servAddr.sin_family = AF_INET;
inet_pton(AF_INET, ipServ, &servAddr.sin_addr);
servAddr.sin_port = htons(29008);
int sizeServ = sizeof(servAddr);
/* Connect to server */
res = connect(sockCliSCTP, (struct sockaddr *)&servAddr, sizeof(servAddr));
if (res < 0) {
printf("Connection to server refused!\n");
exit(1);
}
memset( (void *)&events, 0, sizeof(events) );
events.sctp_data_io_event = 1;
res = setsockopt(sockCliSCTP, SOL_SCTP, SCTP_EVENTS, (const void *)&events, sizeof(events));
if (res < 0) {
printf("setsockopt failed!\n");
exit(1);
}
/* The clients simply waits and receives for three files from the server.
* The size of the files is increased each time this client is launched. */
FILE *oneF, *twoF, *threeF;
oneF = fopen("first.txt", "a"); /* Stream 0 */
twoF = fopen("second.txt", "a"); /* Stream 1 */
threeF = fopen("third.txt", "a"); /* Stream 2 */
/* To measure time */
time_t timeStart;
time_t timeEnd;
time_t timeRes = 0;
time(&timeStart);
int count0 = 0, count1 = 0, count2 = 0;
int checkRead[3];
for(i = 0; i<3; i++) {
checkRead[i] = 1;
}
/* Receiving in parallel the files from 3 streams */
while(checkRead[0] || checkRead[1] || checkRead[2]) {
printf("%d %d %d\n", checkRead[0], checkRead[1], checkRead[2]);
res = sctp_recvmsg(sockCliSCTP, (void*)buffer, sizeof(buffer), (struct sockaddr*)&servAddr, (socklen_t *)&sizeServ, &sndrcvinfo, &flags);
if (res == 0) {
printf("%d stream is zero\n", sndrcvinfo.sinfo_stream);
checkRead[sndrcvinfo.sinfo_stream] = 0;
continue;
}
/* Check from which stream the data came in */
switch(sndrcvinfo.sinfo_stream) {
/* Write on file oneF --> first.txt */
case 0:
count0++;
printf("Message received from stream 0\n");
//printf("%s\n\n", buffer);
fprintf(oneF, "%s", buffer);
break;
/* Write on file twoF --> second.txt */
case 1:
count1++;
printf("Message received from stream 1\n");
//printf("%s\n\n", buffer);
fprintf(twoF, "%s", buffer);
break;
/* Write on file threeF --> third.txt */
case 2:
count2++;
printf("Message received from stream 2\n");
//printf("%s\n\n", buffer);
fprintf(threeF, "%s", buffer);
break;
}
memset(buffer, 0, sizeof(buffer));
sleep(1);
}
close(sockCliSCTP);
time(&timeEnd);
timeRes = timeEnd - timeStart;
printf("Time elapsed is: %d seconds\n", (int)timeRes);
printf("%d messages on stream 0,\n %d messages on stream 1,\n %d messages on stream 2\n", count0, count1, count2);
}
AND THE SERVER:
#include <sys/types.h>
#include <sys/socket.h>
#include <signal.h>
#include <netinet/in.h>
#include <netinet/sctp.h>
#include <arpa/inet.h>
#include <string.h>
#include <unistd.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <stdlib.h>
#include <time.h>
#include <sys/stat.h>
#include <fcntl.h>
#define BUFFERSIZE 1024
int main(int argc, char** argv) {
int sockCli, sockServ, one, two, three, i, res;
struct sockaddr_in client, server;
/* data struct to declarate streams */
struct sctp_initmsg initmsg;
/* buffer to read from file */
char buffer[BUFFERSIZE];
/* socket server listening */
sockServ = socket(AF_INET, SOCK_STREAM, IPPROTO_SCTP);
bzero( (void *)&client, sizeof(client));
bzero( (void *)&server, sizeof(server));
/* Preparing sever data struct and bind() */
bzero( (void *)&server, sizeof(server) );
server.sin_family = AF_INET;
server.sin_addr.s_addr = htonl( INADDR_ANY );
server.sin_port = htons(29008);
bind(sockServ, (struct sockaddr *)&server, sizeof(server));
/* Maximum of 3 streams will be available per socket */
memset( &initmsg, 0, sizeof(initmsg) );
initmsg.sinit_num_ostreams = 3;
initmsg.sinit_max_instreams = 3;
initmsg.sinit_max_attempts = 2;
res = setsockopt(sockServ, IPPROTO_SCTP, SCTP_INITMSG, &initmsg, sizeof(initmsg));
if (res < 0) {
printf("setsockopt() failed!\n");
exit(1);
}
/* Preparing the three files to be sent */
one = open("files/first.txt", O_RDONLY);
if (one < 0) {
printf("Error on opening first file!\n");
exit(1);
}
two = open("files/second.txt", O_RDONLY);
if (two < 0) {
printf("Error on opening second file!\n");
exit(1);
}
three = open("files/third.txt", O_RDONLY);
if (three < 0) {
printf("Error on opening third files!\n");
exit(1);
}
int checkFiles[3];
for(i=0; i<3; i++) {
checkFiles[i] = 1;
}
res = listen(sockServ, 5);
if (res < 0) {
printf("listen() failed!\n");
exit(1);
}
while(1) {
ssize_t readRes;
int len = sizeof(client);
sockCli = accept(sockServ, (struct sockaddr*)&client, &len);
if (sockCli < 0) {
printf("Error on accept()!\n");
exit(1);
}
printf("Associated to client!\n");
while(1) {
memset(buffer, 0, sizeof(buffer));
if ((readRes = read(one, (void*)buffer, sizeof(buffer))) > 0) {
sctp_sendmsg(sockCli, (void*)buffer, (size_t)strlen(buffer), NULL, 0, 0, 0, 0 /* stream number */, 0, 0);
}
memset(buffer, 0, sizeof(buffer));
if ((readRes = read(two, (void*)buffer, sizeof(buffer))) > 0) {
sctp_sendmsg(sockCli, (void*)buffer, (size_t)strlen(buffer), NULL, 0, 0, 0, 1 /* stream number */, 0, 0);
}
memset(buffer, 0, sizeof(buffer));
if ((readRes = read(three, (void*)buffer, sizeof(buffer))) > 0) {
sctp_sendmsg(sockCli, (void*)buffer, (size_t)strlen(buffer), NULL, 0, 0, 0, 2 /* stream number */, 0, 0);
}
else {break;}
}
close(sockCli);
close(one);
close(two);
close(three);
}
}
Where am I making a mistake? :(
sctp_recvmsg doesn't receive a message from a single stream. It simply returns ANY message received and then the application can figure out which stream the message came from.
After all the data has been received by your client when this code executes:
res = sctp_recvmsg(sockCliSCTP, (void*)buffer, sizeof(buffer), (struct sockaddr*)&servAddr, (socklen_t *)&sizeServ, &sndrcvinfo, &flags);
if (res == 0) {
printf("%d stream is zero\n", sndrcvinfo.sinfo_stream);
checkRead[sndrcvinfo.sinfo_stream] = 0;
continue;
}
res becomes 0 since no message is received and the sndrcvinfo structure does not get changed. So sndrcvinfo.sinfo_stream will remain equal to whatever stream the last message came from and you will get stuck in a loop since you won't change the checkRead[] values.
There's some other errors that will make the server/client behave strangely.
For example you can't run the client twice in a row without a a segmentation fault since the server closes the file descriptors and won't send any data the second time. Because of this you will segfault when you do:
checkRead[sndrcvinfo.sinfo_stream] = 0;
since sndrcvinfo will be a null pointer.

HIDAPI in two threads

According to https://github.com/signal11/hidapi/issues/72 HIDAPI ought to be thread safe on Linux machines. However, I can't get it working at all. This is what I do:
#ifdef WIN32
#include <windows.h>
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>
#include <stdlib.h>
#include <assert.h>
#include "hidapi.h"
hid_device *handle;
static void *TaskCode(void *argument)
{
int res;
//hid_device *handle;
unsigned char buf[64];
// res = hid_init();
// if( res == -1 )
// {
// return (void*)1;
// }
//
// handle = hid_open(0x0911, 0x251c, NULL);
// if( handle == NULL )
// {
// return (void*)2;
// }
printf( "while 2\n");
while( 1 )
{
memset( buf, 64, 0 );
res = hid_read(handle, buf, 0);
if( res == -1 )
{
return (void*)3;
}
printf( "received %d bytes\n", res);
for (int i = 0; i < res; i++)
printf("Byte %d: %02x ", i+1, buf[i]);
//printf( "%02x ", buf[0]);
fflush(stdout);
}
return (void*)0;
}
int main(int argc, char* argv[])
{
int res;
//hid_device *handle;
unsigned char buf[65];
res = hid_init();
if( res == -1 )
{
return 1;
}
handle = hid_open(0x0911, 0x251c, NULL);
if( handle == NULL )
{
return 2;
}
hid_set_nonblocking( handle, 0 );
pthread_t thread;
int rc = pthread_create(&thread, NULL, TaskCode, NULL);
printf( "while 1\n");
while(1)
{
int a = getchar();
if( a == 'a')
{
// Get Device Type (cmd 0x82). The first byte is the report number (0x0).
buf[0] = 0x0;
buf[1] = 0x82;
res = hid_write(handle, buf, 65);
if( res != -1 )
printf( "write ok, transferred %d bytes\n", res );
else
{
printf( "write error\n" );
char* str = hid_error(handle);
printf( "error: %s\n", str );
return 1;
}
}
else if( a== 'b')
break;
}
void* trc;
rc = pthread_join(thread, &trc);
printf( "rc code: %d\n", (int)trc );
// Finalize the hidapi library
res = hid_exit();
return 0;
}
If I don't use the global handle, I get 'write error' every time. If I do, as in the example, formally everything works but hid_read always returns 0 bytes... Of course, if I do simple hid_write() followed by hid_read(), I'll get the correct reply to the command 0x82 as intended. I'm really lost here, am I overlooking something?
EDIT: to clarify, zero bytes return also for everything, incl. buttons on mouse etc. So it seems to work but the data buffer is always zero bytes.
Shame on me, a dumb mistake. The code should be:
memset( buf, 0, 64 );
res = hid_read(handle, buf, 64);
and then it works. Should sleep more and write less!

Resources