How to discover physical address corresponding to PCIe device memory?

How to discover physical address corresponding to PCIe device memory? - memory

I'm trying to access PCIe device memory from a user-space program. I open the file /sys/bus/pci/devices/0000:3b:00.0/resource0 and then call mmap, which returns a virtual address.
When I write to this virtual address (VA), the MMU translates it to a physical address (PA), and the memory controller converts the write to that PA into a TLP that requests a write to the PCIe device (as far as I understand).
How can I get the physical address that is being used? I had a look at /proc/<pid>/maps and I see that there is an address that coincides with the PCIe BAR0 address (0xa0000000).
But this address seems too low; it appears to overlap with DDR memory.
I also tried this program to convert VA to PA but it doesn't seem to give sensible results for such mapping:
virt2phys$ cat v2p.c
#define _XOPEN_SOURCE 700
#include <fcntl.h> /* open */
#include <stdint.h> /* uint64_t */
#include <stdio.h> /* printf */
#include <stdlib.h> /* size_t */
#include <unistd.h> /* pread, sysconf */
/* One decoded 64-bit /proc/PID/pagemap entry, as filled in by
 * pagemap_get_entry() from the raw word. */
typedef struct {
uint64_t pfn : 55; /* bits 0-54 of the raw entry */
unsigned int soft_dirty : 1; /* bit 55 of the raw entry */
unsigned int file_page : 1; /* bit 61 of the raw entry */
unsigned int swapped : 1; /* bit 62 of the raw entry */
unsigned int present : 1; /* bit 63 of the raw entry */
} PagemapEntry;
/* Read and decode the pagemap word that describes one virtual page.
 *
 * @param[out] entry      receives the decoded fields
 * @param[in]  pagemap_fd descriptor of an already opened /proc/pid/pagemap file
 * @param[in]  vaddr      virtual address whose page is looked up
 * @return 0 for success, 1 if the 8-byte entry could not be read completely
 */
int pagemap_get_entry(PagemapEntry *entry, int pagemap_fd, uintptr_t vaddr)
{
    uint64_t raw;
    uint8_t *cursor = (uint8_t *)&raw;
    size_t done = 0;
    /* The file is indexed by virtual page number, one 64-bit word per page. */
    uintptr_t page_index = vaddr / sysconf(_SC_PAGE_SIZE);

    /* pread() may deliver fewer bytes than requested; loop until the word is whole. */
    while (done < sizeof(raw)) {
        ssize_t got = pread(pagemap_fd, cursor + done, sizeof(raw) - done,
                            page_index * sizeof(raw) + done);
        if (got <= 0) {
            return 1;
        }
        done += got;
    }

    /* Split the raw word into its fields. */
    entry->present = (raw >> 63) & 1;
    entry->swapped = (raw >> 62) & 1;
    entry->file_page = (raw >> 61) & 1;
    entry->soft_dirty = (raw >> 55) & 1;
    entry->pfn = raw & (((uint64_t)1 << 55) - 1);
    return 0;
}
/* Convert the given virtual address to physical using /proc/PID/pagemap.
 *
 * The pagemap file of the target process is opened, the entry covering
 * vaddr is decoded, and the physical address is built from the entry's page
 * frame number plus the offset of vaddr inside its page.
 *
 * NOTE(review): entry.present is not checked. According to the kernel's
 * pagemap documentation the pfn field only holds a frame number for present
 * pages and reads as zero without CAP_SYS_ADMIN - confirm before trusting the
 * result for special mappings such as an mmap'ed PCI BAR.
 *
 * @param[out] paddr physical address
 * @param[in] pid process to convert for
 * @param[in] vaddr virtual address to get entry for
 * @return 0 for success, 1 for failure
 */
int virt_to_phys_user(uintptr_t *paddr, pid_t pid, uintptr_t vaddr)
{
    char pagemap_file[BUFSIZ];
    PagemapEntry entry;
    int pagemap_fd;
    int lookup_failed;

    snprintf(pagemap_file, sizeof(pagemap_file), "/proc/%ju/pagemap", (uintmax_t)pid);
    pagemap_fd = open(pagemap_file, O_RDONLY);
    if (pagemap_fd < 0) {
        return 1;
    }

    lookup_failed = pagemap_get_entry(&entry, pagemap_fd, vaddr);
    /* Close on every path; returning early on a failed lookup used to leak
     * the descriptor. */
    close(pagemap_fd);
    if (lookup_failed) {
        return 1;
    }

    *paddr = (entry.pfn * sysconf(_SC_PAGE_SIZE)) + (vaddr % sysconf(_SC_PAGE_SIZE));
    return 0;
}
/* Command-line driver: takes a pid and a hexadecimal virtual address and
 * prints the physical address computed by virt_to_phys_user(). */
int main(int argc, char **argv)
{
    uintptr_t paddr = 0;

    if (argc < 3) {
        printf("Usage: %s pid vaddr(in hex)\n", argv[0]);
        return EXIT_FAILURE;
    }

    /* The pid accepts any base strtoull understands; the address is always hex. */
    pid_t pid = strtoull(argv[1], NULL, 0);
    uintptr_t vaddr = strtoull(argv[2], NULL, 16);

    if (virt_to_phys_user(&paddr, pid, vaddr) != 0) {
        fprintf(stderr, "error: virt_to_phys_user\n");
        return EXIT_FAILURE;
    }

    printf("0x%jx\n", (uintmax_t)paddr);
    return EXIT_SUCCESS;
}

Related

Windows DPDK L2fwd- Receiving packets out of sequence

I am validating DPDK receive functionality. For this I am replaying a pcap from an external sender ("shooter") and
have added code to l2fwd that dumps the received packets to a pcap. The pcap dumped by l2fwd contains all the packets sent by the shooter, but some of them are out of sequence.
The shooter has already been validated.
DPDK version in use: 21.11
Link to the pcap used: https://wiki.wireshark.org/uploads/__moin_import__/attachments/SampleCaptures/tcp-ecn-sample.pcap
The out-of-order packets are random. On the first run I saw no reordered packets, but I was able to reproduce the issue on the second run, where the 2nd, 3rd and 4th packets arrived in the order 3, 4, 2.
Below is a snippet from the l2fwd example, with our modifications marked as //TESTCODE.
/* Read packet from RX queues. 8< */
/* For every port listed in qconf->rx_port_list, fetch one burst from RX
 * queue 0, then dump and forward each received mbuf in burst order. */
for (i = 0; i < qconf->n_rx_port; i++) {
portid = qconf->rx_port_list[i];
/* Request at most MAX_PKT_BURST packets; nb_rx is the number returned. */
nb_rx = rte_eth_rx_burst(portid, 0,
pkts_burst, MAX_PKT_BURST);
port_statistics[portid].rx += nb_rx;
for (j = 0; j < nb_rx; j++) {
m = pkts_burst[j];
// TESTCODE_STARTS
/* The packet is dumped before it is prefetched and forwarded.
 * NOTE(review): pkt points at the data of the mbuf itself while the length
 * passed is rte_pktmbuf_pkt_len(m); presumably every mbuf here is
 * single-segment, otherwise rte_pktmbuf_data_len(m) would be the matching
 * length - confirm.
 * NOTE(review): dump_to_pcap() keeps static state; if this loop runs on more
 * than one lcore the dumped records could interleave - confirm. */
uint8_t* pkt = rte_pktmbuf_mtod(m, uint8_t*);
dump_to_pcap(pkt, rte_pktmbuf_pkt_len(m));
// TESTCODE_ENDS
rte_prefetch0(rte_pktmbuf_mtod(m, void *));
l2fwd_simple_forward(m, portid);
}
}
/* >8 End of read packet from RX queues. */
Below is code for dump_to_pcap
/*
 * Append one packet record to a classic pcap capture file.
 *
 * Until a capture file has been opened successfully, every call tries to
 * create ".\Rx_0_<stamp>.pcap" (where <stamp> is whatever currentDateTime()
 * stores in the buffer it is given) and writes the global pcap header to it.
 * Each record gets a synthetic timestamp: a seconds counter that increases by
 * one per call, with microseconds fixed at 0. The stream is flushed after
 * every record.
 *
 * The file handle and counters are function-local statics, so the function is
 * not safe to call from several threads at once.
 *
 * pkt      first byte of the packet data
 * pkt_len  number of bytes at pkt; stored as both caplen and len
 * returns  0 when the record was written or when no capture file is open,
 *          -1 for invalid arguments or a failed write
 */
static int
dump_to_pcap(uint8_t* pkt, int pkt_len)
{
    static FILE* fp = NULL;
    static int init_file = 0;

    /* A negative length would turn into a huge size_t inside fwrite(). */
    if (NULL == pkt || pkt_len < 0) {
        return -1;
    }

    if (0 == init_file) {
        printf("Creating pcap\n");
        char pcap_filename[256] = { 0 };
        char Two_pcap_filename[256] = { 0 };
        currentDateTime(pcap_filename);
        /* Bounded formatting: prefix + a 255-character stamp would not fit
         * into the destination, so sprintf() could overflow it. */
        snprintf(Two_pcap_filename, sizeof(Two_pcap_filename),
                 ".\\Rx_%d_%s.pcap", 0, pcap_filename);
        printf("FileSName to Create: %s\n", Two_pcap_filename);
        fp = fopen(Two_pcap_filename, "wb");
        if (NULL == fp) {
            /* init_file stays 0, so the next call retries the creation. */
            printf("Unable to open file\n");
        }
        else {
            printf("File create success..\n");
            init_file = 1;
            /* Global pcap file header; fixed-width types keep the on-disk
             * layout independent of the platform's int/short sizes. */
            typedef struct pcap_file_header1 {
                uint32_t magic;         /* 32-bit "magic number" */
                uint16_t version_major; /* 16-bit major version number */
                uint16_t version_minor; /* 16-bit minor version number */
                uint32_t thiszone;      /* "time zone offset", unused, 0 */
                uint32_t sigfigs;       /* "time stamp accuracy", unused, 0 */
                uint32_t snaplen;       /* "snapshot length" */
                uint32_t linktype;      /* "link layer type" */
            } dumpFileHdr;
            dumpFileHdr file_hdr;
            file_hdr.magic = 0xa1b2c3d4u;
            file_hdr.version_major = 2;
            file_hdr.version_minor = 4;
            file_hdr.thiszone = 0;
            file_hdr.sigfigs = 0;
            file_hdr.snaplen = 65535;
            file_hdr.linktype = 1;
            if (fwrite(&file_hdr, sizeof(file_hdr), 1, fp) != 1) {
                return -1;
            }
        }
    }

    /* Per-record header that precedes the packet bytes. */
    typedef struct pcap_pkthdr1 {
        uint32_t ts_sec;  /* time stamp */
        uint32_t ts_usec;
        uint32_t caplen;  /* length of portion present */
        uint32_t len;     /* length this packet (off wire) */
    } dumpPktHdr;
    dumpPktHdr pkt_hdr;
    static int ts_sec = 1;
    pkt_hdr.ts_sec = (uint32_t)ts_sec++;
    pkt_hdr.ts_usec = 0;
    pkt_hdr.caplen = pkt_hdr.len = (uint32_t)pkt_len;

    if (NULL != fp) {
        if (fwrite(&pkt_hdr, sizeof(pkt_hdr), 1, fp) != 1) {
            return -1;
        }
        /* fwrite() reports 0 items for a zero-sized item, so skip empty payloads. */
        if (pkt_len > 0 && fwrite(pkt, (size_t)pkt_len, 1, fp) != 1) {
            return -1;
        }
        if (fflush(fp) != 0) {
            return -1;
        }
    }
    return 0;
}

cudaMalloc and cudaMemcpy not working on kernel call

I have an array already initialized that I am trying to use in each thread of the kernel call (each thread uses a different part of the array so there are no dependencies). I create the array and save memory on the device using cudaMalloc and the array is copied from host to device using cudaMemcpy.
I pass the pointer returned by cudaMalloc to the kernel call to be used by each thread.
// Host array of SIZE ints and a device buffer of the same size.
int SIZE = 100;
int* data = new int[SIZE];
int* d_data = 0;
cutilSafeCall( cudaMalloc(&d_data, SIZE * sizeof(int)) );
// Fill the host array with 0 .. SIZE-1.
for (int i = 0; i < SIZE; i++)
data[i] = i;
// Copy the host array into the device buffer.
cutilSafeCall( cudaMemcpy(d_data, data, SIZE * sizeof(int), cudaMemcpyHostToDevice) );
This code was taken from here.
For the kernel call.
kernel<<<blocks, threads>>> (results, d_data);
I keep track of the results from each thread by using the struct Result. The next code works without errors.
// Variant 1: reads data[0] into a local that is never used afterwards.
__global__ void mainKernel(Result res[], int* data){
int x = data[0];
}
But when I assign that value to res:
// Variant 2: each thread stores the value read from data[0] into its own res element.
__global__ void mainKernel(Result res[], int* data){
int threadId = (blockIdx.x * blockDim.x) + threadIdx.x; // global 1-D thread index
int x = data[0];
res[threadId].x = x;
}
An error is raised:
cudaSafeCall() Runtime API error in file , line 355 : an illegal memory access was encountered.
The same error appears with any operation involving the use of that pointer
// Variant 3: the value read from data[0] only decides whether a constant is stored.
__global__ void mainKernel(Result res[], int* data){
int threadId = (blockIdx.x * blockDim.x) + threadIdx.x; // global 1-D thread index
int x = data[0];
if (x > 10)
res[threadId].x = 5;
}
There is no problem with the definition of res. Assigning any other value to res[threadId].x does not give me any error.
This is the output of running cuda-memcheck:
========= Invalid __global__ read of size 4
========= at 0x00000150 in mainKernel(Result*, int*)
========= by thread (86,0,0) in block (49,0,0)
========= Address 0x13024c0000 is out of bounds
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 (cuLaunchKernel + 0x2cd) [0x150d6d]
========= Host Frame:./out [0x2cc4b]
========= Host Frame:./out [0x46c23]
========= Host Frame:./out [0x3e37]
========= Host Frame:./out [0x3ca1]
========= Host Frame:./out [0x3cd6]
========= Host Frame:./out [0x39e9]
========= Host Frame:/lib/x86_64-linux-gnu/libc.so.6 (__libc_start_main + 0xf5) [0x21ec5]
========= Host Frame:./out [0x31b9]
EDIT:
This is an example of the full code:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <iostream>
#include <assert.h>
// Per-thread output record; mainKernel below assigns only the x member.
typedef struct
{
int x,y,z;
} Result;
// Each thread looks at the first element of dataimage and, when it is
// greater than 10, stores 5 in the x member of its own pResults element.
__global__ void mainKernel(Result pResults[], int* dataimage)
{
    // Global 1-D index of this thread.
    const int slot = threadIdx.x + (blockDim.x * blockIdx.x);
    const int first = dataimage[0];
    if (first > 10) {
        pResults[slot].x = 5;
    }
}
// Host driver of the example: uploads a 100-element array, launches
// mainKernel with 5 blocks of 5 threads and copies the results back.
// Note the call order: d_data is allocated and filled BEFORE cudaSetDevice(),
// whereas results_GPU is allocated and the kernel is launched AFTER it; this
// ordering is what the question is about and is analysed in the answer below.
// data and d_data are not released anywhere in this function.
int main (int argc, char** argv)
{
int NUM_THREADS = 5*5;
int SIZE = 100;
int* data = new int[SIZE];
int* d_data = 0;
// Allocation happens while no device has been selected explicitly yet.
cutilSafeCall( cudaMalloc(&d_data, SIZE * sizeof(int)) );
for (int i = 0; i < SIZE; i++)
data[i] = i;
cutilSafeCall( cudaMemcpy(d_data, data, SIZE * sizeof(int), cudaMemcpyHostToDevice) );
unsigned int GPU_ID = 1; // not actually :-)
// unsigned int GPU_ID = cutGetMaxGflopsDeviceId() ;
// From here on device GPU_ID is selected; the return value is not checked.
cudaSetDevice(GPU_ID);
Result * results_GPU = 0;
cutilSafeCall( cudaMalloc( &results_GPU, NUM_THREADS * sizeof(Result)) );
Result * results_CPU = 0;
cutilSafeCall( cudaMallocHost( &results_CPU, NUM_THREADS * sizeof(Result)) );
// 5 blocks x 5 threads = NUM_THREADS threads, one Result element each.
mainKernel<<<5,5>>> ( results_GPU, d_data );
cudaThreadSynchronize();
cutilSafeCall( cudaMemcpy(results_CPU, results_GPU, NUM_THREADS * sizeof(Result),cudaMemcpyDeviceToHost) );
cutilSafeCall(cudaFree(results_GPU));
cutilSafeCall(cudaFreeHost(results_CPU));
cudaThreadExit();
} // ()

Your problem lies in this sequence of calls:
// d_data is allocated and filled before any device is selected ...
cutilSafeCall( cudaMalloc(&d_data, SIZE * sizeof(int)) );
for (int i = 0; i < SIZE; i++)
data[i] = i;
cutilSafeCall( cudaMemcpy(d_data, data, SIZE * sizeof(int), cudaMemcpyHostToDevice) );
// ... then device 1 is selected ...
unsigned int GPU_ID = 1;
cudaSetDevice(GPU_ID);
// ... and the result buffers and the kernel launch follow that selection.
Result * results_GPU = 0;
cutilSafeCall( cudaMalloc( &results_GPU, NUM_THREADS * sizeof(Result)) );
Result * results_CPU = 0;
cutilSafeCall( cudaMallocHost( &results_CPU, NUM_THREADS * sizeof(Result)) );
mainKernel<<<5,5>>> ( results_GPU, d_data );
What is effectively happening is that you are allocating d_data and running your kernel on different GPUs, and d_data is not valid on the GPU you are launching the kernel on.
In detail, because you call cudaMalloc for d_data before cudaSetDevice, you are allocating d_data on the default device, and then explicitly allocating results_GPU and running the kernel on device 1. Clearly device 1 and the default device are not the same GPU (enumeration of devices usually starts at 0 in the runtime API).
If you change the code like this:
// Select the device first (and check the call) so that every allocation
// below and the kernel launch all refer to the same GPU.
unsigned int GPU_ID = 1;
cutilSafeCall(cudaSetDevice(GPU_ID));
cutilSafeCall( cudaMalloc(&d_data, SIZE * sizeof(int)) );
for (int i = 0; i < SIZE; i++)
data[i] = i;
cutilSafeCall( cudaMemcpy(d_data, data, SIZE * sizeof(int), cudaMemcpyHostToDevice) );
Result * results_GPU = 0;
cutilSafeCall( cudaMalloc( &results_GPU, NUM_THREADS * sizeof(Result)) );
Result * results_CPU = 0;
cutilSafeCall( cudaMallocHost( &results_CPU, NUM_THREADS * sizeof(Result)) );
mainKernel<<<5,5>>> ( results_GPU, d_data );
i.e. select the non-default device before any allocations are made, the problem should disappear. The reason this doesn't happen with your very simple kernel:
// The value loaded into x is never used, so nothing depends on the read.
__global__ void mainKernel(Result res[], int* data){
int x = data[0];
}
is simply that the CUDA compiler performs very aggressive optimisations by default, and because the result of the read of data[0] isn't actually used, the entire read can be optimised away and you are left with an empty stub kernel which doesn't do anything. Only when the result of the load from memory is used in a memory write will the code not be optimised away during compilation. You can confirm this yourself by disassembling the code emitted by the compiler, if you are curious.
Note that there are ways to make this work on multi-GPU systems which supported it, via peer-to-peer access, but that must be explicitly configured in your code for that facility to be used.

Checking code integrity in iOS

How could I guarantee the integrity of the code of an iOS app? I've been taking a look to Apple's Security Overview document, would code signing be enough? Is there any other recommended mechanism to guarantee the code integrity?
Thanks in advance

I had the same problem. This is easy on OS X but somewhat difficult on iOS, because iOS doesn't have an API like SecStaticCodeCheckValidity.
There are two sections in a Mach-O binary that you can use to check the integrity of the app:
LC_ENCRYPTION_INFO
LC_CODE_SIGNATURE
1. LC_ENCRYPTION_INFO
First, LC_ENCRYPTION_INFO stores information about the 'App Store encryption'. Once an app is uploaded to the App Store, it is encrypted before it is released to users.
Binary before uploading to the App Store (or after it has been decrypted):
otool -l [binary] | grep LC_ENCRYPTION_INFO -A5
cmd LC_ENCRYPTION_INFO
cmdsize 20
cryptoff 16384
cryptsize 5783552
cryptid 0
--
cmd LC_ENCRYPTION_INFO_64
cmdsize 24
cryptoff 16384
cryptsize 6635520
cryptid 0
pad 0
binary after uploading to appstore (encrypted)
otool -l [binary] | grep LC_ENCRYPTION_INFO -A5
cmd LC_ENCRYPTION_INFO
cmdsize 20
cryptoff 16384
cryptsize 5783552
cryptid 1
--
cmd LC_ENCRYPTION_INFO_64
cmdsize 24
cryptoff 16384
cryptsize 6635520
cryptid 1
pad 0
As you can see, 'cryptid' is set to 1 once the app has been uploaded. So checking the 'cryptid' field tells us whether the binary is encrypted or not.
You may think that this can be bypassed easily by just setting the field to 1, but then the OS will try to decrypt the binary, which turns the code into unrecognizable bytes.
// Reports whether the main bundle's executable carries an
// LC_ENCRYPTION_INFO / LC_ENCRYPTION_INFO_64 load command with a non-zero
// cryptid.
//
// The executable file is read from disk. For a fat file the slice matching
// the pointer size of the running process is chosen, then the load commands
// of that slice are walked.
//
// Returns the cryptid converted to bool, or FALSE when the file cannot be
// read, is not a recognised Mach-O image, or has no encryption command.
bool isBinaryEncrypted()
{
    // checking current binary's LC_ENCRYPTION_INFO
    NSString *path = [[NSBundle mainBundle] executablePath];
    NSData *filedata = [NSData dataWithContentsOfFile:path];
    NSUInteger fileLength = [filedata length];
    if (filedata == nil || fileLength < sizeof(struct mach_header_64))
    {
        return FALSE; // nothing (or too little) to parse
    }

    const uint8_t *binaryBase = (const uint8_t *)[filedata bytes];
    const uint8_t *binaryEnd = binaryBase + fileLength;
    const struct mach_header *machoHeader = (const struct mach_header *)binaryBase;
    // Stays NULL for an unrecognised magic so that the loop below is skipped
    // instead of walking an uninitialised pointer.
    struct load_command *machoCmd = NULL;

    if (machoHeader->magic == FAT_CIGAM)
    {
        unsigned int offset = 0;
        struct fat_header *fatHeader = (struct fat_header *)machoHeader;
        struct fat_arch *fatArch = (struct fat_arch *)(fatHeader + 1);
        // Fat headers are stored big-endian, hence the ntohl() conversions.
        for (uint32_t i = 0; i < ntohl(fatHeader->nfat_arch); i++)
        {
            if ((const uint8_t *)(fatArch + 1) > binaryEnd)
            {
                return FALSE; // arch table runs past the end of the file
            }
            bool archIs64 = (ntohl(fatArch->cputype) & CPU_ARCH_ABI64) != 0;
            // pick the 32bit slice on 32bit and the 64bit slice on 64bit
            if ((sizeof(int *) == 4 && !archIs64) || (sizeof(int *) == 8 && archIs64))
            {
                offset = ntohl(fatArch->offset);
                break;
            }
            fatArch = fatArch + 1;
        }
        if (offset > fileLength - sizeof(struct mach_header_64))
        {
            return FALSE; // slice header would lie outside the file
        }
        machoHeader = (const struct mach_header *)(binaryBase + offset);
    }

    if (machoHeader->magic == MH_MAGIC) // 32bit
    {
        machoCmd = (struct load_command *)((struct mach_header *)machoHeader + 1);
    }
    else if (machoHeader->magic == MH_MAGIC_64) // 64bit
    {
        machoCmd = (struct load_command *)((struct mach_header_64 *)machoHeader + 1);
    }

    for (uint32_t i = 0; i < machoHeader->ncmds && machoCmd != NULL; i++)
    {
        // Stop rather than read past the file or spin on a zero-sized command.
        if ((const uint8_t *)(machoCmd + 1) > binaryEnd || machoCmd->cmdsize == 0)
        {
            break;
        }
        if (machoCmd->cmd == LC_ENCRYPTION_INFO)
        {
            struct encryption_info_command *cryptCmd = (struct encryption_info_command *) machoCmd;
            return cryptCmd->cryptid;
        }
        if (machoCmd->cmd == LC_ENCRYPTION_INFO_64)
        {
            struct encryption_info_command_64 *cryptCmd = (struct encryption_info_command_64 *) machoCmd;
            return cryptCmd->cryptid;
        }
        machoCmd = (struct load_command *)((uint8_t *)machoCmd + machoCmd->cmdsize);
    }
    return FALSE; // couldn't find cryptcmd
}
2. LC_CODE_SIGNATURE
LC_CODE_SIGNATURE is the section that /usr/bin/codesign actually refers to when checking the validity of the binary. But parsing this section is a little more difficult than parsing LC_ENCRYPTION_INFO, because it is undocumented and there are no types like signature_info_command.
LC_CODE_SIGNATURE contains hashes of the whole binary except the section itself, and the hashes are updated whenever the binary is re-signed.
I ported the code of /usr/bin/codesign to parse this section; see the codesign sources and SecStaticCode::validateExecutable for reference.
CodeSigning.h
#ifndef CodeSigning_h
#define CodeSigning_h
#include <stdio.h>
// codes from https://opensource.apple.com/source/Security/Security-55179.1/libsecurity_codesigning/lib/cscdefs.h
/* Magic numbers identifying the code-signing blob kinds, plus the index
 * slot type under which the CodeDirectory is stored. */
enum {
CSMAGIC_REQUIREMENT = 0xfade0c00, /* single Requirement blob */
CSMAGIC_REQUIREMENTS = 0xfade0c01, /* Requirements vector (internal requirements) */
CSMAGIC_CODEDIRECTORY = 0xfade0c02, /* CodeDirectory blob */
CSMAGIC_EMBEDDED_SIGNATURE = 0xfade0cc0, /* embedded form of signature data */
CSMAGIC_DETACHED_SIGNATURE = 0xfade0cc1, /* multi-arch collection of embedded signatures */
CSSLOT_CODEDIRECTORY = 0, /* slot index for CodeDirectory */
};
/*
 * Structure of an embedded-signature SuperBlob
 *
 * The code in this header passes every multi-byte field through ntohl()
 * before using it, i.e. the fields are kept in network byte order.
 */
/* One index entry of a SuperBlob: which kind of blob, and where it starts. */
typedef struct __BlobIndex {
uint32_t type; /* type of entry */
uint32_t offset; /* offset of entry */
} CS_BlobIndex;
/* Header of the SuperBlob, immediately followed by its index entries. */
typedef struct __SuperBlob {
uint32_t magic; /* magic number */
uint32_t length; /* total length of SuperBlob */
uint32_t count; /* number of index entries following */
CS_BlobIndex index[]; /* (count) entries */
/* followed by Blobs in no particular order as indicated by offsets in index */
} CS_SuperBlob;
/*
 * C form of a CodeDirectory.
 */
typedef struct __CodeDirectory {
uint32_t magic; /* magic number (CSMAGIC_CODEDIRECTORY) */
uint32_t length; /* total length of CodeDirectory blob */
uint32_t version; /* compatibility version */
uint32_t flags; /* setup and mode flags */
uint32_t hashOffset; /* offset of hash slot element at index zero */
uint32_t identOffset; /* offset of identifier string */
uint32_t nSpecialSlots; /* number of special hash slots */
uint32_t nCodeSlots; /* number of ordinary (code) hash slots */
uint32_t codeLimit; /* limit to main image signature range */
uint8_t hashSize; /* size of each hash in bytes */
uint8_t hashType; /* type of hash (cdHashType* constants) */
uint8_t spare1; /* unused (must be zero) */
uint8_t pageSize; /* log2(page size in bytes); 0 => infinite */
uint32_t spare2; /* unused (must be zero) */
/* followed by dynamic content as located by offset fields above */
} CS_CodeDirectory;
/*
 * Locate the CodeDirectory blob inside an embedded-signature SuperBlob.
 *
 * Returns NULL when embedded is NULL, when its magic is not
 * CSMAGIC_EMBEDDED_SIGNATURE, when no index entry has type
 * CSSLOT_CODEDIRECTORY, or when the first such entry does not point at a
 * blob carrying CSMAGIC_CODEDIRECTORY.
 */
static inline const CS_CodeDirectory *findCodeDirectory(const CS_SuperBlob *embedded)
{
    if (embedded == NULL || ntohl(embedded->magic) != CSMAGIC_EMBEDDED_SIGNATURE) {
        return NULL;
    }

    const uint32_t entryCount = ntohl(embedded->count);
    for (uint32_t i = 0; i < entryCount; ++i) {
        const CS_BlobIndex *entry = &embedded->index[i];
        if (ntohl(entry->type) != CSSLOT_CODEDIRECTORY) {
            continue;
        }
        /* Only the first CodeDirectory slot is examined; offsets are
         * relative to the start of the SuperBlob. */
        const unsigned char *start = (const unsigned char *)embedded;
        const CS_CodeDirectory *candidate =
            (const CS_CodeDirectory *)(start + ntohl(entry->offset));
        return (ntohl(candidate->magic) == CSMAGIC_CODEDIRECTORY) ? candidate : NULL;
    }

    // not found
    return NULL;
}
//
unsigned char validateSlot(const void *data, size_t length, size_t slot, const CS_CodeDirectory *codeDirectory);
#endif /* CodeSigning_h */
CodeSigning.c
#include "CodeSigning.h"
#include <stdio.h>
#include <string.h>
#import <CommonCrypto/CommonDigest.h>
/*
 * Hash `length` bytes at `data` with SHA-1 and compare the digest with the
 * entry number `slot` of the code directory's hash table.
 * Returns 1 when the digests are equal, 0 otherwise.
 */
unsigned char validateSlot(const void *data, size_t length, size_t slot, const CS_CodeDirectory *codeDirectory)
{
    uint8_t computed[CC_SHA1_DIGEST_LENGTH + 1] = {0, };
    /* The hash table starts hashOffset bytes into the directory; entries are 20 bytes apart. */
    const char *hashTable = (const char *)codeDirectory + ntohl(codeDirectory->hashOffset);
    const char *expected = hashTable + 20 * slot;

    CC_SHA1(data, (CC_LONG)length, computed);
    if (memcmp(computed, expected, 20) == 0) {
        return 1;
    }
    return 0;
}
parsing the section
void checkCodeSignature(void *binaryContent){
struct load_command *machoCmd;
const struct mach_header *machoHeader;
machoHeader = (const struct mach_header *) binaryContent;
if(machoHeader->magic == FAT_CIGAM){
unsigned int offset = 0;
struct fat_arch *fatArch = (struct fat_arch *)((struct fat_header *)machoHeader + 1);
struct fat_header *fatHeader = (struct fat_header *)machoHeader;
for(uint32_t i = 0; i < ntohl(fatHeader->nfat_arch); i++)
{
if(sizeof(int *) == 4 && !(ntohl(fatArch->cputype) & CPU_ARCH_ABI64)) // check 32bit section for 32bit architecture
{
offset = ntohl(fatArch->offset);
break;
}
else if(sizeof(int *) == 8 && (ntohl(fatArch->cputype) & CPU_ARCH_ABI64)) // and 64bit section for 64bit architecture
{
offset = ntohl(fatArch->offset);
break;
}
fatArch = (struct fat_arch *)((uint8_t *)fatArch + sizeof(struct fat_arch));
}
machoHeader = (const struct mach_header *)((uint8_t *)machoHeader + offset);
}
if(machoHeader->magic == MH_MAGIC) // 32bit
{
machoCmd = (struct load_command *)((struct mach_header *)machoHeader + 1);
}
else if(machoHeader->magic == MH_MAGIC_64) // 64bit
{
machoCmd = (struct load_command *)((struct mach_header_64 *)machoHeader + 1);
}
for(uint32_t i=0; i < machoHeader->ncmds && machoCmd != NULL; i++){
if(machoCmd->cmd == LC_CODE_SIGNATURE)
{
struct linkedit_data_command *codeSigCmd = (struct linkedit_data_command *) machoCmd;
const CS_SuperBlob *codeEmbedded = (const CS_SuperBlob *)&((char *)machoHeader)[codeSigCmd->dataoff];
void *binaryBase = (void *)machoHeader;
const CS_BlobIndex curIndex = codeEmbedded->index[0];
const CS_CodeDirectory *codeDirectory = (const CS_CodeDirectory *)((char *)codeEmbedded + ntohl(curIndex.offset));
size_t pageSize = codeDirectory->pageSize ? (1 << codeDirectory->pageSize) : 0;
size_t remaining = ntohl(codeDirectory->codeLimit);
size_t processed = 0;
for(size_t slot = 0; slot < ntohl(codeDirectory->nCodeSlots); ++slot){
size_t size = MIN(remaining, pageSize);
if(!validateSlot(binaryBase+processed, size, slot, codeDirectory)){
return;
}
processed += size;
remaining -= size;
}
printf("[*] Code is valid!");
}
}
machoCmd = (struct load_command *)((uint8_t *)machoCmd + machoCmd->cmdsize);
}

obtaining scsi(including SAS and FC) hardisk model and serial number

I have recently been playing around with some hard drive stuff. What I want to do now is print out the model and serial number of a hard disk. SATA drives are very easy with ioctl. For SCSI, on the other hand, I have to send an INQUIRY command. I found a very helpful site which explains everything and even has an example program: http://tldp.org/HOWTO/archived/SCSI-Programming-HOWTO/SCSI-Programming-HOWTO-24.html
But I get either nothing or gibberish when I print the result. I even had to fix the program, because stdlib.h wasn't included and the function Inquiry returned a local variable. But I have no idea how to fix the rest...
#define DEVICE "/dev/sdb"
/* Example program to demonstrate the generic SCSI interface */
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>
#include <scsi/sg.h>
#define SCSI_OFF sizeof(struct sg_header)
static unsigned char cmd[SCSI_OFF + 18]; /* SCSI command buffer */
int fd; /* SCSI device/file descriptor */
/* process a complete scsi cmd. Use the generic scsi interface.
 *
 * i_buff must start with room for a struct sg_header, followed by cmd_len
 * command bytes and in_size bytes of outgoing data. When o_buff is given it
 * must have room for a struct sg_header plus out_size reply bytes; when it
 * is NULL the reply header is read back into i_buff and out_size is
 * treated as 0. The request is write()n to the global descriptor fd and the
 * reply read() from it.
 *
 * Returns 0 on success. Returns -1 for rejected arguments, a short write, or
 * a reply whose header reports a non-zero result; otherwise the failing
 * write()/read() return value (negative on error, or a short byte count).
 */
static int handle_scsi_cmd(unsigned cmd_len, /* command length */
                           unsigned in_size, /* input data size */
                           unsigned char *i_buff, /* input buffer */
                           unsigned out_size, /* output data size */
                           unsigned char *o_buff /* output buffer */
                           )
{
    int status = 0;
    struct sg_header *sg_hd;
    size_t request_len;
    size_t reply_len;

    /* safety checks */
    if (!cmd_len) return -1; /* need a cmd_len != 0 */
    if (!i_buff) return -1; /* need an input buffer != NULL */
#ifdef SG_BIG_BUFF
    if (SCSI_OFF + cmd_len + in_size > SG_BIG_BUFF) return -1;
    if (SCSI_OFF + out_size > SG_BIG_BUFF) return -1;
#else
    if (SCSI_OFF + cmd_len + in_size > 4096) return -1;
    if (SCSI_OFF + out_size > 4096) return -1;
#endif
    if (!o_buff) out_size = 0;
    request_len = SCSI_OFF + cmd_len + in_size;
    reply_len = SCSI_OFF + out_size;

    /* generic scsi device header construction */
    sg_hd = (struct sg_header *) i_buff;
    sg_hd->reply_len = reply_len;
    sg_hd->twelve_byte = cmd_len == 12;
    sg_hd->result = 0;
#if 0
    sg_hd->pack_len = SCSI_OFF + cmd_len + in_size; /* not necessary */
    sg_hd->pack_id; /* not used */
    sg_hd->other_flags; /* not used */
#endif

    /* send command */
    status = write( fd, i_buff, request_len );
    if ( status < 0 || (size_t) status != request_len || sg_hd->result ) {
        /* some error happened */
        fprintf( stderr, "write(generic) result = 0x%x cmd = 0x%x\n",
                 sg_hd->result, i_buff[SCSI_OFF] );
        if (status < 0) {
            perror("");
            return status;
        }
        /* A short count (possibly 0) must not be mistaken for success. */
        return -1;
    }

    if (!o_buff) o_buff = i_buff; /* buffer pointer check */

    /* retrieve result */
    status = read( fd, o_buff, reply_len );
    /* The reply carries its own header at the start of o_buff. Once at least
     * a full header has arrived, take result and sense data from there
     * instead of from the request header, whose result was zeroed above. */
    if (status >= 0 && (size_t) status >= SCSI_OFF)
        sg_hd = (struct sg_header *) o_buff;
    if ( status < 0 || (size_t) status != reply_len || sg_hd->result ) {
        /* some error happened */
        fprintf( stderr, "read(generic) result = 0x%x cmd = 0x%x\n",
                 sg_hd->result, o_buff[SCSI_OFF] );
        fprintf( stderr, "read(generic) sense "
                 "%x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x\n",
                 sg_hd->sense_buffer[0], sg_hd->sense_buffer[1],
                 sg_hd->sense_buffer[2], sg_hd->sense_buffer[3],
                 sg_hd->sense_buffer[4], sg_hd->sense_buffer[5],
                 sg_hd->sense_buffer[6], sg_hd->sense_buffer[7],
                 sg_hd->sense_buffer[8], sg_hd->sense_buffer[9],
                 sg_hd->sense_buffer[10], sg_hd->sense_buffer[11],
                 sg_hd->sense_buffer[12], sg_hd->sense_buffer[13],
                 sg_hd->sense_buffer[14], sg_hd->sense_buffer[15]);
        if (status < 0)
            perror("");
        else if ((size_t) status == reply_len)
            return -1; /* complete reply, but the driver reported a failure */
        return status;
    }
    /* Got exactly what we expected and no error was reported. */
    return 0; /* 0 means no error */
}
#define INQUIRY_CMD 0x12
#define INQUIRY_CMDLEN 6
#define INQUIRY_REPLY_LEN 96
#define INQUIRY_VENDOR 8 /* Offset in reply data to vendor name */
/* request vendor brand and model
 *
 * Sends a 6-byte INQUIRY command through handle_scsi_cmd() and returns a
 * pointer to the reply data (the bytes after the sg_header).
 *
 * The reply lives in a static buffer: the pointer stays valid after the
 * function returns, is overwritten by the next call, and must not be freed.
 * The buffer is cleared before every request and is one byte longer than
 * the reply, so the data is always followed by a NUL and can be printed
 * with "%s" from any offset.
 *
 * Exits the process with status 2 when the command fails.
 */
static unsigned char *Inquiry ( void )
{
    /* static: returning the address of an automatic array would hand the
     * caller a dangling pointer. */
    static unsigned char Inqbuffer[ SCSI_OFF + INQUIRY_REPLY_LEN + 1 ];
    unsigned char cmdblk [ INQUIRY_CMDLEN ] =
        { INQUIRY_CMD, /* command */
          0, /* lun/reserved */
          0, /* page code */
          0, /* reserved */
          INQUIRY_REPLY_LEN, /* allocation length */
          0 };/* reserved/flag/link */

    memset( Inqbuffer, 0, sizeof(Inqbuffer) );
    memcpy( cmd + SCSI_OFF, cmdblk, sizeof(cmdblk) );
    /*
     * +------------------+
     * | struct sg_header | <- cmd
     * +------------------+
     * | copy of cmdblk | <- cmd + SCSI_OFF
     * +------------------+
     */
    if (handle_scsi_cmd(sizeof(cmdblk), 0, cmd,
                        INQUIRY_REPLY_LEN, Inqbuffer )) {
        fprintf( stderr, "Inquiry failed\n" );
        exit(2);
    }
    return (Inqbuffer + SCSI_OFF);
}
/* Open DEVICE, print the INQUIRY reply starting at the vendor field, and
 * close the device again.
 *
 * NOTE(review): handle_scsi_cmd() talks to the descriptor with the
 * sg_header write()/read() protocol, which is presumably only understood by
 * SCSI generic nodes (/dev/sgN). On a block device such as /dev/sdb the
 * write() would store the request bytes on the disk itself - confirm that
 * DEVICE names an sg node before running this.
 */
int main( void )
{
    fd = open(DEVICE, O_RDWR);
    if (fd < 0) {
        fprintf( stderr, "Need read/write permissions for "DEVICE".\n" );
        exit(1);
    }
    /* print some fields of the Inquiry result */
    printf( "||%s||", Inquiry() + INQUIRY_VENDOR );
    close(fd);
    return 0;
}

pass structure to kernel local memory

I have a problem with passing a structure to kernel local memory. Here is the kernel code:
// Output record written by tKernel; the host program declares a struct of the
// same name.
// NOTE(review): the host side sizes iId with LOCAL_ITEM_SIZE, so the two
// layouts agree only while LOCAL_ITEM_SIZE is 1.
typedef struct data {
unsigned long wId; // group_id
unsigned long iId[1]; // global_item_id
} DATA;
// Stores tmp.wId in the output element of this work-group and the
// incremented value in the iId slot of this work-item.
// NOTE(review): tmp is declared as a by-value __local argument. The answer
// below points out that for __local arguments the host can only specify a
// size, so tmp cannot carry a value set by the host.
__kernel void tKernel(__global DATA *x, __local DATA tmp) {
int wd = get_work_dim(); // not used below
// x dimension
int xGrId = get_group_id(0);
int xLId = get_local_id(0);
int xGlId = get_global_id(0); // not used below
x += xGrId; // step to the output element of this work-group
x->wId = tmp.wId;
x->iId[xLId] = ++tmp.wId;
}
Here is the host code:
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
#define GLOBAL_ITEM_SIZE (1)
#define LOCAL_ITEM_SIZE (1)
#define MAX_SOURCE_SIZE (0x100000)
/* Host-side mirror of the DATA struct declared in the kernel source; the
 * result buffer is read back as an array of these.
 * NOTE(review): OpenCL C fixes unsigned long at 64 bits while the host type
 * is platform-dependent; presumably cl_ulong is the matching host type -
 * confirm. */
typedef struct data {
unsigned long wId;
unsigned long iId[LOCAL_ITEM_SIZE];
} DATA;
int main() {
cl_platform_id platform_id = NULL;
cl_device_id device_id = NULL;
cl_context context = NULL;
cl_command_queue commandQueue = NULL;
cl_mem cmPinnedBufOut = NULL;
DATA *cDataOut = NULL;
cl_program program = NULL;
cl_kernel kernel = NULL;
cl_uint ret_num_devices;
cl_uint ret_num_platforms;
cl_int ret;
size_t group_size = GLOBAL_ITEM_SIZE / LOCAL_ITEM_SIZE;
FILE *fp;
const char fileName[] = "./kernel.cl";
size_t source_size;
char *source_str;
/* Load kernel source file */
fp = fopen(fileName, "r");
if (!fp) {
fprintf(stderr, "Failed to load kernel.\n");
exit(EXIT_FAILURE);
}
source_str = (char *)malloc(MAX_SOURCE_SIZE);
source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp);
fclose(fp);
/* Create OpenCL Context */
context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret);
/* Create command queue with measurment of preformance */
commandQueue = clCreateCommandQueue(context, device_id, 0, &ret);
/* Create memory object */
cmPinnedBufOut = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, group_size * sizeof(DATA), NULL, &ret);
cDataOut = (DATA *)malloc(group_size * sizeof(DATA));
/* Create kernel program from source file */
program = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret);
assert(ret == CL_SUCCESS);
ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
if (ret != CL_SUCCESS) {
printf("\nFail to build the program\n");
char buffer[10240];
clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, NULL);
printf("%s\n", buffer);
exit(EXIT_FAILURE);
}
/* Create data parallel OpenCL kernel */
kernel = clCreateKernel(program, "tKernel", &ret);
assert(ret == CL_SUCCESS);
/* Set OpenCL kernel arguments */
ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&cmPinnedBufOut);
assert(ret == CL_SUCCESS);
DATA tmp;
tmp.wId = 66;
ret = clSetKernelArg(kernel, 1, sizeof(DATA), &tmp);
assert(ret == CL_SUCCESS);
size_t global_item_size = GLOBAL_ITEM_SIZE;
size_t local_item_size = LOCAL_ITEM_SIZE;
/* Execute OpenCL kernel as data parallel */
ret = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, &global_item_size, &local_item_size, 0, NULL, NULL);
if (ret == CL_INVALID_WORK_GROUP_SIZE) {
printf("Invalid work group size: error when compute group size: %lu/%lu", global_item_size, local_item_size);
exit(EXIT_FAILURE);
}
assert(ret == CL_SUCCESS);
/* Transfer result to host */
ret = clEnqueueReadBuffer(commandQueue, cmPinnedBufOut, CL_TRUE, 0, group_size * sizeof(DATA), cDataOut, 0, NULL, NULL);
assert(ret == CL_SUCCESS);
/* Display Results */
for (int i = 0; i < group_size; i++) {
printf("%d: -> group_id %lu ~> work_item_ids: ", i, cDataOut[i].wId);
for (int j = 0; j < LOCAL_ITEM_SIZE; j++)
printf("%2lu, ", cDataOut[i].iId[j]);
printf("\n");
}
printf("\n");
/* Finalization */
ret = clFlush(commandQueue);
ret = clFinish(commandQueue); // blockink function, wait until all queue cmd are finished
ret = clReleaseKernel(kernel);
ret = clReleaseProgram(program);
ret = clReleaseCommandQueue(commandQueue);
ret = clReleaseContext(context);
free(source_str);
return 0;
}
So I expected the result to be 0: -> group_id 66 ~> work_item_ids: 67,
But I get 0: -> group_id 0 ~> work_item_ids: 1,
From this I conclude that the structure holding the number 66 was not read correctly
by the kernel. I tried passing a plain integer in the same way, and that works perfectly.
So my question is: am I doing something wrong, is there no way to copy a data structure from the host to device local memory, or is there another way of doing this?

The clSetKernelArg for __local buffers only specifies the size, and the pointer must be 0. See OpenCL spec 5.7.2. There is no way you can initialize local memory from the host.

Develop Reference

ios ruby-on-rails asp.net-mvc docker delphi jenkins grails google-sheets machine-learning dart

How to discover physical address corresponding to PCIe device memory? - memory

Related

Windows DPDK L2fwd- Receiving packets out of sequence

cudaMalloc and cudaMemcpy not working on kernel call

Checking code integrity in iOS

obtaining scsi(including SAS and FC) hardisk model and serial number

pass structure to kernel local memory

Categories

Resources