Fragment function seems properly written but Metal complains - metal

TL;DR: Metal doesn't seem to detect what my vertex shader returns
I have these two functions written in MSL :
// Vertex function: returns the float4 stored at index `vid` of the buffer
// bound at buffer index 0. The return type is a bare float4; no attribute
// is attached to the returned value.
vertex float4 base_image_rect(constant float4 *pos [[buffer(0)]],
uint vid [[vertex_id]]) {
return pos[vid];
}
// Fragment function: samples `imageToRender` with `imageSampler` at
// (vPos.x, vPos.y) and returns the sampled colour.
// `vPos` is declared [[stage_in]]; it is the input named in the link error
// quoted further down.
fragment float4 fragment_image_display(float4 vPos [[stage_in]],
texture2d<float, access::sample> imageToRender [[texture(0)]],
sampler imageSampler [[sampler(0)]]) {
return imageToRender.sample(imageSampler, float2(vPos.x, vPos.y));
}
When I try to create my render pipeline state with those, using this code:
// Make image display render pipeline state
let imageDisplayStateDescriptor = MTLRenderPipelineDescriptor()
// Colour attachment 0 uses the view's pixel format.
imageDisplayStateDescriptor.colorAttachments[0].pixelFormat = view.colorPixelFormat
// Look up the two shader functions by name in the library.
imageDisplayStateDescriptor.vertexFunction = library.makeFunction(name: "base_image_rect")
imageDisplayStateDescriptor.fragmentFunction = library.makeFunction(name: "fragment_image_display")
// `try!` turns any error thrown by pipeline creation into a fatal error on this line.
displayImagePipelineState = try! device.makeRenderPipelineState(descriptor: imageDisplayStateDescriptor)
There is an error at the creation of the pipeline state:
fatal error: 'try!' expression unexpectedly raised an error: Error
Domain=CompilerError Code=1 "Link failed: fragment input vPos was not
found in vertex shader outputs" [...]
I checked and rechecked the code and can't understand what's wrong.
Any ideas? Thank you!

Try replacing stage_in with position. I think that stage_in is mostly used with structs where each field is either annotated with a specific attribute qualifier or matched by name. Apparently, when it's used with a non-struct type, it's trying to match by name. For example, if your vertex function were to output a struct one of whose fields was vPos, that would find it.

Related

How to access Tier 1 Argument Buffer struct without indexing

According to this example from Apple, Tier 1 Argument Buffers cannot be accessed through pointer indexing (https://developer.apple.com/documentation/metal/buffers/about_argument_buffers). If this is not allowed, how can I index into a particular struct in my argument buffer array?
// Shader.metal
// Wrapper struct holding a single sampleable 2D float texture.
struct MyTexture {
metal::texture2d<float, metal::access::sample> texture;
};
// Fragment function from the question (the `....` lines are elisions made by
// the author, so this is not compilable as shown).
// It loops over `count` entries and indexes `textures[i]`, where `textures`
// is bound at buffer(5) -- the binding named in the error message below.
fragment half4 myFragment(VertexOut vert [[stage_in]],
....,
constant int &count [[buffer(4)]],
constant MyTexture *textures [[buffer(5)]],
....)
{
for(int i = 0; i < count; i++) {
// Indexed access into the argument buffer array.
MyTexture resource = textures[i];
float4 color = resource.texture.sample(sampler, pos.xy);
outputColor = mix(inputColor, color, 0.5); // <-- Causes error
}
}
The error that I get is from creating the MTLRenderPipelineState with this error message:
Inlining all functions due to use of indirect argument buffer
buffer(5): Argument buffer accessed with non-zero array index.
Short answer: you can't.
The reason is that Tier 1 hardware can only emulate argument buffers using regular bind points. With Tier 2 you can bind any number of textures there, so the driver can't know at bind time how many slots it will need to use, and the hardware itself can't do a dependent read for other GPU objects, like textures and samplers.
A workaround for Tier 1 would be to pass a pointer to the instance inside the Argument Buffer, rather than the entire buffer.
Example, look at the use of Material.
// Argument-buffered resource
// Every member carries an explicit [[id(...)]] attribute; the AB_* index
// constants are defined outside this snippet.
struct Material {
metal::sampler sampler [[id(AB_MaterialSampler)]];
metal::texture2d<float> base_color_texture [[id(AB_MaterialBaseColorTexture)]];
metal::texture2d<float> normal_map [[id(AB_MaterialNormalMap)]];
metal::texture2d<float> ao_metallic_roughness_map [[id(AB_MaterialAoMetallicRoughnessMap)]];
float3 base_color_factor [[id(AB_MaterialBaseColorFactor)]];
float metallic_factor [[id(AB_MaterialMetallicFactor)]];
float roughness_factor [[id(AB_MaterialRoughnessFactor)]];
};
// GPU-driven rendering kernel
// Each thread (indexed by `instance_id`) encodes one indexed draw for
// `meshes[instance_id]` into the indirect command buffer held by
// `icb_container`. The KB_*/VB_*/FB_* index constants and the Mesh,
// Transform, etc. types are defined outside this snippet.
kernel void icb_frame_kernel(device IcbContainer& icb_container [[buffer(KB_IcbContainer)]],
constant VertexUniforms* vertex_uniforms [[buffer(KB_VertexUniforms)]],
constant FragmentUniforms* fragment_uniforms [[buffer(KB_FragmentUniforms)]],
device Mesh* meshes [[buffer(KB_Meshes)]],
constant Transform* transforms [[buffer(KB_Transforms)]],
device Material* materials [[buffer(KB_Materials)]],
constant ShadowMap* shadow_map [[buffer(KB_ShadowMap)]],
constant Ibl* ibl [[buffer(KB_Ibl)]],
constant Cubemap* cubemap [[buffer(KB_Cubemap)]],
device MTLIndirectCommandBufferExecutionRange& range [[buffer(KB_ExecutionRange)]],
const uint instance_id [[thread_position_in_grid]]) {
device auto& mesh = meshes[instance_id];
// Atomically bump range.length and use the previous value as this
// thread's command slot in the indirect command buffer.
device auto* range_length = reinterpret_cast<device atomic_uint*>(&range.length);
const auto index = atomic_fetch_add_explicit(range_length, 1, memory_order_relaxed);
render_command cmd(icb_container.icb, index);
cmd.set_render_pipeline_state(mesh.pipeline_state);
cmd.set_vertex_buffer(mesh.vertex_buffer, VB_Vertices);
cmd.set_vertex_buffer(vertex_uniforms, VB_VertexUniforms);
cmd.set_vertex_buffer(transforms, VB_Transforms);
cmd.set_fragment_buffer(fragment_uniforms, FB_FragmentUniforms);
cmd.set_fragment_buffer(transforms, FB_Transforms);
// The next two calls both bind FB_Material and are shown together as
// alternatives for the example; presumably only the one matching the
// device's argument-buffer tier would be kept in real code.
// Tier 1: use indexed access and pass pointer to instance
cmd.set_fragment_buffer(&materials[instance_id], FB_Material);
// Tier 2: pass entire buffer and use indexed access in fragment shader
cmd.set_fragment_buffer(materials, FB_Material);
cmd.set_fragment_buffer(shadow_map, FB_ShadowMap);
cmd.set_fragment_buffer(ibl, FB_Ibl);
cmd.set_fragment_buffer(cubemap, FB_Cubemap);
// The index buffer is reinterpreted as 16-bit or 32-bit indices
// depending on the mesh flag; the draw call is otherwise identical.
if (mesh.is_uint16_index){
constant auto* index_buffer = static_cast<constant ushort*>(mesh.index_buffer);
cmd.draw_indexed_primitives(primitive_type::triangle, mesh.index_count, index_buffer, 1, 0, instance_id);
} else {
constant auto* index_buffer = static_cast<constant uint*>(mesh.index_buffer);
cmd.draw_indexed_primitives(primitive_type::triangle, mesh.index_count, index_buffer, 1, 0, instance_id);
}
}
// Tier 1
// Receives a single Material by reference at FB_Material, so no indexing
// into an argument buffer array happens inside the fragment function.
fragment half4 pbr_fragment(ProjectedVertex vert [[stage_in]],
constant FragmentUniforms& uniforms [[buffer(FB_FragmentUniforms)]],
constant Material& material [[buffer(FB_Material)]],
constant Ibl& ibl [[buffer(FB_Ibl), function_constant(HAS_IBL)]],
constant ShadowMap& shadow_map [[buffer(FB_ShadowMap), function_constant(HAS_SHADOW_MAP)]]
) {
// Use Material
}
// Tier 2
// Receives the whole Material array at FB_Material (device address space)
// and selects the entry for this draw with vert.instance_id.
fragment half4 pbr_fragment(ProjectedVertex vert [[stage_in]],
constant FragmentUniforms& uniforms [[buffer(FB_FragmentUniforms)]],
device Material* materials [[buffer(FB_Material)]],
constant Ibl& ibl [[buffer(FB_Ibl), function_constant(HAS_IBL)]],
constant ShadowMap& shadow_map [[buffer(FB_ShadowMap), function_constant(HAS_SHADOW_MAP)]]
) {
// Use indexed Material
const auto& material = materials[vert.instance_id];
}
I did not have time to edit the example for brevity, but it should be clear enough.
Side note: the Metal spec recommends using the device address space whenever you use pointer arithmetic (indexed access). See page 61 of the spec.

Writing to an MTLTexture causes a fatal error

Given an MTLTexture, defined as follows.
// Create device.
id<MTLDevice> dev = MTLCreateDefaultSystemDevice();
// Size of texture.
const unsigned int W = 640;
const unsigned int H = 480;
// Define texture. Only pixelFormat, width and height are set here; every
// other descriptor property (including usage) is left at its default.
MTLTextureDescriptor *desc = [[MTLTextureDescriptor alloc] init];
desc.pixelFormat = MTLPixelFormatBGRA8Unorm;
desc.width = W;
desc.height = H;
// Create texture on the device created above (`dev`).
id<MTLTexture> tex = [dev newTextureWithDescriptor:desc];
It is my understanding that at this point I should have a texture as defined in desc allocated on device dev and accessible through tex.
Now, given another texture tex2 (known to be allocated and accessible) and a Metal compute kernel defined as follows.
// Compute kernel: one thread per pixel position `pix`. Reads from `in`
// (texture index 0) and writes one float4 to `out` (texture index 1).
// The processing that produces `abc` is elided in this snippet.
kernel void foo(texture2d<float, access::read> in [[texture(0)]],
texture2d<float, access::write> out [[texture(1)]],
uint2 pix [[thread_position_in_grid]]) {
// Out of bounds check.
if (pix.x >= out.get_width() || pix.y >= out.get_height()) {
return;
}
// Do some processing on the input texture.
// ... All fine up to here.
// Write out a pixel to the output buffer.
const float4 p = abc; // abc is computed above.
out.write(p, pix);
}
It is my understanding that when the pixel p is written out to out, the values of p will be converted to conform to the pixel format of tex, in this case MTLPixelFormatBGRA8Unorm.
However, when launching the kernel as follows, the line in which p is written to out (above defined as tex) triggers a critical error (SIGABRT).
// Create a Metal library.
id<MTLLibrary> lib = [dev newDefaultLibrary];
// Load the kernel.
id<MTLFunction> kernel = [lib newFunctionWithName:@"foo"];
// Create a pipeline state (the error out-parameter is not inspected here).
id<MTLComputePipelineState> pipelineState = [dev newComputePipelineStateWithFunction:kernel error:NULL];
// Create a command queue.
id<MTLCommandQueue> cmdQueue = [dev newCommandQueue];
// Create command buffer.
id<MTLCommandBuffer> cmdBuff = [cmdQueue commandBuffer];
// Create compute encoder.
id<MTLComputeCommandEncoder> enc = [cmdBuff computeCommandEncoder];
// Set the pipeline state.
[enc setComputePipelineState:pipelineState];
// Set the textures: tex2 is the read-only input (index 0), tex is the
// write-only output (index 1), matching the kernel's signature.
[enc setTexture:tex2 atIndex:0];
[enc setTexture:tex atIndex:1];
// 2D launch configuration: enough 16x16 threadgroups to cover W x H,
// rounding up; the kernel's bounds check discards the excess threads.
const MTLSize groupDim = MTLSizeMake(16, 16, 1);
const MTLSize gridDim = MTLSizeMake((int)ceil((float)(W / (float)groupDim.width)),
(int)ceil((float)(H / (float)groupDim.height)),
1);
// Launch kernel.
[enc dispatchThreadgroups:gridDim threadsPerThreadgroup:groupDim];
[enc endEncoding];
// Commit the command buffer (not the encoder) and block until it completes.
[cmdBuff commit];
[cmdBuff waitUntilCompleted];
My question is that under the scenario outlined above, is my understanding of how one allocates a MTLTexture correct? Or, is the example above merely defining a wrapper around some texture that I need to separately allocate?
The above texture allocation and compute kernel launch are correct. Upon further digging in the documentation, the part that was missing was the usage property of MTLTextureDescriptor. In the documentation, the following is stated.
The default value for this property is MTLTextureUsageShaderRead.
As such, in the example given in the question, the following additional property assignment on MTLTextureDescriptor is required.
// Override the default usage (MTLTextureUsageShaderRead) so that shaders may write to the texture.
desc.usage = MTLTextureUsageShaderWrite;

Error when using Metal Indirect Command Buffer: "Fragment shader cannot be used with indirect command buffers"

I’m working on a Metal, MTKView based app that takes advantage of the A11 TBDR architecture to do deferred shading in a single render pass. I used Apple’s Deferred Lighting sample code as reference, and it works great.
I’d like to try changing the geometry buffer pass to be GPU-driven, using the Indirect Command Buffer feature of Metal 2 on A11 hardware.
I’ve been using Apple’s Encoding Indirect Command Buffers on the GPU sample code as my main point of reference for this. I’m able to run this sample on my iPhone XR (although, probably off-topic, the scrolling is not smooth, it judders).
I’m running into difficulties however with my own code, when I try to move my geometry buffer pass into an indirect command buffer. When I set supportIndirectCommandBuffers to true on the MTLRenderPipelineDescriptor of the Geometry Buffer pipeline, device.makeRenderPipelineState fails with the error
AGXMetalA12 Code=3 "Fragment shader cannot be used with indirect command buffers"
I’ve not been able to find any information in the documentation on this error. I’m wondering, are there certain kinds of fragment operation that are not allowed in indirect pipelines, or some kind of limit to GPU-driven drawing that I've overlooked (the number of color attachments perhaps)?
SharedTypes.h
Header shared by Metal and Swift
// Types and index enums shared between the Metal shaders and the Swift host code.
#ifndef SharedTypes_h
#define SharedTypes_h
#ifdef __METAL_VERSION__
// When compiled as Metal, supply local stand-ins for the Foundation macros/types
// used below, since Foundation is only imported on the host side.
#define NS_CLOSED_ENUM(_type, _name) enum _name : _type _name; enum _name : _type
#define NSInteger metal::int32_t
#else
#import <Foundation/Foundation.h>
#endif
#include <simd/simd.h>
// Per-instance data read by the vertex shader.
typedef struct {
uint32_t meshId;
matrix_float3x3 normalViewMatrix;
matrix_float4x4 modelMatrix;
matrix_float4x4 shadowMVPTransformMatrix;
} InstanceData;
// Per-frame uniforms shared by the vertex and fragment stages.
typedef struct {
vector_float3 cameraPosition;
float voxelScale;
float blockScale;
vector_float3 lightDirection;
matrix_float4x4 viewMatrix;
matrix_float4x4 projectionMatrix;
matrix_float4x4 projectionMatrixInverse;
matrix_float4x4 shadowViewProjectionMatrix;
} VoxelUniforms;
// Buffer binding indices used in [[ buffer(...) ]] attributes.
typedef NS_CLOSED_ENUM(NSInteger, BufferIndex)
{
BufferIndexInstances = 0,
BufferIndexVertices = 1,
BufferIndexIndices = 2,
BufferIndexVoxelUniforms = 3,
};
// Colour attachment indices of the G-buffer render targets.
typedef NS_CLOSED_ENUM(NSInteger, RenderTarget)
{
RenderTargetLighting = 0,
RenderTargetNormal_shadow = 1,
RenderTargetVoxelIndex = 2,
RenderTargetDepth = 3,
};
#endif /* SharedTypes_h */
GBuffer shader
#include <metal_stdlib>
using namespace metal;
#include "../SharedTypes.h"
// Layout of one element of the vertex buffer, read manually by index in
// gBufferVertex (it is not a [[stage_in]] input).
struct VertexIn {
packed_half3 position;
packed_half3 texCoord3D;
half ambientOcclusion;
uchar normalIndex;
};
// Vertex-stage output / fragment-stage input. Only `position` carries an
// attribute ([[ position ]]); the remaining members are plain fields.
struct VertexInOut {
float4 position [[ position ]];
half3 worldPos;
half3 eyeNormal;
half3 localPosition;
half3 localNormal;
float eyeDepth;
float3 shadowCoord;
half3 texCoord3D;
};
// G-buffer vertex function. Fetches the instance and vertex records by
// `iid` / `vid`, transforms the position through model, view and projection
// matrices, and fills every field of VertexInOut.
// `normals` is a lookup table defined outside this snippet.
vertex VertexInOut gBufferVertex(device InstanceData* instances [[ buffer( BufferIndexInstances ) ]],
device VertexIn* vertices [[ buffer( BufferIndexVertices ) ]],
constant VoxelUniforms &uniforms [[ buffer( BufferIndexVoxelUniforms ) ]],
uint vid [[ vertex_id ]],
ushort iid [[ instance_id ]])
{
InstanceData instance = instances[iid];
VertexIn vert = vertices[vid];
VertexInOut out;
// Local -> world -> eye -> clip space.
float4 position = float4(float3(vert.position), 1);
float4 worldPos = instance.modelMatrix * position;
float4 eyePosition = uniforms.viewMatrix * worldPos;
out.position = uniforms.projectionMatrix * eyePosition;
out.worldPos = half3(worldPos.xyz);
out.eyeDepth = eyePosition.z;
// The normal is stored as an index into the `normals` table.
half3 normal = normals[vert.normalIndex];
out.eyeNormal = half3(instance.normalViewMatrix * float3(normal));
out.shadowCoord = (instance.shadowMVPTransformMatrix * position).xyz;
out.localPosition = half3(vert.position);
out.localNormal = normal;
out.texCoord3D = half3(vert.texCoord3D);
return out;
}
// G-buffer fragment function. Takes three texture arguments (texture
// indices 0-2) in addition to the uniforms buffer, and writes
// normal_shadow, voxelIndex and depth into GBufferData.
// GBufferData, tc2d, getAO, poissonDisk, fogOfWarSampler, shadowSampleCount,
// ambientLightingLevel and shadowContributionPerSample are defined outside
// this snippet.
fragment GBufferData gBufferFragment(VertexInOut in [[ stage_in ]],
constant VoxelUniforms &uniforms [[ buffer( BufferIndexVoxelUniforms ) ]],
texture3d<ushort, access::sample> voxelMap [[ texture(0) ]],
depth2d<float> shadowMap [[ texture(1) ]],
texture3d<half, access::sample> fogOfWarMap [[ texture(2) ]]
) {
// voxel index
half3 center = round(in.texCoord3D);
uchar voxIndex = voxelMap.read(ushort3(center)).r - 1;
// ambient occlusion
half3 neighborPos = center + in.localNormal;
half3 absNormal = abs(in.localNormal);
half2 texCoord2D = tc2d(in.localPosition / uniforms.voxelScale, absNormal);
half ao = getAO(voxelMap, neighborPos, absNormal.yzx, absNormal.zxy, texCoord2D);
// shadow
constexpr sampler shadowSampler(coord::normalized,
filter::linear,
mip_filter::none,
address::clamp_to_edge,
compare_func::less);
// Start from the ambient level and accumulate one depth-compare sample
// per poissonDisk offset, then clamp the sum to 1.0.
float shadow_sample = ambientLightingLevel;
for (short i = 0; i < shadowSampleCount; i++){
shadow_sample += shadowMap.sample_compare(shadowSampler, in.shadowCoord.xy + poissonDisk[i] * 0.002, in.shadowCoord.z - 0.0018) * shadowContributionPerSample;
}
shadow_sample = min(1.0, shadow_sample);
//fog-of-war
half fogOfWarSample = fogOfWarMap.sample(fogOfWarSampler, (float3(in.worldPos) / uniforms.blockScale) + float3(0.5, 0.4, 0.5)).r;
// The fog-of-war factor is floored at 0.5.
half notVisible = max(fogOfWarSample, 0.5h);
// output
GBufferData out;
out.normal_shadow = half4(in.eyeNormal, ao * half(shadow_sample) * notVisible);
out.voxelIndex = voxIndex;
out.depth = in.eyeDepth;
return out;
};
Pipeline setup
// Swift-side helpers for the RenderTarget enum declared in SharedTypes.h.
extension RenderTarget {
// Pixel format used for the colour attachment of each render target.
var pixelFormat: MTLPixelFormat {
switch self {
case .lighting: return .bgra8Unorm
case .normal_shadow: return .rgba8Snorm
case .voxelIndex: return .r8Uint
case .depth: return .r32Float
}
}
// All render targets, listed explicitly.
static var allCases: [RenderTarget] = [.lighting, .normal_shadow, .voxelIndex, .depth]
}
// Builds and owns the render pipeline state for the G-buffer pass
// (gBufferVertex + gBufferFragment, one colour attachment per RenderTarget).
public final class GBufferRenderer {
private let renderPipelineState: MTLRenderPipelineState
weak var shadowMap: MTLTexture?
// Throws if the library cannot be obtained or pipeline creation fails.
// NOTE(review): `colorPixelFormat` is accepted but not used in this
// initializer; colour formats come from RenderTarget.pixelFormat instead.
public init(depthPixelFormat: MTLPixelFormat, colorPixelFormat: MTLPixelFormat, sampleCount: Int = 1) throws {
let library = try LibraryMonad.getLibrary()
let device = library.device
let descriptor = MTLRenderPipelineDescriptor()
descriptor.vertexFunction = library.makeFunction(name: "gBufferVertex")!
descriptor.fragmentFunction = library.makeFunction(name: "gBufferFragment")!
// The same format is used for both the depth and the stencil attachment.
descriptor.depthAttachmentPixelFormat = depthPixelFormat
descriptor.stencilAttachmentPixelFormat = depthPixelFormat
descriptor.sampleCount = sampleCount
for target in RenderTarget.allCases {
descriptor.colorAttachments[target.rawValue].pixelFormat = target.pixelFormat
}
// uncomment below to trigger throw
// descriptor.supportIndirectCommandBuffers = true
renderPipelineState = try device.makeRenderPipelineState(descriptor: descriptor) // throws "Fragment shader cannot be used with indirect command buffers"
}
// Convenience initializer that takes its formats and sample count from an MTKView.
public convenience init(mtkView: MTKView) throws {
try self.init(depthPixelFormat: mtkView.depthStencilPixelFormat, colorPixelFormat: mtkView.colorPixelFormat, sampleCount: mtkView.sampleCount)
}
}
The above works great when triggering draws from the CPU in the usual way, but when setting supportIndirectCommandBuffers in preparation for GPU drawing it throws the error.
I've tried stripping down the fragment shader to just return constant values for the GBuffers, and then makeRenderPipelineState succeeds, but when I add texture sampling back in it begins complaining again. I can't seem to pin down what exactly it doesn't like about the frag shader.
Looking through the code and through Metal documentation and Metal Shading Language specification, I think I know why you get this error.
If you look through the render_command interface in Metal's metal_command_buffer header, you'll find that to pass parameters to indirect render commands you only have these functions: set_vertex_buffer and set_fragment_buffer; there is no set_vertex_texture or set_vertex_sampler like you have in MTLRenderCommandEncoder.
But, since your pipeline uses shader that in turn uses textures as arguments and you indicate by using supportIndirectCommandBuffers that you would like to use this pipeline in indirect commands, Metal has no choice but to fail pipeline creation.
Instead if you want to pass textures or samplers to indirect render commands, you should use argument buffers, that you will pass to the shader that issues indirect render commands, which in turn will bind them using set_vertex_buffer and set_fragment_buffer for each render_command.
Specification: Metal Shading Language Specification (Section 5.16)

Shader reflection : variable name?

How (if at all possible) could I get the names of variables / structure members in a shader from reflection?
I'm talking about raw HLSL shaders (no Effects framework / no D3DX, just raw DirectX).
I'm using SharpDX and found out how to get most of the information I need from the shader signature
new SharpDX.D3DCompiler.ShaderReflection(MyShaderByteCode);
I can get most of the information I need, but while I can retrieve the semantic name (TEXCOORD, POSITION etc.) I can't retrieve the actual name of the element (there's no "name" property).
Am I going about this the wrong way? Is this even possible at all?
// Vertex shader input: position and texture coordinate, bound by semantic.
struct Vertex
{
float4 Position : POSITION;
float2 UVPosition : TEXCOORD;
};
// Vertex shader output / pixel shader input.
struct Pixel
{
float4 Position : SV_POSITION;
float2 UVPosition : TEXCOORD;
};
// Global shader resources: one matrix, three textures bound to registers
// t0-t2, and one sampler shared by all texture reads.
float4x4 worldViewProj;
Texture2D<float4> diffuse : register(t0);
Texture2D<float4> height : register(t1);
Texture2D<float4> lightmap : register(t2);
SamplerState pictureSampler;
// Vertex shader: offsets the input z by half of the height map's red channel
// (sampled at mip level 0), then transforms the position by worldViewProj and
// passes the texture coordinate through unchanged.
Pixel PerVertex(Vertex input)
{
Pixel output = (Pixel) 0;
input.Position.z += height.SampleLevel(pictureSampler, input.UVPosition, 0).r / 2;
output.Position = mul(input.Position, worldViewProj);
output.UVPosition = input.UVPosition;
return output;
}
// Pixel shader: returns the diffuse sample multiplied by the lightmap sample
// at the same texture coordinate.
float4 PerPixel(Pixel input) : SV_Target
{
return diffuse.Sample(pictureSampler, input.UVPosition) * lightmap.Sample(pictureSampler, input.UVPosition);
}
What I can retrieve is "POSITION" and "TEXCOORD"; what I want to retrieve is "Position" and "UVPosition".
You need to iterate through the constant buffers.
Please note that if a constant buffer is not used, it will be stripped from the bytecode.
Attached is code that iterates through all variables:
// Reflect over the compiled shader bytecode and print the name of every
// variable in every constant buffer.
SharpDX.D3DCompiler.ShaderReflection sr = new SharpDX.D3DCompiler.ShaderReflection(mybytecode);
// Outer loop: one iteration per constant buffer reported by the reflection.
for (int i = 0; i < sr.Description.ConstantBuffers; i++)
{
SharpDX.D3DCompiler.ConstantBuffer cb = sr.GetConstantBuffer(i);
// Inner loop: one iteration per variable in this constant buffer.
for (int j = 0; j < cb.Description.VariableCount; j++)
{
SharpDX.D3DCompiler.ShaderReflectionVariable variable = cb.GetVariable(j);
Console.WriteLine(variable.Description.Name);
}
}
I don't think that is possible.
What you can do alternatively, is to explicitly assign semantic indices (in addition to semantic names):
// Same input struct, with an explicit semantic index appended to each
// semantic name.
struct Vertex
{
float4 Position : POSITION0;
float2 UVPosition : TEXCOORD0;
};
and use them to uniquely identify your inputs on CPU side by reading SemanticName and SemanticIndex fields of D3D11_SIGNATURE_PARAMETER_DESC.
Another way that comes to mind is to parse the source HLSL file directly (and so roll your own reflection engine).
Hope it helps!

Reading output from geometry shader on CPU

I'm trying to read the output from a geometry shader which is using stream-output to output to a buffer.
The output buffer used by the geometry shader is described like this:
// Description of the GPU buffer that receives the geometry shader's
// stream output and is later bound as a vertex buffer.
D3D10_BUFFER_DESC vbdesc =
{
numPoints * sizeof( MESH_VERTEX ), // ByteWidth
D3D10_USAGE_DEFAULT, // Usage
D3D10_BIND_VERTEX_BUFFER | D3D10_BIND_STREAM_OUTPUT, // BindFlags
0, // CPUAccessFlags
0 // MiscFlags
};
V_RETURN( pd3dDevice->CreateBuffer( &vbdesc, NULL, &g_pDrawFrom ) );
The geometry shader creates a number of triangles based on a single point (at max 12 triangles per point), and if I understand the SDK correctly I have to create a staging resource in order to read the output from the geometry shader on the CPU.
I have declared another buffer resource (this time setting the STAGING flag) like this:
// Description of the CPU-readable staging buffer (question's version).
// Its ByteWidth is computed as numPoints * 36 * sizeof(VERTEX_STREAMOUT),
// which differs from the numPoints * sizeof(MESH_VERTEX) used for
// g_pDrawFrom.
D3D10_BUFFER_DESC sbdesc =
{
(numPoints * (12*3)) * sizeof( VERTEX_STREAMOUT ), // ByteWidth
D3D10_USAGE_STAGING, // Usage
NULL, // BindFlags
D3D10_CPU_ACCESS_READ, // CPUAccessFlags
0 // MiscFlags
};
V_RETURN( pd3dDevice->CreateBuffer( &sbdesc, NULL, &g_pStaging ) );
After the first draw call of the application the geometry shader is done creating all triangles and can be drawn. However, after this first draw call I would like to be able to read the vertices output by the geometry shader.
Using the buffer staging resource I'm trying to do it like this (right after the first draw call):
// Copy the stream-output buffer into the staging buffer, then map the
// staging buffer for reading on the CPU.
pd3dDevice->CopyResource(g_pStaging, g_pDrawFrom);
pd3dDevice->Flush();
void *ptr = 0;
HRESULT hr = g_pStaging->Map( D3D10_MAP_READ, NULL, &ptr );
if( FAILED( hr ) )
return hr;
// NOTE(review): the vertices presumably have to be read through `mv`
// before Unmap() is called -- confirm against the D3D10 documentation.
VERTEX_STREAMOUT *mv = (VERTEX_STREAMOUT*)ptr;
g_pStaging->Unmap();
This compiles and doesn't give any errors at runtime. However, I don't seem to be getting any output.
The geometry shader outputs the following:
// Per-vertex record emitted by the geometry shader: float4 + float3 + float2.
struct VSSceneStreamOut
{
float4 Pos : POS;
float3 Norm : NORM;
float2 Tex : TEX;
};
and the VERTEX_STREAMOUT is declared like this:
// CPU-side mirror of VSSceneStreamOut, with the same member order
// (4-, 3- and 2-component vectors).
struct VERTEX_STREAMOUT
{
D3DXVECTOR4 Pos;
D3DXVECTOR3 Norm;
D3DXVECTOR2 Tex;
};
Am I missing something here?
Problem solved by creating the staging resource buffer like this:
// Corrected staging-buffer setup: start from the description of the source
// buffer (g_pDrawFrom), then override only the fields that must differ for
// a CPU-readable staging resource. ByteWidth is not reassigned here.
D3D10_BUFFER_DESC sbdesc;
ZeroMemory( &sbdesc, sizeof(sbdesc) );
g_pDrawFrom->GetDesc( &sbdesc );
sbdesc.CPUAccessFlags = D3D10_CPU_ACCESS_READ;
sbdesc.Usage = D3D10_USAGE_STAGING;
sbdesc.BindFlags = 0;
sbdesc.MiscFlags = 0;
V_RETURN( pd3dDevice->CreateBuffer( &sbdesc, NULL, &g_pStaging ) );
Problem was with the ByteWidth.

Resources