Must render buffer texture dimensions be power-of-two? - webgl

Does the texture that we use for WebGL render buffer storage need to have dimensions that are power-of-two?
Background info
I'm chasing a FRAMEBUFFER_INCOMPLETE_ATTACHMENT reported by a client on this setup:
Windows 7 Enterprise 32-Bit
Firefox Version: 33
Video Card: Intel Q45/Q43 Express Chipset Driver Version 8.13.10.2413
and so far I'm at a loss as to why it's happening, so guessing it might be something to do with NPOT textures.
Here's my render buffer implementation, which does not have power-of-two-texture yet:
/**
 * Off-screen render target wrapper.
 *
 * @param cfg Configuration object; `cfg.canvas` is stored and its `gl`
 *            property is used as the WebGL context.
 */
SceneJS._webgl.RenderBuffer = function (cfg) {
    var canvas = cfg.canvas;

    /**
     * True as soon as this buffer is allocated and ready to go
     * @type {boolean}
     */
    this.allocated = false;

    // Canvas wrapper handed in by the caller
    this.canvas = canvas;

    // WebGL context taken from the canvas wrapper
    this.gl = canvas.gl;

    // Holder for the framebuffer / renderbuffer / texture handles; starts out empty
    this.buf = null;

    // Whether this object believes its framebuffer is currently bound
    this.bound = false;
};
/**
 * Called after WebGL context is restored.
 *
 * Adopts the new context and discards the handle record, which belonged to
 * the lost context. The bound flag is reset as well, mirroring destroy(),
 * so clear() cannot pass its "bound" check against a framebuffer that no
 * longer exists.
 *
 * @param _gl The restored WebGL context
 */
SceneJS._webgl.RenderBuffer.prototype.webglRestored = function (_gl) {
    this.gl = _gl;
    this.buf = null;
    this.bound = false;
};
/**
 * Makes this buffer's framebuffer the current render target.
 *
 * The buffer is (re)validated against the canvas size first; the GL bind is
 * skipped when the buffer is already flagged as bound.
 */
SceneJS._webgl.RenderBuffer.prototype.bind = function () {
    this._touch();
    if (!this.bound) {
        var gl = this.gl;
        gl.bindFramebuffer(gl.FRAMEBUFFER, this.buf.framebuf);
        this.bound = true;
    }
};
/**
 * Ensures that a framebuffer sized to the current canvas exists.
 *
 * If a buffer already exists with the canvas' width and height, nothing is
 * done. Otherwise any existing GL objects are deleted and a new framebuffer
 * is built from an RGBA/UNSIGNED_BYTE color texture and a DEPTH_COMPONENT16
 * renderbuffer. The new framebuffer is then checked for completeness.
 *
 * Throws (via SceneJS_error.fatalError) when the framebuffer is invalid or
 * incomplete. Resets the `bound` flag to false after a (re)allocation.
 */
SceneJS._webgl.RenderBuffer.prototype._touch = function () {
    var gl = this.gl;
    var width = this.canvas.canvas.width;
    var height = this.canvas.canvas.height;

    if (this.buf) { // Currently have a buffer
        if (this.buf.width == width && this.buf.height == height) { // Canvas size unchanged, buffer still good
            return;
        }
        // Buffer needs reallocation for new canvas size
        gl.deleteTexture(this.buf.texture);
        gl.deleteFramebuffer(this.buf.framebuf);
        gl.deleteRenderbuffer(this.buf.renderbuf);
    }

    this.buf = {
        framebuf: gl.createFramebuffer(),
        renderbuf: gl.createRenderbuffer(),
        texture: gl.createTexture(),
        width: width,
        height: height
    };

    gl.bindFramebuffer(gl.FRAMEBUFFER, this.buf.framebuf);

    // Color attachment: no mipmaps and clamped edges, so any canvas size is usable
    gl.bindTexture(gl.TEXTURE_2D, this.buf.texture);
    gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MAG_FILTER, gl.NEAREST);
    gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MIN_FILTER, gl.NEAREST);
    gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_S, gl.CLAMP_TO_EDGE);
    gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_T, gl.CLAMP_TO_EDGE);
    try {
        // Do it the way the spec requires
        gl.texImage2D(gl.TEXTURE_2D, 0, gl.RGBA, width, height, 0, gl.RGBA, gl.UNSIGNED_BYTE, null);
    } catch (exception) {
        // Fallback for implementations that reject a null data argument.
        // RGBA/UNSIGNED_BYTE needs four bytes per pixel, and Uint8Array is
        // the standard typed array (WebGLUnsignedByteArray no longer exists).
        var textureStorage = new Uint8Array(width * height * 4);
        gl.texImage2D(gl.TEXTURE_2D, 0, gl.RGBA, width, height, 0, gl.RGBA, gl.UNSIGNED_BYTE, textureStorage);
    }

    // Depth attachment
    gl.bindRenderbuffer(gl.RENDERBUFFER, this.buf.renderbuf);
    gl.renderbufferStorage(gl.RENDERBUFFER, gl.DEPTH_COMPONENT16, width, height);

    gl.framebufferTexture2D(gl.FRAMEBUFFER, gl.COLOR_ATTACHMENT0, gl.TEXTURE_2D, this.buf.texture, 0);
    gl.framebufferRenderbuffer(gl.FRAMEBUFFER, gl.DEPTH_ATTACHMENT, gl.RENDERBUFFER, this.buf.renderbuf);

    gl.bindTexture(gl.TEXTURE_2D, null);
    gl.bindRenderbuffer(gl.RENDERBUFFER, null);
    gl.bindFramebuffer(gl.FRAMEBUFFER, null);

    // Verify framebuffer is OK
    gl.bindFramebuffer(gl.FRAMEBUFFER, this.buf.framebuf);
    if (!gl.isFramebuffer(this.buf.framebuf)) {
        throw SceneJS_error.fatalError(SceneJS.errors.ERROR, "Invalid framebuffer");
    }
    var status = gl.checkFramebufferStatus(gl.FRAMEBUFFER);
    switch (status) {
        case gl.FRAMEBUFFER_COMPLETE:
            break;
        case gl.FRAMEBUFFER_INCOMPLETE_ATTACHMENT:
            throw SceneJS_error.fatalError(SceneJS.errors.ERROR, "Incomplete framebuffer: FRAMEBUFFER_INCOMPLETE_ATTACHMENT");
        case gl.FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT:
            throw SceneJS_error.fatalError(SceneJS.errors.ERROR, "Incomplete framebuffer: FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT");
        case gl.FRAMEBUFFER_INCOMPLETE_DIMENSIONS:
            throw SceneJS_error.fatalError(SceneJS.errors.ERROR, "Incomplete framebuffer: FRAMEBUFFER_INCOMPLETE_DIMENSIONS");
        case gl.FRAMEBUFFER_UNSUPPORTED:
            throw SceneJS_error.fatalError(SceneJS.errors.ERROR, "Incomplete framebuffer: FRAMEBUFFER_UNSUPPORTED");
        default:
            throw SceneJS_error.fatalError(SceneJS.errors.ERROR, "Incomplete framebuffer: " + status);
    }

    // A freshly allocated buffer is treated as not yet bound
    this.bound = false;
};
/**
 * Clears the color and depth buffers and switches blending off.
 *
 * Throws the string "Render buffer not bound" when this buffer is not
 * flagged as bound.
 */
SceneJS._webgl.RenderBuffer.prototype.clear = function () {
    if (this.bound) {
        var gl = this.gl;
        gl.clear(gl.COLOR_BUFFER_BIT | gl.DEPTH_BUFFER_BIT);
        gl.disable(gl.BLEND);
        return;
    }
    throw "Render buffer not bound";
};
/**
 * Reads a single RGBA pixel at the given pick coordinates.
 *
 * @param pickX X coordinate, passed to readPixels unchanged
 * @param pickY Y coordinate, flipped against the canvas height
 * @returns {Uint8Array} Four bytes holding the pixel read
 */
SceneJS._webgl.RenderBuffer.prototype.read = function (pickX, pickY) {
    var gl = this.gl;
    var pixel = new Uint8Array(4);
    // Flip Y by subtracting from the canvas height.
    // NOTE(review): for pickY == 0 this yields y == height, one row past the
    // last valid row if pickY is a zero-based canvas coordinate - confirm
    // against the callers.
    var flippedY = this.canvas.canvas.height - pickY;
    gl.readPixels(pickX, flippedY, 1, 1, gl.RGBA, gl.UNSIGNED_BYTE, pixel);
    return pixel;
};
/**
 * Binds the null framebuffer in place of this one and clears the
 * `bound` flag.
 */
SceneJS._webgl.RenderBuffer.prototype.unbind = function () {
    var gl = this.gl;
    gl.bindFramebuffer(gl.FRAMEBUFFER, null);
    this.bound = false;
};
/**
 * Returns a small handle object for using this buffer's color texture.
 *
 * The handle exposes:
 *   bind(unit)   - activates texture unit `unit` and binds the texture;
 *                  returns true on success, false if no texture exists yet.
 *   unbind(unit) - activates texture unit `unit` and binds null, if a
 *                  texture exists.
 */
SceneJS._webgl.RenderBuffer.prototype.getTexture = function () {
    var renderBuffer = this;

    // Truthy when a texture has been allocated
    function hasTexture() {
        return renderBuffer.buf && renderBuffer.buf.texture;
    }

    // Selects the texture unit and binds `texture` (or null) to TEXTURE_2D
    function bindOnUnit(unit, texture) {
        var gl = renderBuffer.gl;
        gl.activeTexture(gl["TEXTURE" + unit]);
        gl.bindTexture(gl.TEXTURE_2D, texture);
    }

    return {
        bind: function (unit) {
            if (!hasTexture()) {
                return false;
            }
            bindOnUnit(unit, renderBuffer.buf.texture);
            return true;
        },
        unbind: function (unit) {
            if (hasTexture()) {
                bindOnUnit(unit, null);
            }
        }
    };
};
/**
 * Releases the GL texture, framebuffer and renderbuffer owned by this
 * object, then resets its state. Does nothing if no buffer exists.
 */
SceneJS._webgl.RenderBuffer.prototype.destroy = function () {
    var handles = this.buf;
    if (!handles) {
        return;
    }
    var gl = this.gl;
    gl.deleteTexture(handles.texture);
    gl.deleteFramebuffer(handles.framebuf);
    gl.deleteRenderbuffer(handles.renderbuf);
    this.buf = null;
    this.bound = false;
};

As far as I could find (I don't use WebGL), the WebGL spec delegates to the OpenGL ES 2.0 spec on these FBO related calls. RGBA with 8 bits per component is not a format that is supported as render target in ES 2.0. Many devices do support it (advertised with the OES_rgb8_rgba8 extension), but it is not part of the standard.
The texture you are using as COLOR_ATTACHMENT0 is RGBA with 8-bit components:
this.gl.texImage2D(this.gl.TEXTURE_2D, 0, this.gl.RGBA, width, height, 0,
this.gl.RGBA, this.gl.UNSIGNED_BYTE, textureStorage);
Try specifying this as RGB565, which is color renderable:
// RGB565 storage: internalformat must equal format (RGB), and the packed
// 16-bit type cannot be fed from a byte array, so no initial data is passed.
this.gl.texImage2D(this.gl.TEXTURE_2D, 0, this.gl.RGB, width, height, 0,
    this.gl.RGB, this.gl.UNSIGNED_SHORT_5_6_5, null);
If you do need an alpha component in the texture, RGBA4444 or RGB5_A1 are your only portable options:
// RGBA4444 storage. Packed 16-bit types take a Uint16Array or null, not a
// byte array, so the texture is allocated without initial data.
this.gl.texImage2D(this.gl.TEXTURE_2D, 0, this.gl.RGBA, width, height, 0,
    this.gl.RGBA, this.gl.UNSIGNED_SHORT_4_4_4_4, null);
// RGB5_A1 storage (alternative to the call above).
this.gl.texImage2D(this.gl.TEXTURE_2D, 0, this.gl.RGBA, width, height, 0,
    this.gl.RGBA, this.gl.UNSIGNED_SHORT_5_5_5_1, null);
The spec actually looks somewhat contradictory to me. Under "Differences Between WebGL and OpenGL ES 2.0", it says:
The following combinations of framebuffer object attachments, when all of the attachments are framebuffer attachment complete, non-zero, and have the same width and height, must result in the framebuffer being framebuffer complete:
COLOR_ATTACHMENT0 = RGBA/UNSIGNED_BYTE texture
Which at first sight suggests that RGBA/UNSIGNED_BYTE is supported. But that's under the condition "when all of the attachments are framebuffer attachment complete", and according to the ES 2.0 spec, attachments with this format are not attachment complete. And there is no override in the WebGL spec on what "attachment complete" means.

Related

Is there a way to use a custom model in ml5 yolo()?

The ml5 yolo() documentation mentions that
"this implementation is heavily derived from ModelDepot".
It doesn't mention which pre-trained model it uses, or how you can use your own trained model.
// Webcam capture element, assigned in setup()
let video;
// YOLO detector instance, assigned in setup()
let yolo;
// Element selected with '#status', used to show the load message
let status;
// Most recent detection results, drawn every frame by draw()
let objects = [];
/**
 * Sketch initialisation: creates the canvas, starts the webcam capture,
 * builds the YOLO detector and looks up the status element.
 */
function setup() {
  const frameWidth = 320;
  const frameHeight = 240;

  createCanvas(frameWidth, frameHeight);

  video = createCapture(VIDEO);
  video.size(frameWidth, frameHeight);

  // Build the detector; startDetecting is passed as its callback
  yolo = ml5.YOLO(video, startDetecting);

  // The capture element is hidden; draw() paints the video onto the canvas
  video.hide();

  status = select('#status');
}
/**
 * Per-frame rendering: paints the video frame, then a green label and
 * outline for every entry in `objects`. Detection coordinates are scaled
 * by the canvas width and height.
 */
function draw() {
  image(video, 0, 0, width, height);

  for (const detection of objects) {
    const left = detection.x * width;
    const top = detection.y * height;

    // Label, drawn 5 pixels above the box
    noStroke();
    fill(0, 255, 0);
    text(detection.className, left, top - 5);

    // Box outline
    noFill();
    strokeWeight(4);
    stroke(0, 255, 0);
    rect(left, top, detection.w * width, detection.h * height);
  }
}
/**
 * Callback handed to ml5.YOLO in setup(): shows the "loaded" message and
 * starts the detection loop.
 */
function startDetecting() {
  const loadedMessage = 'Model loaded!';
  status.html(loadedMessage);
  detect();
}
/**
 * Runs one detection pass and schedules the next one from its callback.
 *
 * On success the results replace `objects` (an empty array is used when no
 * results are supplied). On error the failure is logged, the last good
 * `objects` value is kept so draw() keeps working, and the loop stops
 * instead of retrying.
 */
function detect() {
  yolo.detect(function (err, results) {
    if (err) {
      console.error('YOLO detection failed:', err);
      return;
    }
    objects = results || [];
    detect();
  });
}
I've been reading yolo documentation and I kinda found a way:
ml5-yolo-library -> Heavily derived from https://github.com/ModelDepot/tfjs-yolo-tiny (ModelDepot: modeldepot.io)
here -> https://modeldepot.io/mikeshi/tiny-yolo-in-javascript
says -> This model was created from taking the original Darknet Tiny YOLO cfg and weights, converting it into Keras via YAD2K, and then using tensorflowjs_converter to convert it into the Tensorflow.js format.
So, I'm trying to do the same thing as you right now. I hope I'll find a way.

what is the correct way to use gl.readPixels?

I want to get the pixel-data from a Three.js demo.
As far as I know, there are 2 ways to proceed:
1) draw the webGl-canvas inside a 2D-canvas and use Context2D.getImageData like that :
// Copy the WebGL canvas into a scratch 2D canvas and read it back from there.
var canvas = document.createElement("canvas");
// A new canvas starts at the default size; size it to the region being read
// so the drawn image is not clipped.
canvas.width = w;
canvas.height = h;
var ctx = canvas.getContext("2d");
ctx.drawImage(renderer.domElement, 0, 0);
var data = ctx.getImageData(0, 0, w, h).data;
2) use directly the context3D with readPixels, like that :
// Read the pixels straight from the WebGL context.
var ctx = renderer.domElement.getContext("webgl");
// The typed array is spelled Uint8Array; RGBA/UNSIGNED_BYTE needs 4 bytes per pixel.
var data = new Uint8Array(w * h * 4);
ctx.readPixels(0, 0, w, h, ctx.RGBA, ctx.UNSIGNED_BYTE, data);
These 2 way to proceed works and give the same results but the second one is almost 2 times slower than the one using context2d.getImageData.
Sounds very weird to me. How the fact to draw the 3D-stuff into a 2D-canvas could be faster than using the context3D directly ? I don't understand and I'm almost sure I don't use gl.readPixels correctly.
Then my question is : how to use gl.readPixels in order to be faster than context2d.drawImage + context2d.getImageData ?
I tried to use a Float32Array, like this:
// Attempt to read the canvas back as 32-bit floats instead of bytes.
var ctx = renderer.domElement.getContext("webgl");
var data = new Float32Array(w*h*4);
// NOTE(review): the array is reported to stay unfilled after this call.
// Presumably RGBA/FLOAT is not an accepted readback combination for a
// byte-backed canvas - check ctx.getError() after the call to confirm.
ctx.readPixels(0, 0, w,h, ctx.RGBA, ctx.FLOAT, data);
I thought it would be faster since there is no conversion from float to Uint8, but it doesn't seem to work like that: my 'data' array stays empty after the call to ctx.readPixels.
Thank you for your help !
(please excuse me if my english is not perfect, it's not my native language)
On my machine, readPixels is 2x to 20x faster than drawImage/getImageData. I tested on macOS Chrome and Firefox, as well as Windows 10 Chrome and Firefox. In Safari, readPixels came out slower. That sounds like a bug in Safari, and indeed in Safari Technology Preview Release 46 readPixels is, as expected, 1.2x to 3x faster than drawImage/getImageData.
// Benchmark setup: one WebGL canvas and one 2D canvas, both w x h.
const gl = document.createElement("canvas").getContext("webgl");
const ctx = document.createElement("canvas").getContext("2d");
const w = 512;
const h = 512;
gl.canvas.width = w;
gl.canvas.height = h;
ctx.canvas.width = w;
ctx.canvas.height = h;
// Buffer shared by every call of the pre-allocated variant (4 bytes per pixel)
const readPixelBuffer = new Uint8Array(w * h * 4);
// Each entry pairs the function to time with the label used when logging
const tests = [
{ fn: withReadPixelsPreAlloc, msg: "readPixelsPreAlloc", },
{ fn: withReadPixels, msg: "readPixels", },
{ fn: withDrawImageGetImageData, msg: "drawImageGetPixels", },
];
// Index of the next entry of `tests` to run
let ndx = 0;
runNextTest();
/**
 * Runs the next pending entry of `tests`, logs its iteration count and
 * chains to the following entry. Stops once every entry has run.
 */
function runNextTest() {
  if (ndx < tests.length) {
    const { fn, msg } = tests[ndx++];
    // use setTimeout to give the browser a chance to
    // do something between tests
    setTimeout(() => {
      log(msg, "iterations in 5 seconds:", runTest(fn));
      runNextTest();
    }, 0);
  }
}
/**
 * Calls `fn` repeatedly until the time budget is used up.
 *
 * @param {Function} fn Function to time; called with no arguments.
 * @param {number} [durationMs=5000] Time budget in milliseconds. The
 *        default keeps the original 5 second run.
 * @returns {number} How many times `fn` was called.
 */
function runTest(fn, durationMs = 5000) {
  const start = performance.now();
  let count = 0;
  // The budget is checked before each call, so a call that starts in time
  // is always counted even if it finishes late.
  while (performance.now() - start <= durationMs) {
    fn();
    ++count;
  }
  return count;
}
// Benchmark case: readPixels into the shared, pre-allocated buffer.
function withReadPixelsPreAlloc() {
gl.readPixels(0, 0, w, h, gl.RGBA, gl.UNSIGNED_BYTE, readPixelBuffer);
}
// Benchmark case: readPixels into a buffer that is allocated on every call.
function withReadPixels() {
  const freshBuffer = new Uint8Array(w * h * 4);
  gl.readPixels(0, 0, w, h, gl.RGBA, gl.UNSIGNED_BYTE, freshBuffer);
}
// Benchmark case: draw the WebGL canvas into the 2D canvas, then read the
// 2D canvas back with getImageData.
function withDrawImageGetImageData() {
ctx.drawImage(gl.canvas, 0, 0);
ctx.getImageData(0, 0, w, h);
}
// Appends a <pre> element to the page containing the arguments joined by spaces.
function log(...args) {
  const line = document.createElement("pre");
  line.textContent = args.join(' ');
  document.body.appendChild(line);
}
As for converting to float the canvas itself is stored in bytes. There is no conversion to float and you likely got a GL error
// Demonstration: request a FLOAT readback from a plain WebGL canvas and
// print the resulting GL error.
const gl = document.createElement("canvas").getContext("webgl");
const buf = new Float32Array(4);
gl.readPixels(0, 0, 1, 1, gl.RGBA, gl.FLOAT, buf);
log("ERROR:", glEnumToString(gl, gl.getError()));
// Appends a <pre> element to the page containing the arguments joined by spaces.
function log(...args) {
  const line = document.createElement("pre");
  line.textContent = args.join(' ');
  document.body.appendChild(line);
}
/**
 * Maps a numeric GL enum value to the name of a matching property on `gl`.
 *
 * @param {object} gl Object whose enumerable properties are searched.
 * @param {number} val Enum value to look up.
 * @returns {string} 'NONE' for 0, the first property name whose value
 *          equals `val`, or the value as a hex string if nothing matches.
 */
function glEnumToString(gl, val) {
  if (val === 0) { return 'NONE'; }
  // `key` is declared here; an undeclared loop variable would create an
  // implicit global and throws in strict mode / ES modules.
  for (const key in gl) {
    if (gl[key] === val) {
      return key;
    }
  }
  return `0x${val.toString(16)}`;
}
Checking the console I see the error is
WebGL: INVALID_ENUM: readPixels: invalid type

DirectX 11 Blending

How can I access the color of the destination pixel in a pixel shader, so that I can apply my own blending equation? When control reaches the pixel shader I only have the source pixel's position and color; I want to know the color of the destination pixel at that time.
One approach I have heard of is to use textures, but I have not been able to find a way to do it with textures.
Programmable blending is not allowed in DirectX 11, but it is possible with some hacks.
// Creates the core D3D12 objects: DXGI factory, device, direct command
// queue, swap chain, RTV descriptor heap, one render target view per back
// buffer, and the command allocator. Every failing HRESULT is passed to
// ThrowIfFailed.
void D3D12HelloTriangle::LoadPipeline()
{
UINT dxgiFactoryFlags = 0;
ComPtr<IDXGIFactory4> factory;
ThrowIfFailed(CreateDXGIFactory2(dxgiFactoryFlags, IID_PPV_ARGS(&factory)));
//Device Creation
{
ComPtr<IDXGIAdapter1> hardwareAdapter;
GetHardwareAdapter(factory.Get(), &hardwareAdapter);
ThrowIfFailed(D3D12CreateDevice(
hardwareAdapter.Get(),
D3D_FEATURE_LEVEL_11_0,
IID_PPV_ARGS(&m_device)
));
}
// Describe and create the command queue.
D3D12_COMMAND_QUEUE_DESC queueDesc = {};
queueDesc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
queueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT;
ThrowIfFailed(m_device->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(&m_commandQueue)));
// Describe and create the swap chain.
DXGI_SWAP_CHAIN_DESC1 swapChainDesc = {};
swapChainDesc.BufferCount = FrameCount;
swapChainDesc.Width = m_width;
swapChainDesc.Height = m_height;
swapChainDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
swapChainDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
swapChainDesc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD;
swapChainDesc.SampleDesc.Count = 1;
ComPtr<IDXGISwapChain1> swapChain;
// The swap chain is created for the current thread's CoreWindow.
ThrowIfFailed(factory->CreateSwapChainForCoreWindow(
m_commandQueue.Get(), // Swap chain needs the queue so that it can force a flush on it.
reinterpret_cast<IUnknown*>(Windows::UI::Core::CoreWindow::GetForCurrentThread()),
&swapChainDesc,
nullptr,
&swapChain
));
// Store the swap chain in the m_swapChain member via ComPtr::As.
ThrowIfFailed(swapChain.As(&m_swapChain));
m_frameIndex = m_swapChain->GetCurrentBackBufferIndex();
// Create descriptor heaps.
{
// Describe and create a render target view (RTV) descriptor heap.
D3D12_DESCRIPTOR_HEAP_DESC rtvHeapDesc = {};
rtvHeapDesc.NumDescriptors = FrameCount;
rtvHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV;
rtvHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE;
ThrowIfFailed(m_device->CreateDescriptorHeap(&rtvHeapDesc, IID_PPV_ARGS(&m_rtvHeap)));
// Cached increment used to step from one RTV descriptor to the next.
m_rtvDescriptorSize = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
}
// Create frame resources.
{
CD3DX12_CPU_DESCRIPTOR_HANDLE rtvHandle(m_rtvHeap->GetCPUDescriptorHandleForHeapStart());
// Create a RTV for each frame.
for (UINT n = 0; n < FrameCount; n++)
{
ThrowIfFailed(m_swapChain->GetBuffer(n, IID_PPV_ARGS(&m_renderTargets[n])));
m_device->CreateRenderTargetView(m_renderTargets[n].Get(), nullptr, rtvHandle);
rtvHandle.Offset(1, m_rtvDescriptorSize);
}
}
ThrowIfFailed(m_device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&m_commandAllocator)));
}
// Load the sample assets.
//
// Creates an empty root signature, compiles the vertex and pixel shaders,
// builds a pipeline state object that blends with the MAX operator, creates
// the command list (closed straight away), uploads two four-vertex quads
// into separate upload-heap vertex buffers, and creates the frame fence.
// Every failing HRESULT is passed to ThrowIfFailed.
void D3D12HelloTriangle::LoadAssets()
{
    // Create an empty root signature.
    {
        CD3DX12_ROOT_SIGNATURE_DESC rootSignatureDesc;
        rootSignatureDesc.Init(0, nullptr, 0, nullptr, D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT);
        ComPtr<ID3DBlob> signature;
        ComPtr<ID3DBlob> error;
        ThrowIfFailed(D3D12SerializeRootSignature(&rootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION_1, &signature, &error));
        ThrowIfFailed(m_device->CreateRootSignature(0, signature->GetBufferPointer(), signature->GetBufferSize(), IID_PPV_ARGS(&m_rootSignature)));
    }

    // Create the pipeline state, which includes compiling and loading shaders.
    {
        ComPtr<ID3DBlob> vertexShader;
        ComPtr<ID3DBlob> pixelShader;
#if defined(_DEBUG)
        // Enable better shader debugging with the graphics debugging tools.
        UINT compileFlags = D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION;
#else
        UINT compileFlags = 0;
#endif
        ThrowIfFailed(D3DCompileFromFile(GetAssetFullPath(L"VertexShader.hlsl").c_str(), nullptr, nullptr, "VSMain", "vs_5_0", compileFlags, 0, &vertexShader, nullptr));
        ThrowIfFailed(D3DCompileFromFile(GetAssetFullPath(L"PixelShader.hlsl").c_str(), nullptr, nullptr, "PSMain", "ps_5_0", compileFlags, 0, &pixelShader, nullptr));

        // Define the vertex input layout.
        D3D12_INPUT_ELEMENT_DESC inputElementDescs[] =
        {
            { "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 },
            { "COLOR", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 },
            { "DELAY", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 },
        };

        // Describe and create the graphics pipeline state object (PSO).
        D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {};
        psoDesc.InputLayout = { inputElementDescs, _countof(inputElementDescs) };
        psoDesc.pRootSignature = m_rootSignature.Get();
        psoDesc.VS = CD3DX12_SHADER_BYTECODE(vertexShader.Get());
        psoDesc.PS = CD3DX12_SHADER_BYTECODE(pixelShader.Get());
        psoDesc.RasterizerState = CD3DX12_RASTERIZER_DESC(D3D12_DEFAULT);
        psoDesc.BlendState = CD3DX12_BLEND_DESC(D3D12_DEFAULT);
        psoDesc.DepthStencilState.DepthEnable = FALSE;
        psoDesc.DepthStencilState.StencilEnable = FALSE;
        psoDesc.SampleMask = 1;
        psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
        // NOTE(review): two render targets are declared here, but
        // PopulateCommandList binds a single RTV - confirm this is intended.
        psoDesc.NumRenderTargets = 2;
        psoDesc.RTVFormats[0] = DXGI_FORMAT_R8G8B8A8_UNORM;
        psoDesc.RTVFormats[1] = DXGI_FORMAT_R8G8B8A8_UNORM;
        psoDesc.SampleDesc.Count = 1;
        psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;

        // Blend state: result = max(source, destination) per channel.
        D3D12_GRAPHICS_PIPELINE_STATE_DESC transparentPsoDesc = psoDesc;
        D3D12_RENDER_TARGET_BLEND_DESC transparencyBlendDesc;
        transparencyBlendDesc.BlendEnable = true;
        transparencyBlendDesc.LogicOpEnable = false;
        transparencyBlendDesc.SrcBlend = D3D12_BLEND_ONE;
        transparencyBlendDesc.DestBlend = D3D12_BLEND_ONE;
        transparencyBlendDesc.BlendOp = D3D12_BLEND_OP_MAX;
        transparencyBlendDesc.SrcBlendAlpha = D3D12_BLEND_ONE;
        transparencyBlendDesc.DestBlendAlpha = D3D12_BLEND_ONE;
        transparencyBlendDesc.BlendOpAlpha = D3D12_BLEND_OP_MAX;
        transparencyBlendDesc.LogicOp = D3D12_LOGIC_OP_NOOP;
        transparencyBlendDesc.RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL;
        transparentPsoDesc.BlendState.RenderTarget[0] = transparencyBlendDesc;

        ThrowIfFailed(m_device->CreateGraphicsPipelineState(&transparentPsoDesc, IID_PPV_ARGS(&m_pipelineState)));
        ThrowIfFailed(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_commandAllocator.Get(), m_pipelineState.Get(), IID_PPV_ARGS(&m_commandList)));
    }

    // Command lists are created in the recording state, but there is nothing
    // to record yet. The main loop expects it to be closed, so close it now.
    ThrowIfFailed(m_commandList->Close());

    // Create the vertex buffers.
    {
        // First quad (blue), drawn as a four-vertex triangle strip.
        Vertex triangleVertices[] =
        {
            { { 0.0f, 0.0f * m_aspectRatio, 0.0f },{ 0.0f, 0.0f, 1.0f,1.0f },{ 0.0f,0.0f,0.0f,0.0f } },
            { { 0.25f, 0.0f * m_aspectRatio, 0.0f },{ 0.0f, 0.0f, 1.0f, 1.0f },{ 0.0f,0.0f,0.0f,0.0f } },
            { { 0.0f, 0.25f * m_aspectRatio, 0.0f },{ 0.0f, 0.0f, 1.0f, 1.0f },{ 0.0f,0.0f,0.0f,0.0f } },
            { { 0.25f, 0.25f * m_aspectRatio, 0.0f },{ 0.0f,0.0f,1.0f, 1.0f },{ 0.0f,0.0f,0.0f,0.0f } },
        };
        // Second quad (green), twice the extent of the first.
        Vertex triangleVertices2[] =
        {
            { { 0.0f, 0.0f * m_aspectRatio, 0.0f },{ 0.0f, 1.0f, 0.0f, 1.0f },{ 0.0f,0.0f,0.0f,1.0f } },
            { { 0.5f, 0.0f * m_aspectRatio, 0.0f },{ 0.0f, 1.0f, 0.0f, 1.0f },{ 0.0f,0.0f,0.0f,1.0f } },
            { { 0.0f, 0.5f * m_aspectRatio, 0.0f },{0.0f, 1.0f, 0.0f, 1.0f },{ 0.0f,0.0f,0.0f,1.0f } },
            { { 0.5f, 0.5f * m_aspectRatio, 0.0f },{ 0.0f,1.0f, 0.0f, 1.0f },{ 0.0f,0.0f,0.0f,1.0f } },
        };
        const UINT vertexBufferSize = sizeof(triangleVertices);
        const UINT my_vertexBufferSize = sizeof(triangleVertices2);

        // Note: using upload heaps to transfer static data like vert buffers is not
        // recommended. Every time the GPU needs it, the upload heap will be marshalled
        // over. Please read up on Default Heap usage. An upload heap is used here for
        // code simplicity and because there are very few verts to actually transfer.
        //
        // The descriptors are named locals rather than temporaries so that
        // no address of a temporary is taken.
        const CD3DX12_HEAP_PROPERTIES uploadHeapProperties(D3D12_HEAP_TYPE_UPLOAD);
        const CD3DX12_RESOURCE_DESC vertexBufferDesc = CD3DX12_RESOURCE_DESC::Buffer(vertexBufferSize);
        const CD3DX12_RESOURCE_DESC my_vertexBufferDesc = CD3DX12_RESOURCE_DESC::Buffer(my_vertexBufferSize);

        ThrowIfFailed(m_device->CreateCommittedResource(
            &uploadHeapProperties,
            D3D12_HEAP_FLAG_NONE,
            &vertexBufferDesc,
            D3D12_RESOURCE_STATE_GENERIC_READ,
            nullptr,
            IID_PPV_ARGS(&m_vertexBuffer)));
        ThrowIfFailed(m_device->CreateCommittedResource(
            &uploadHeapProperties,
            D3D12_HEAP_FLAG_NONE,
            &my_vertexBufferDesc,
            D3D12_RESOURCE_STATE_GENERIC_READ,
            nullptr,
            IID_PPV_ARGS(&my_vertexBuffer)));

        // Copy the vertex data into the vertex buffers.
        UINT8* pVertexDataBegin;
        CD3DX12_RANGE readRange(0, 0); // We do not intend to read from this resource on the CPU.
        ThrowIfFailed(m_vertexBuffer->Map(0, &readRange, reinterpret_cast<void**>(&pVertexDataBegin)));
        UINT8* my_pVertexDataBegin;
        CD3DX12_RANGE my_readRange(0, 0); // We do not intend to read from this resource on the CPU.
        ThrowIfFailed(my_vertexBuffer->Map(0, &my_readRange, reinterpret_cast<void**>(&my_pVertexDataBegin)));
        memcpy(pVertexDataBegin, triangleVertices, sizeof(triangleVertices));
        m_vertexBuffer->Unmap(0, nullptr);
        memcpy(my_pVertexDataBegin, triangleVertices2, sizeof(triangleVertices2));
        my_vertexBuffer->Unmap(0, nullptr);

        // Initialize the vertex buffer views.
        m_vertexBufferView[0].BufferLocation = m_vertexBuffer->GetGPUVirtualAddress();
        m_vertexBufferView[0].StrideInBytes = sizeof(Vertex);
        m_vertexBufferView[0].SizeInBytes = vertexBufferSize;
        m_vertexBufferView[1].BufferLocation = my_vertexBuffer->GetGPUVirtualAddress();
        m_vertexBufferView[1].StrideInBytes = sizeof(Vertex);
        // Each view must describe its own buffer's size.
        m_vertexBufferView[1].SizeInBytes = my_vertexBufferSize;
    }

    // Create the fence used for frame synchronization.
    {
        ThrowIfFailed(m_device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&m_fence)));
        m_fenceValue = 1;
    }
}
// Update frame-based values.
// Currently a no-op: the body is empty.
void D3D12HelloTriangle::OnUpdate()
{
}
// Render the scene.
void D3D12HelloTriangle::OnRender()
{
// Record all the commands we need to render the scene into the command list.
PopulateCommandList();
// Execute the command list.
ID3D12CommandList* ppCommandLists[] = { m_commandList.Get() };
m_commandQueue->ExecuteCommandLists(_countof(ppCommandLists), ppCommandLists);
// Present the frame.
ThrowIfFailed(m_swapChain->Present(1, 0));
WaitForPreviousFrame();
}
// Shutdown hook: synchronizes with the GPU through WaitForPreviousFrame()
// before the object's resources are released.
void D3D12HelloTriangle::OnDestroy()
{
// Ensure that the GPU is no longer referencing resources that are about to be
// cleaned up by the destructor.
WaitForPreviousFrame();
}
void D3D12HelloTriangle::PopulateCommandList()
{
// Command list allocators can only be reset when the associated
// command lists have finished execution on the GPU; apps should use
// fences to determine GPU execution progress.
ThrowIfFailed(m_commandAllocator->Reset());
// However, when ExecuteCommandList() is called on a particular command
// list, that command list can then be reset at any time and must be before
// re-recording.
ThrowIfFailed(m_commandList->Reset(m_commandAllocator.Get(), m_pipelineState.Get()));
// Set necessary state.
m_commandList->SetGraphicsRootSignature(m_rootSignature.Get());
m_commandList->RSSetViewports(1, &m_viewport);
m_commandList->RSSetScissorRects(1, &m_scissorRect);
// Indicate that the back buffer will be used as a render target.
//m_commandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_renderTargets[m_frameIndex].Get(), D3D12_RESOURCE_STATE_PRESENT, D3D12_RESOURCE_STATE_RENDER_TARGET));
CD3DX12_CPU_DESCRIPTOR_HANDLE rtvHandle(m_rtvHeap->GetCPUDescriptorHandleForHeapStart(), m_frameIndex, m_rtvDescriptorSize);
m_commandList->OMSetRenderTargets(1, &rtvHandle, FALSE, nullptr);
// Record commands.
const float clearColor[] = { 1.0f, 0.0f, 0.0f, 1.0f };
m_commandList->ClearRenderTargetView(rtvHandle, clearColor, 0, nullptr);
m_commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
m_commandList->IASetVertexBuffers(0, 1, &m_vertexBufferView[1]);
m_commandList->DrawInstanced(4, 1, 0, 0);
m_commandList->IASetVertexBuffers(0, 1, m_vertexBufferView);
m_commandList->DrawInstanced(4, 1, 0, 0);
// Indicate that the back buffer will now be used to present.
m_commandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_renderTargets[m_frameIndex].Get(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_PRESENT));
ThrowIfFailed(m_commandList->Close());
}
// Blocks until the GPU has finished all work submitted to the command queue
// so far, then refreshes the current back buffer index.
void D3D12HelloTriangle::WaitForPreviousFrame()
{
    // WAITING FOR THE FRAME TO COMPLETE BEFORE CONTINUING IS NOT BEST PRACTICE.
    // This is code implemented as such for simplicity. The D3D12HelloFrameBuffering
    // sample illustrates how to use fences for efficient resource usage and to
    // maximize GPU utilization.

    // Signal and increment the fence value.
    const UINT64 fence = m_fenceValue;
    ThrowIfFailed(m_commandQueue->Signal(m_fence.Get(), fence));
    m_fenceValue++;

    // Wait until the previous frame is finished. Signalling alone does not
    // block; without this wait the command allocator could be reset while
    // the GPU is still executing. Passing a null event handle makes
    // SetEventOnCompletion itself block until the fence reaches the value.
    if (m_fence->GetCompletedValue() < fence)
    {
        ThrowIfFailed(m_fence->SetEventOnCompletion(fence, nullptr));
    }

    m_frameIndex = m_swapChain->GetCurrentBackBufferIndex();
}

Running a DX11 compute shader with SharpDX - cannot get results

I am trying to run a compute shader and get the resulting texture using SharpDX.
From what I understood, I need to:
1. Create a texture to set as an output to the shader.
2. Set the above texture as an unordered access view so I can write to it.
3. Run the shader
4. Copy the UAV texture to a staging texture so it can be accessed by the CPU
5. Read the staging texture to a Bitmap
The problem is that whatever I do, the result is a black bitmap. I don't think the bug is in the Texture2D -> Bitmap conversion code as printing the first pixel directly from the staging texture also gives me 0.
This is my shader code:
// Output texture. The element type is declared `unorm float4` so that it
// matches a UAV created with an R8G8B8A8_UNorm format; a plain float4
// declaration is reported by the debug runtime as a type mismatch.
RWTexture2D<unorm float4> Output : register(u0);

// Writes opaque green to the texel addressed by the dispatch thread ID.
// Each thread group covers 32 x 32 texels.
[numthreads(32, 32, 1)]
void main(uint3 id : SV_DispatchThreadID) {
    Output[id.xy] = float4(0, 1.0, 0, 1.0);
}
Using the MS DX11 docs and blogs, I pieced together this code to run the texture:
/// <summary>
/// Runs the compute shader in "test.hlsl" against a 1024 x 1024 texture and
/// returns the result as a GDI+ bitmap.
/// </summary>
public class GPUScreenColor {
    // Width and height, in texels, of the output and staging textures.
    private const int TextureSize = 1024;
    // Thread-group edge length; must match [numthreads(32, 32, 1)] in the shader.
    private const int ThreadGroupSize = 32;
    // Bytes per texel for R8G8B8A8_UNorm.
    private const int BytesPerPixel = 4;

    private int adapterIndex = 0;
    private Adapter1 gpu;
    private Device device;
    private ComputeShader computeShader;
    private Texture2D texture;
    private Texture2D stagingTexture;
    private UnorderedAccessView view;

    public GPUScreenColor() {
        initializeDirectX();
    }

    /// <summary>
    /// Creates the device, compiles the shader, and creates the output
    /// texture, its unordered access view and the CPU-readable staging texture.
    /// </summary>
    private void initializeDirectX() {
        using (var factory = new Factory1()) {
            gpu = factory.GetAdapter1(adapterIndex);
        }
        device = new Device(gpu, DeviceCreationFlags.Debug, FeatureLevel.Level_11_1);

        var compilationResult = ShaderBytecode.CompileFromFile("test.hlsl", "main", "cs_5_0", ShaderFlags.Debug);
        computeShader = new ComputeShader(device, compilationResult.Bytecode);

        texture = new Texture2D(device, new Texture2DDescription() {
            BindFlags = BindFlags.UnorderedAccess | BindFlags.ShaderResource,
            Format = Format.R8G8B8A8_UNorm,
            Width = TextureSize,
            Height = TextureSize,
            OptionFlags = ResourceOptionFlags.None,
            MipLevels = 1,
            ArraySize = 1,
            SampleDescription = { Count = 1, Quality = 0 }
        });

        // Assign the field. Declaring a local named `view` here would leave
        // the field null, and getBitmap() would then bind no UAV.
        view = new UnorderedAccessView(device, texture, new UnorderedAccessViewDescription() {
            Format = Format.R8G8B8A8_UNorm,
            Dimension = UnorderedAccessViewDimension.Texture2D,
            Texture2D = { MipSlice = 0 }
        });

        stagingTexture = new Texture2D(device, new Texture2DDescription {
            CpuAccessFlags = CpuAccessFlags.Read,
            BindFlags = BindFlags.None,
            Format = Format.R8G8B8A8_UNorm,
            Width = TextureSize,
            Height = TextureSize,
            OptionFlags = ResourceOptionFlags.None,
            MipLevels = 1,
            ArraySize = 1,
            SampleDescription = { Count = 1, Quality = 0 },
            Usage = ResourceUsage.Staging
        });
    }

    /// <summary>
    /// Dispatches the compute shader, copies the output to the staging
    /// texture and converts it to a bitmap row by row.
    /// </summary>
    /// <returns>A 1024 x 1024 bitmap holding the shader output.</returns>
    public Bitmap getBitmap() {
        var context = device.ImmediateContext;
        context.ComputeShader.Set(computeShader);
        context.ComputeShader.SetUnorderedAccessView(0, view);

        // One thread group per 32 x 32 tile covers the whole texture.
        context.Dispatch(TextureSize / ThreadGroupSize, TextureSize / ThreadGroupSize, 1);
        context.CopyResource(texture, stagingTexture);

        var mapSource = context.MapSubresource(stagingTexture, 0, MapMode.Read, MapFlags.None);
        // Debug output: raw value of the first texel.
        Console.WriteLine(Marshal.ReadInt32(IntPtr.Add(mapSource.DataPointer, 0)));
        try {
            // Copy pixels from the staging texture to a GDI bitmap.
            // NOTE(review): the texture is R8G8B8A8 and the bitmap is
            // Format32bppRgb; red and blue are presumably swapped by this raw
            // copy - harmless for the pure-green test output, confirm otherwise.
            Bitmap bitmap = new Bitmap(TextureSize, TextureSize, System.Drawing.Imaging.PixelFormat.Format32bppRgb);
            BitmapData mapDest = bitmap.LockBits(new Rectangle(0, 0, TextureSize, TextureSize), ImageLockMode.ReadWrite, bitmap.PixelFormat);
            try {
                var sourcePtr = mapSource.DataPointer;
                var destPtr = mapDest.Scan0;
                for (int y = 0; y < TextureSize; y++) {
                    // Copy a single line
                    Utilities.CopyMemory(destPtr, sourcePtr, TextureSize * BytesPerPixel);
                    // Advance by each side's own pitch; rows may be padded
                    sourcePtr = IntPtr.Add(sourcePtr, mapSource.RowPitch);
                    destPtr = IntPtr.Add(destPtr, mapDest.Stride);
                }
                return bitmap;
            } finally {
                bitmap.UnlockBits(mapDest);
            }
        } finally {
            context.UnmapSubresource(stagingTexture, 0);
        }
    }
}
I am pretty new to shaders so it may be something obvious...
First thing, you create your UAV as a local :
UnorderedAccessView view = new UnorderedAccessView(....
So the field is then null, replacing by
view = new UnorderedAccessView(....
will solve the first issue.
Second, it's quite likely that the runtime will complain about types (debug will give you something like :
The resource return type for component 0 declared in the shader code (FLOAT) is not compatible with the resource type bound to Unordered Access View slot 0 of the Compute Shader unit (UNORM).
Some cards might do something (fix it silently), some might do nothing, some might crash :)
The problem is that RWTexture2D<float4> does not match the UNORM format (you are specifying a floating-point format here).
You need to enforce your RWTexture to be specifically of unorm format eg (yes runtime can be that picky):
RWTexture2D<unorm float4> Output : register(u0);
Then your whole setup should work. (PS: I did not check the bitmap code, but I double-checked that the shader runs without error and that the first pixel matches.)

3D and 2D display in DirectX

I want to render some sprites over my 3D scene, but when I enable D3D sprites, my 3D scene disappears and I can see only the sprites.
Settings:
// Direct3D 9 interface object, created in InitDirect3D()
LPDIRECT3D9 d3d = NULL;
// Rendering device, created in InitDirect3D()
LPDIRECT3DDEVICE9 d3ddev = NULL;
// Presentation parameters filled in by InitDirect3D()
D3DPRESENT_PARAMETERS d3dpp;
// Sprite helper object, created in InitDirect3D()
LPD3DXSPRITE d3dspt;
// Create Direct3D and the Direct3D Device
//
// Fills d3dpp from the window description, creates the device with a 16-bit
// auto depth-stencil buffer, sets the initial render states and creates the
// sprite object.
//
// gw: window description; its Windowed, Width, Height and hWnd members are read.
//
// On failure the function returns early and the affected globals (d3d,
// d3ddev, d3dspt) are left NULL, so callers can test them before rendering.
void InitDirect3D(GAMEWINDOW* gw)
{
    d3d = Direct3DCreate9(D3D_SDK_VERSION);
    if (d3d == NULL)
    {
        return;
    }

    ZeroMemory(&d3dpp, sizeof(d3dpp));
    d3dpp.SwapEffect = D3DSWAPEFFECT_DISCARD;
    d3dpp.BackBufferFormat = D3DFMT_X8R8G8B8;
    d3dpp.Windowed = gw->Windowed;
    d3dpp.BackBufferWidth = gw->Width;
    d3dpp.BackBufferHeight = gw->Height;
    d3dpp.EnableAutoDepthStencil = TRUE;
    d3dpp.AutoDepthStencilFormat = D3DFMT_D16;

    HRESULT hr = d3d->CreateDevice(D3DADAPTER_DEFAULT,
                                   D3DDEVTYPE_HAL,
                                   gw->hWnd,
                                   D3DCREATE_SOFTWARE_VERTEXPROCESSING,
                                   &d3dpp,
                                   &d3ddev);
    if (FAILED(hr))
    {
        d3ddev = NULL;
        return;
    }

    d3ddev->SetRenderState(D3DRS_LIGHTING, FALSE);
    d3ddev->SetRenderState(D3DRS_ZENABLE, TRUE);
    // D3DRS_CULLMODE takes a D3DCULL value, not a boolean. TRUE (1) is
    // numerically D3DCULL_NONE, so this keeps the existing behaviour while
    // saying what it does.
    d3ddev->SetRenderState(D3DRS_CULLMODE, D3DCULL_NONE);

    if (FAILED(D3DXCreateSprite(d3ddev, &d3dspt)))
    {
        d3dspt = NULL;
    }
}
Rendering:
// Start rendering
//
// Clears the render target and the depth buffer, begins the scene and
// begins the sprite batch. EndRender() closes both again.
void StartRender()
{
    // One Clear call with both flags replaces two separate calls.
    d3ddev->Clear(0, NULL, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER,
                  D3DCOLOR_XRGB(0, 0, 0), 1.0f, 0);
    d3ddev->BeginScene();
    // NOTE(review): the 3D scene is reported to disappear while the sprite
    // batch is active - presumably Begin() changes device state that the 3D
    // drawing depends on; confirm against the ID3DXSprite documentation.
    d3dspt->Begin(D3DXSPRITE_ALPHABLEND);
}
// Stop rendering
//
// Ends the sprite batch, ends the scene and presents the back buffer.
void EndRender()
{
    // Close the sprite batch before the scene itself is ended
    d3dspt->End();

    d3ddev->EndScene();
    d3ddev->Present(NULL, NULL, NULL, NULL);
}
Rendering function:
// Draws one frame: the animated sprite, then the model. The sprite's
// animation frame cycles through 0..35, advancing by one per call.
void Render()
{
    static int nextFrame = 0;

    // Wrap around once the last animation frame has been shown
    const int frame = (nextFrame == 36) ? 0 : nextFrame;
    nextFrame = frame + 1;

    StartRender();
    DrawSprite(&interceptor, frame, 100, 100, 0);
    DrawModel(&a, 0.0f, 0.0f, 0.0f);
    EndRender();
}
Try processing your sprites separately from the rest of your scene. You could create a single render function that would make your life much easier:
// Draws one frame: the 3D scene first, then the sprites on top of it.
void Render()
{
    d3ddev->Clear(0, NULL, D3DCLEAR_TARGET|D3DCLEAR_ZBUFFER,
                  D3DCOLOR_XRGB(0, 0, 0), 1.0f, 0);
    d3ddev->BeginScene();

    // Draw the 3D scene before the sprite batch is opened. Begin() changes
    // device state for sprite drawing (unless D3DXSPRITE_OBJECTSPACE is
    // passed it also replaces the transforms), so 3D geometry drawn between
    // Begin() and End() is not rendered as intended.
    My3DRenderingFunction();

    d3dspt->Begin(D3DXSPRITE_ALPHABLEND);
    MySpriteRenderingFunction();
    d3dspt->End(); // disabling sprites

    d3ddev->EndScene();
    d3ddev->Present(NULL, NULL, NULL, NULL);
}
My3DRenderingFunction() and MySpriteRenderingFunction() would be your own custom functions, where you render everything. You could even pass a callback (a function pointer) to the render function. Also, note that you don't need two Clear() calls. You can use just one:
d3ddev->Clear(0, NULL, D3DCLEAR_TARGET|D3DCLEAR_ZBUFFER,
D3DCOLOR_XRGB(0, 0, 0), 1.0f, 0);

Resources