Update texture from vector (DirectX 9)

I'm trying to render two textures, one for RGB and another one for the alpha channel, and I blend them together with a shader.
The alpha channel texture doesn't overlap properly with the RGB one; it seems to be stretched.
The alpha channel texture changes every frame, and I need to fill it from an array of uint8_t with the following function:
D3DLOCKED_RECT locked_rect;
HRESULT hr = alpha_tex->LockRect(0, &locked_rect, nullptr, 0);
if (SUCCEEDED(hr)) {
    ret_code = 0;
    // Copy one row at a time: the locked surface may be padded,
    // so advance the destination by Pitch rather than by width.
    BYTE *p_dst = (BYTE *)locked_rect.pBits;
    for (UINT y = 0; y < height; y++) {
        memcpy(p_dst, alpha_array, width);
        alpha_array += width;
        p_dst += locked_rect.Pitch;
    }
    alpha_tex->UnlockRect(0);
}
where alpha_array is a uint8_t array containing the alpha values.
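Since the symptom is a stretched alpha texture, one thing worth ruling out first (a hedged check, not a confirmed diagnosis) is the actual allocated size of alpha_tex: on hardware without unconditional non-power-of-two support, helpers like D3DXCreateTexture silently round dimensions up, in which case the row-by-row copy above fills only the top-left corner while normalized UVs stretch across the whole surface:
// Hedged check: compare the allocated size of mip level 0 with the
// size of the data being copied into it.
D3DSURFACE_DESC desc;
if (SUCCEEDED(alpha_tex->GetLevelDesc(0, &desc))) {
    if (desc.Width != width || desc.Height != height) {
        // Mismatch: either recreate the texture at the exact size
        // (requires non-pow2 texture support) or scale the UVs by
        // width / desc.Width and height / desc.Height when sampling.
    }
}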
To render the textures I use the following function:
hwctx->d3d9device->Clear(0, 0, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, 0xffeeeeee, 1.0f, 0);
hwctx->d3d9device->BeginScene();
ctx->mFX->SetTechnique(ctx->mhTech);
// Copy the decoded source frame into the RGB texture.
ctx->texRGB->GetSurfaceLevel(0, &ctx->surfRGB);
hwctx->d3d9device->StretchRect((IDirect3DSurface9*)s->vdrFrame->data[3], NULL, ctx->surfRGB, NULL, D3DTEXF_LINEAR);
ctx->mFX->SetTexture(ctx->mhTexRGB, ctx->texRGB);
ctx->mFX->SetTexture(ctx->mhTexAlpha, ctx->texAlpha);
// Enable alpha blending.
hwctx->d3d9device->SetRenderState(D3DRS_ALPHABLENDENABLE, true);
hwctx->d3d9device->SetRenderState(D3DRS_SRCBLEND, D3DBLEND_SRCALPHA);
hwctx->d3d9device->SetRenderState(D3DRS_DESTBLEND, D3DBLEND_INVSRCALPHA);
UINT numPasses = 0;
ctx->mFX->Begin(&numPasses, 0);
for (UINT i = 0; i < numPasses; ++i) {
    ctx->mFX->BeginPass(i);
    // Two triangles of a fan = one fullscreen quad (4 vertices).
    hwctx->d3d9device->DrawPrimitive(D3DPT_TRIANGLEFAN, 0, 2);
    ctx->mFX->EndPass();
}
ctx->mFX->End();
hwctx->d3d9device->EndScene();
hwctx->d3d9device->Present(0, 0, 0, 0);
// Disable alpha blending.
hwctx->d3d9device->SetRenderState(D3DRS_ALPHABLENDENABLE, false);
I combine the textures with the following vertex/pixel shader:
uniform extern texture gTexRGB;
uniform extern texture gTexAlpha;

sampler TexRGB = sampler_state {
    Texture = <gTexRGB>;
    AddressU = WRAP;
    AddressV = WRAP;
};

sampler TexAlpha = sampler_state {
    Texture = <gTexAlpha>;
    AddressU = WRAP;
    AddressV = WRAP;
};

struct OutputVS {
    float4 posH : POSITION0;
    float2 tex0 : TEXCOORD0;
};

OutputVS TextureBlendingVS(float2 tex0 : TEXCOORD0) {
    // Zero out our output.
    OutputVS outVS = (OutputVS)0;
    // Pass on texture coordinates to be interpolated in rasterization.
    outVS.tex0 = tex0;
    // Note: posH is never written in the code as posted, so it keeps
    // the zero from the cast above.
    // Done--return the output.
    return outVS;
}

float4 TextureBlendingPS(float2 tex0 : TEXCOORD0) : COLOR {
    float3 rgb   = tex2D(TexRGB, tex0).rgb;
    float  alpha = tex2D(TexAlpha, tex0).a;
    return float4(rgb, alpha);
}

technique DirLightTexTech {
    pass P0 {
        // Specify the vertex and pixel shader associated with this pass.
        vertexShader = compile vs_2_0 TextureBlendingVS();
        pixelShader  = compile ps_2_0 TextureBlendingPS();
    }
}
The two textures are the same size, but during rendering something goes wrong.
Please help me. :)
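Two more D3D9-specific things may be worth checking, offered here as hedged suggestions rather than a confirmed diagnosis. First, WRAP addressing on a non-tiling texture pulls the opposite edge in at the borders, so CLAMP is usually safer for both samplers above. Second, D3D9 offsets pixel centers and texel centers by half a unit, so a screen-aligned quad must be shifted by -0.5 pixels (or the UVs biased by half a texel) before texels map 1:1 onto pixels; this shifts both textures alike, so it does not explain a differential stretch by itself, but it is worth fixing while debugging overlap problems. A minimal sketch, assuming a hypothetical 4-vertex pretransformed quad with x/y screen-space fields:
// "Directly Mapping Texels to Pixels" (D3D9): shift the quad half a
// pixel so texel centers line up with pixel centers.
for (int i = 0; i < 4; ++i) {
    quad[i].x -= 0.5f;
    quad[i].y -= 0.5f;
}
// Alternative: leave the quad alone and bias the UVs per texture:
//   u += 0.5f / texture_width;  v += 0.5f / texture_height;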

Related

Color conversion from DXGI_FORMAT_B8G8R8A8_UNORM to NV12 in GPU using DirectX11 pixel shaders

I'm working on code that captures the desktop using Desktop Duplication and encodes it to h264 using Intel hardware MFT. The encoder only accepts NV12 format as input. I have a DXGI_FORMAT_B8G8R8A8_UNORM to NV12 converter (https://github.com/NVIDIA/video-sdk-samples/blob/master/nvEncDXGIOutputDuplicationSample/Preproc.cpp) that works fine and is based on the DirectX VideoProcessor.
The problem is that the VideoProcessor on certain Intel graphics hardware supports conversions only from DXGI_FORMAT_B8G8R8A8_UNORM to YUY2, not NV12; I have confirmed this by enumerating the supported formats through GetVideoProcessorOutputFormats. The VideoProcessor Blt succeeds without any errors, but the frames in the output video are a bit pixelated; I can notice it if I look at it closely.
I guess the VideoProcessor has simply failed over to the next supported output format (YUY2) and I'm unknowingly feeding it to the encoder, which thinks that the input is in NV12 as configured. There is no failure or major corruption of frames because the differences between NV12 and YUY2 are small (byte order and subsampling). Also, I don't have pixelation problems on hardware that supports NV12 conversion.
So I decided to do the color conversion using pixel shaders, based on this code (https://github.com/bavulapati/DXGICaptureDXColorSpaceConversionIntelEncode/blob/master/DXGICaptureDXColorSpaceConversionIntelEncode/DuplicationManager.cpp). I'm able to make the pixel shaders work, and I have also uploaded my code here (https://codeshare.io/5PJjxP) for reference (simplified as much as possible).
Now I'm left with two separate channels, chroma and luma (ID3D11Texture2D textures), and I'm really confused about how to efficiently pack the two separate channels into one ID3D11Texture2D texture so that I may feed it to the encoder. Is there a way to efficiently pack the Y and UV channels into a single ID3D11Texture2D on the GPU? I'm really tired of CPU-based approaches: they are costly and don't offer the best possible frame rates. In fact, I'm reluctant to even copy the textures to the CPU. I'm thinking of a way to do it on the GPU without any back-and-forth copies between CPU and GPU.
I have been researching this for quite some time without any progress; any help would be appreciated.
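One direction that avoids the CPU round-trip entirely, sketched here as an assumption rather than tested code (it requires the driver to report render-target support for DXGI_FORMAT_NV12, which ID3D11Device::CheckFormatSupport can confirm): D3D11 allows per-plane views on planar video formats, so you can create one render target view with DXGI_FORMAT_R8_UNORM for the Y plane and one with DXGI_FORMAT_R8G8_UNORM for the UV plane of a single NV12 texture. The two pixel shader passes in DrawNV12Frame below could then write straight into that texture, and the same texture goes to the encoder with no Map/memcpy at all:
// Hedged sketch: render directly into the two planes of one NV12 texture.
// Uses the question's device member and image-size helpers.
D3D11_TEXTURE2D_DESC desc = {};
desc.Width = GetImageWidth();
desc.Height = GetImageHeight();
desc.MipLevels = 1;
desc.ArraySize = 1;
desc.Format = DXGI_FORMAT_NV12;          // one resource, two planes
desc.SampleDesc.Count = 1;
desc.Usage = D3D11_USAGE_DEFAULT;
desc.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE;

ID3D11Texture2D* nv12Tex = nullptr;
HRESULT hr = m_pD3D11Device->CreateTexture2D(&desc, nullptr, &nv12Tex);

D3D11_RENDER_TARGET_VIEW_DESC rtvDesc = {};
rtvDesc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D;

rtvDesc.Format = DXGI_FORMAT_R8_UNORM;   // view of the luma plane
ID3D11RenderTargetView* lumaRTV = nullptr;
hr = m_pD3D11Device->CreateRenderTargetView(nv12Tex, &rtvDesc, &lumaRTV);

rtvDesc.Format = DXGI_FORMAT_R8G8_UNORM; // view of the interleaved UV plane
ID3D11RenderTargetView* chromaRTV = nullptr;
hr = m_pD3D11Device->CreateRenderTargetView(nv12Tex, &rtvDesc, &chromaRTV);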
/**
 * This method is incomplete. It's just a template of what I want to achieve.
 */
HRESULT CreateNV12TextureFromLumaAndChromaSurface(ID3D11Texture2D** pOutputTexture)
{
    HRESULT hr = S_OK;
    try
    {
        // Copying from GPU to CPU. Bad :(
        m_pD3D11DeviceContext->CopyResource(m_CPUAccessibleLuminanceSurf, m_LuminanceSurf);

        D3D11_MAPPED_SUBRESOURCE resource;
        UINT subresource = D3D11CalcSubresource(0, 0, 0);
        HRESULT hr = m_pD3D11DeviceContext->Map(m_CPUAccessibleLuminanceSurf, subresource, D3D11_MAP_READ, 0, &resource);
        BYTE* sptr = reinterpret_cast<BYTE*>(resource.pData);
        BYTE* dptrY = nullptr; // points to the address of the Y plane in the output surface
        // Store image pitch
        int m_ImagePitch = resource.RowPitch;
        int height = GetImageHeight();
        int width = GetImageWidth();
        for (int i = 0; i < height; i++)
        {
            memcpy_s(dptrY, m_ImagePitch, sptr, m_ImagePitch);
            sptr += m_ImagePitch;
            dptrY += m_ImagePitch;
        }
        m_pD3D11DeviceContext->Unmap(m_CPUAccessibleLuminanceSurf, subresource);

        // Copying from GPU to CPU. Bad :(
        m_pD3D11DeviceContext->CopyResource(m_CPUAccessibleChrominanceSurf, m_ChrominanceSurf);
        hr = m_pD3D11DeviceContext->Map(m_CPUAccessibleChrominanceSurf, subresource, D3D11_MAP_READ, 0, &resource);
        sptr = reinterpret_cast<BYTE*>(resource.pData);
        BYTE* dptrUV = nullptr; // points to the address of the UV plane in the output surface
        m_ImagePitch = resource.RowPitch;
        // NV12 chroma: interleaved UV at half vertical resolution
        height /= 2;
        width /= 2;
        for (int i = 0; i < height; i++)
        {
            memcpy_s(dptrUV, m_ImagePitch, sptr, m_ImagePitch);
            sptr += m_ImagePitch;
            dptrUV += m_ImagePitch;
        }
        m_pD3D11DeviceContext->Unmap(m_CPUAccessibleChrominanceSurf, subresource);
    }
    catch (HRESULT) {}
    return hr;
}
Draw NV12:
//
// Draw frame for NV12 texture
//
HRESULT DrawNV12Frame(ID3D11Texture2D* inputTexture)
{
    HRESULT hr;
    // If window was resized, resize swapchain
    if (!m_bIntialized)
    {
        HRESULT Ret = InitializeNV12Surfaces(inputTexture);
        if (FAILED(Ret))
        {
            return Ret;
        }
        m_bIntialized = true;
    }
    m_pD3D11DeviceContext->CopyResource(m_ShaderResourceSurf, inputTexture);

    D3D11_TEXTURE2D_DESC FrameDesc;
    m_ShaderResourceSurf->GetDesc(&FrameDesc);

    D3D11_SHADER_RESOURCE_VIEW_DESC ShaderDesc;
    ShaderDesc.Format = FrameDesc.Format;
    ShaderDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
    ShaderDesc.Texture2D.MostDetailedMip = FrameDesc.MipLevels - 1;
    ShaderDesc.Texture2D.MipLevels = FrameDesc.MipLevels;

    // Create new shader resource view
    ID3D11ShaderResourceView* ShaderResource = nullptr;
    hr = m_pD3D11Device->CreateShaderResourceView(m_ShaderResourceSurf, &ShaderDesc, &ShaderResource);
    IF_FAILED_THROW(hr);
    m_pD3D11DeviceContext->PSSetShaderResources(0, 1, &ShaderResource);

    // Pass 1: render the luma plane
    m_pD3D11DeviceContext->OMSetRenderTargets(1, &m_pLumaRT, nullptr);
    m_pD3D11DeviceContext->PSSetShader(m_pPixelShaderLuma, nullptr, 0);
    m_pD3D11DeviceContext->RSSetViewports(1, &m_VPLuminance);
    // Draw textured quad onto render target
    m_pD3D11DeviceContext->Draw(NUMVERTICES, 0);

    // Pass 2: render the chroma plane
    m_pD3D11DeviceContext->OMSetRenderTargets(1, &m_pChromaRT, nullptr);
    m_pD3D11DeviceContext->PSSetShader(m_pPixelShaderChroma, nullptr, 0);
    m_pD3D11DeviceContext->RSSetViewports(1, &m_VPChrominance);
    // Draw textured quad onto render target
    m_pD3D11DeviceContext->Draw(NUMVERTICES, 0);

    // Release shader resource
    ShaderResource->Release();
    ShaderResource = nullptr;
    return S_OK;
}
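A small side note on DrawNV12Frame: the shader resource view is created and released every frame even though m_ShaderResourceSurf never changes, so it could instead be built once during initialization and reused. A sketch, where m_ShaderResourceView is an assumed new member:
// Hedged sketch: create the SRV once (for example at the end of
// InitializeNV12Surfaces below) and bind it each frame instead of
// calling CreateShaderResourceView/Release per DrawNV12Frame.
D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
srvDesc.Format = lOutputDuplDesc.Format;  // the capture format of the duplicated output
srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
srvDesc.Texture2D.MostDetailedMip = 0;
srvDesc.Texture2D.MipLevels = 1;
hr = m_pD3D11Device->CreateShaderResourceView(m_ShaderResourceSurf, &srvDesc, &m_ShaderResourceView);
IF_FAILED_THROW(hr);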
Init shaders:
void SetViewPort(D3D11_VIEWPORT* VP, UINT Width, UINT Height)
{
    VP->Width = static_cast<FLOAT>(Width);
    VP->Height = static_cast<FLOAT>(Height);
    VP->MinDepth = 0.0f;
    VP->MaxDepth = 1.0f;
    VP->TopLeftX = 0;
    VP->TopLeftY = 0;
}

HRESULT MakeRTV(ID3D11RenderTargetView** pRTV, ID3D11Texture2D* pSurf)
{
    if (*pRTV)
    {
        (*pRTV)->Release();
        *pRTV = nullptr;
    }
    // Create a render target view
    HRESULT hr = m_pD3D11Device->CreateRenderTargetView(pSurf, nullptr, pRTV);
    IF_FAILED_THROW(hr);
    return S_OK;
}

HRESULT InitializeNV12Surfaces(ID3D11Texture2D* inputTexture)
{
    ReleaseSurfaces();

    D3D11_TEXTURE2D_DESC lOutputDuplDesc;
    inputTexture->GetDesc(&lOutputDuplDesc);

    // Create shared texture for all duplication threads to draw into
    D3D11_TEXTURE2D_DESC DeskTexD;
    RtlZeroMemory(&DeskTexD, sizeof(D3D11_TEXTURE2D_DESC));
    DeskTexD.Width = lOutputDuplDesc.Width;
    DeskTexD.Height = lOutputDuplDesc.Height;
    DeskTexD.MipLevels = 1;
    DeskTexD.ArraySize = 1;
    DeskTexD.Format = lOutputDuplDesc.Format;
    DeskTexD.SampleDesc.Count = 1;
    DeskTexD.Usage = D3D11_USAGE_DEFAULT;
    DeskTexD.BindFlags = D3D11_BIND_SHADER_RESOURCE;
    HRESULT hr = m_pD3D11Device->CreateTexture2D(&DeskTexD, nullptr, &m_ShaderResourceSurf);
    IF_FAILED_THROW(hr);

    // Full-resolution R8 luma render target...
    DeskTexD.Format = DXGI_FORMAT_R8_UNORM;
    DeskTexD.BindFlags = D3D11_BIND_RENDER_TARGET;
    hr = m_pD3D11Device->CreateTexture2D(&DeskTexD, nullptr, &m_LuminanceSurf);
    IF_FAILED_THROW(hr);

    // ...and its CPU-readable staging copy
    DeskTexD.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
    DeskTexD.Usage = D3D11_USAGE_STAGING;
    DeskTexD.BindFlags = 0;
    hr = m_pD3D11Device->CreateTexture2D(&DeskTexD, NULL, &m_CPUAccessibleLuminanceSurf);
    IF_FAILED_THROW(hr);

    SetViewPort(&m_VPLuminance, DeskTexD.Width, DeskTexD.Height);
    HRESULT Ret = MakeRTV(&m_pLumaRT, m_LuminanceSurf);
    if (FAILED(Ret))
        return Ret;

    // Half-resolution R8G8 chroma render target...
    DeskTexD.Width = lOutputDuplDesc.Width / 2;
    DeskTexD.Height = lOutputDuplDesc.Height / 2;
    DeskTexD.Format = DXGI_FORMAT_R8G8_UNORM;
    DeskTexD.Usage = D3D11_USAGE_DEFAULT;
    DeskTexD.CPUAccessFlags = 0;
    DeskTexD.BindFlags = D3D11_BIND_RENDER_TARGET;
    hr = m_pD3D11Device->CreateTexture2D(&DeskTexD, nullptr, &m_ChrominanceSurf);
    IF_FAILED_THROW(hr);

    // ...and its CPU-readable staging copy
    DeskTexD.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
    DeskTexD.Usage = D3D11_USAGE_STAGING;
    DeskTexD.BindFlags = 0;
    hr = m_pD3D11Device->CreateTexture2D(&DeskTexD, NULL, &m_CPUAccessibleChrominanceSurf);
    IF_FAILED_THROW(hr);

    SetViewPort(&m_VPChrominance, DeskTexD.Width, DeskTexD.Height);
    return MakeRTV(&m_pChromaRT, m_ChrominanceSurf);
}
HRESULT InitVertexShader(ID3D11VertexShader** ppID3D11VertexShader)
{
    HRESULT hr = S_OK;
    UINT Size = ARRAYSIZE(g_VS);
    try
    {
        IF_FAILED_THROW(m_pD3D11Device->CreateVertexShader(g_VS, Size, NULL, ppID3D11VertexShader));
        m_pD3D11DeviceContext->VSSetShader(m_pVertexShader, nullptr, 0);

        // Vertices for drawing whole texture
        VERTEX Vertices[NUMVERTICES] =
        {
            { XMFLOAT3(-1.0f, -1.0f, 0), XMFLOAT2(0.0f, 1.0f) },
            { XMFLOAT3(-1.0f,  1.0f, 0), XMFLOAT2(0.0f, 0.0f) },
            { XMFLOAT3( 1.0f, -1.0f, 0), XMFLOAT2(1.0f, 1.0f) },
            { XMFLOAT3( 1.0f, -1.0f, 0), XMFLOAT2(1.0f, 1.0f) },
            { XMFLOAT3(-1.0f,  1.0f, 0), XMFLOAT2(0.0f, 0.0f) },
            { XMFLOAT3( 1.0f,  1.0f, 0), XMFLOAT2(1.0f, 0.0f) },
        };
        UINT Stride = sizeof(VERTEX);
        UINT Offset = 0;

        D3D11_BUFFER_DESC BufferDesc;
        RtlZeroMemory(&BufferDesc, sizeof(BufferDesc));
        BufferDesc.Usage = D3D11_USAGE_DEFAULT;
        BufferDesc.ByteWidth = sizeof(VERTEX) * NUMVERTICES;
        BufferDesc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
        BufferDesc.CPUAccessFlags = 0;

        D3D11_SUBRESOURCE_DATA InitData;
        RtlZeroMemory(&InitData, sizeof(InitData));
        InitData.pSysMem = Vertices;

        // Create vertex buffer
        IF_FAILED_THROW(m_pD3D11Device->CreateBuffer(&BufferDesc, &InitData, &m_VertexBuffer));
        m_pD3D11DeviceContext->IASetVertexBuffers(0, 1, &m_VertexBuffer, &Stride, &Offset);
        m_pD3D11DeviceContext->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);

        D3D11_INPUT_ELEMENT_DESC Layout[] =
        {
            { "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0,  0, D3D11_INPUT_PER_VERTEX_DATA, 0 },
            { "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT,    0, 12, D3D11_INPUT_PER_VERTEX_DATA, 0 }
        };
        UINT NumElements = ARRAYSIZE(Layout);
        hr = m_pD3D11Device->CreateInputLayout(Layout, NumElements, g_VS, Size, &m_pVertexLayout);
        m_pD3D11DeviceContext->IASetInputLayout(m_pVertexLayout);
    }
    catch (HRESULT) {}
    return hr;
}
HRESULT InitPixelShaders()
{
    HRESULT hr = S_OK;
    // Refer https://codeshare.io/5PJjxP for g_PS_Y & g_PS_UV blobs
    try
    {
        // Note: in the original post the blob/member pairing was crossed
        // (g_PS_Y went to m_pPixelShaderChroma); paired here by name.
        UINT Size = ARRAYSIZE(g_PS_Y);
        hr = m_pD3D11Device->CreatePixelShader(g_PS_Y, Size, nullptr, &m_pPixelShaderLuma);
        IF_FAILED_THROW(hr);

        Size = ARRAYSIZE(g_PS_UV);
        hr = m_pD3D11Device->CreatePixelShader(g_PS_UV, Size, nullptr, &m_pPixelShaderChroma);
        IF_FAILED_THROW(hr);
    }
    catch (HRESULT) {}
    return hr;
}
I am experimenting with this RGBA-to-NV12 conversion on the GPU only, using DirectX 11.
This is a good challenge. I'm not familiar with DirectX 11, so this is my first experiment.
Check this project for updates: D3D11ShaderNV12
In my current implementation (which may not be the last), here is what I do:
Step 1: use a DXGI_FORMAT_B8G8R8A8_UNORM texture as input
Step 2: make a first-pass shader to get 3 textures (Y: luma, U: chroma Cb, V: chroma Cr): see YCbCrPS2.hlsl
Step 3: Y is DXGI_FORMAT_R8_UNORM and is ready for the final NV12 texture
Step 4: UV needs to be downsampled in a second-pass shader: see ScreenPS2.hlsl (using linear filtering)
Step 5: a third-pass shader samples the Y texture
Step 6: a fourth-pass shader samples the UV texture using a shift texture (I think another technique could be used)
My final texture is not DXGI_FORMAT_NV12 but a similar DXGI_FORMAT_R8_UNORM texture. My computer runs Windows 7, so DXGI_FORMAT_NV12 is not handled; I will try later on another computer.
The process is illustrated step by step with pictures in the linked project.

How to use DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL correctly?

I am having an issue while drawing my cube on the window: I don't see any graphics at all. I see the following warning messages in Visual Studio:
D3D11 WARNING: ID3D11DeviceContext::DrawIndexed: The Pixel Shader expects a Render Target View bound to slot 0, but the Render Target View was unbound during a call to Present. A successful Present call for DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL SwapChains unbinds backbuffer 0 from all GPU writeable bind points. [ EXECUTION WARNING #3146082:
D3D11 WARNING: ID3D11DeviceContext::DrawIndexed: The Pixel Shader expects a Render Target View bound to slot 0, but none is bound. This is OK, as writes of an unbound Render Target View are discarded. It is also possible the developer knows the data will not be used anyway. This is only a problem if the developer actually intended to bind a Render Target View here. [ EXECUTION WARNING #3146081: DEVICE_DRAW_RENDERTARGETVIEW_NOT_SET]
I created the swap chain using the following function:
virtual IDXGISwapChain* SwapChain(HWND wnd)
{
    HRESULT hr = S_OK;
    IDXGISwapChain* swapchain = nullptr;

    DXGI_SWAP_CHAIN_DESC desc;
    ZeroMemory(&desc, sizeof(DXGI_SWAP_CHAIN_DESC));
    desc.Windowed = TRUE; // Sets the initial state of full-screen mode.
    desc.BufferCount = 2;
    desc.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
    desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
    desc.SampleDesc.Count = 1;   // multisampling setting
    desc.SampleDesc.Quality = 0; // vendor-specific flag
    desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL;
    desc.OutputWindow = wnd;

    // Create the DXGI device object to use in other factories, such as Direct2D.
    IDXGIDevice3* dxgiDevice;
    hr = device_->QueryInterface(__uuidof(IDXGIDevice3), reinterpret_cast<void**>(&dxgiDevice));
    if (FAILED(hr))
        return nullptr;

    // Create swap chain.
    IDXGIAdapter* adapter;
    IDXGIFactory* factory;
    hr = dxgiDevice->GetAdapter(&adapter);
    dxgiDevice->Release();
    if (FAILED(hr))
        return nullptr;

    adapter->GetParent(IID_PPV_ARGS(&factory));
    hr = factory->CreateSwapChain(device_, &desc, &swapchain);
    adapter->Release();
    factory->Release();
    return swapchain;
}
The render target is bound using the call:
m_d3dDevice.Context()->OMSetRenderTargets(1, &m_pRenderTarget, _pDepthStencilView);
The Present is implemented as:
swap_chain->Present(0, 0);
The shader code is:
cbuffer ConstantBuffer : register(b0)
{
    matrix World;
    matrix View;
    matrix Projection;
    float4 vLightDir[2];
    float4 vLightColor[2];
    float4 vOutputColor;
}

struct VS_INPUT
{
    float4 Pos : POSITION;
    float3 Norm : NORMAL;
};

struct PS_INPUT
{
    float4 Pos : SV_POSITION;
    float3 Norm : TEXCOORD0;
};

PS_INPUT VS(VS_INPUT input)
{
    PS_INPUT output = (PS_INPUT)0;
    output.Pos = mul(input.Pos, World);
    output.Pos = mul(output.Pos, View);
    output.Pos = mul(output.Pos, Projection);
    output.Norm = mul(float4(input.Norm, 1), World).xyz;
    return output;
}

float4 PS(PS_INPUT input) : SV_Target
{
    float4 finalColor = 0;
    // do NdotL lighting for 2 lights
    for (int i = 0; i < 2; i++)
    {
        finalColor += saturate(dot((float3)vLightDir[i], input.Norm) * vLightColor[i]);
    }
    finalColor.a = 1;
    return finalColor;
}

float4 PSSolid(PS_INPUT input) : SV_Target
{
    return vOutputColor;
}
Invoke ID3D11DeviceContext::OMSetRenderTargets(...) before rendering each frame:
// Add this before each rendering
spImCtx->OMSetRenderTargets(1, spRTV.GetAddressOf(), spZView.Get());
// clear
spImCtx->ClearRenderTargetView(spRTV.Get(), Colors::Black);
spImCtx->ClearDepthStencilView(spZView.Get(), D3D11_CLEAR_DEPTH, 1.0f, 0);
// drawing...
// swap
spSwapChain->Present(1, 0);
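For context: with flip-model swap effects, a successful Present hands the back buffer over to DXGI, so the runtime unbinds it from all GPU-writable bind points; that is exactly what warning #3146082 above describes, and it is why the rebinding has to happen every frame (blt-model swap chains left the binding intact). A minimal per-frame sketch with hypothetical context/rtv/dsv/swapChain names:
// Hedged per-frame sketch for DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL.
void RenderFrame(ID3D11DeviceContext* context,
                 ID3D11RenderTargetView* rtv,
                 ID3D11DepthStencilView* dsv,
                 IDXGISwapChain* swapChain)
{
    // 1. Rebind: the previous Present unbound the back buffer.
    context->OMSetRenderTargets(1, &rtv, dsv);

    // 2. Clear and draw as usual.
    const FLOAT clearColor[4] = { 0.0f, 0.0f, 0.0f, 1.0f };
    context->ClearRenderTargetView(rtv, clearColor);
    context->ClearDepthStencilView(dsv, D3D11_CLEAR_DEPTH, 1.0f, 0);
    // ... DrawIndexed(...) etc. ...

    // 3. Present: after this, back buffer 0 is unbound again.
    swapChain->Present(1, 0);
}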

Metal vertex shader to draw points of a texture

I want to execute a Metal (or OpenGL ES 3.0) shader that draws a Points primitive with blending. To do that, I need to pass all the pixel coordinates of the texture to the vertex shader as vertices, which computes the position of the vertex to be passed to the fragment shader. The fragment shader simply outputs the color for the point with blending enabled. My problem is whether there is an efficient way to pass the coordinates of the vertices to the vertex shader, since there would be too many vertices for a 1920x1080 image, and this needs to be done 30 times a second, like we do in a compute shader with the dispatchThreadgroups command, except that a compute shader cannot draw geometry with blending enabled.
EDIT: This is what I did -
let vertexFunctionRed = library!.makeFunction(name: "vertexShaderHistogramBlenderRed")
let fragmentFunctionAccumulator = library!.makeFunction(name: "fragmentShaderHistogramAccumulator")

let renderPipelineDescriptorRed = MTLRenderPipelineDescriptor()
renderPipelineDescriptorRed.vertexFunction = vertexFunctionRed
renderPipelineDescriptorRed.fragmentFunction = fragmentFunctionAccumulator
renderPipelineDescriptorRed.colorAttachments[0].pixelFormat = .bgra8Unorm
renderPipelineDescriptorRed.colorAttachments[0].isBlendingEnabled = true
renderPipelineDescriptorRed.colorAttachments[0].rgbBlendOperation = .add
renderPipelineDescriptorRed.colorAttachments[0].sourceRGBBlendFactor = .one
renderPipelineDescriptorRed.colorAttachments[0].destinationRGBBlendFactor = .one

do {
    histogramPipelineRed = try device.makeRenderPipelineState(descriptor: renderPipelineDescriptorRed)
} catch {
    print("Unable to compile render pipeline state Histogram Red!")
    return
}
Drawing code:
let commandBuffer = commandQueue?.makeCommandBuffer()
let renderEncoder = commandBuffer?.makeRenderCommandEncoder(descriptor: renderPassDescriptor!)
renderEncoder?.setRenderPipelineState(histogramPipelineRed!)
renderEncoder?.setVertexTexture(metalTexture, index: 0)
// Instanced variant: one vertex drawn width*height times
renderEncoder?.drawPrimitives(type: .point, vertexStart: 0, vertexCount: 1, instanceCount: metalTexture!.width*metalTexture!.height)
// Non-instanced variant: width*height vertices drawn once
renderEncoder?.drawPrimitives(type: .point, vertexStart: 0, vertexCount: metalTexture!.width*metalTexture!.height, instanceCount: 1)
and Shaders:
vertex MappedVertex vertexShaderHistogramBlenderRed(texture2d<float, access::sample> inputTexture [[ texture(0) ]],
                                                    unsigned int vertexId [[vertex_id]])
{
    MappedVertex out;
    // Normalized texture coordinates; the original sampler declared
    // coord::pixel, which would require pixel-space coordinates in sample().
    constexpr sampler s(s_address::clamp_to_edge, t_address::clamp_to_edge,
                        min_filter::linear, mag_filter::linear);

    ushort width = inputTexture.get_width();
    ushort height = inputTexture.get_height();

    float X = (vertexId % width) / (1.0 * width);
    float Y = (vertexId / width) / (1.0 * height);

    // Sample returns a normalized float in [0, 1]; scale to [0, 255]
    // before binning (the original post truncated it to an int, which
    // always yields 0 for normalized formats).
    float red = inputTexture.sample(s, float2(X, Y)).r * 255.0;

    out.position = float4(-1.0 + (red * 0.0078125), 0.0, 0.0, 1.0);
    out.pointSize = 1.0;
    out.colorFactor = half3(1.0, 0.0, 0.0);
    return out;
}

fragment half4 fragmentShaderHistogramAccumulator(MappedVertex in [[ stage_in ]])
{
    half3 colorFactor = in.colorFactor;
    return half4(colorFactor * (1.0 / 256.0), 1.0);
}
Maybe you can draw a single point instanced 1920x1080 times. Something like:
vertex float4 my_func(texture2d<float, access::read> image [[texture(0)]],
                      constant uint &width [[buffer(0)]],
                      uint instance_id [[instance_id]])
{
    // decompose the instance ID to a position
    uint2 pos = uint2(instance_id % width, instance_id / width);
    return float4(image.read(pos).r * 255, 0, 0, 0);
}
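For this instanced approach, the matching encoder call is the first drawPrimitives variant in the question's edit: vertexCount 1 with instanceCount = width * height, so that instance_id rather than vertex_id enumerates the pixels.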

DX11 HLSL Secondary Texture Coordinates Lost

I've been banging my head against the wall with this for a while. Despite the fact that I THINK I have a proper vertex format defined with D3D11_INPUT_ELEMENT_DESC, no matter what I do, I can't seem to read my TEXCOORD1 values from this shader. To test it, I put random values into my second set of UV coordinates just to see if they were reaching the shader, but to my dismay, I haven't been able to find these random values anywhere. I have also watched the data go into the mapped memory directly, and I am pretty sure the random values were there when they were mapped.
Here is the Shader code:
sampler ImageSampler: register(s0);
Texture2D <float4> ImageTexture: register(t0);
Texture2D <float4> ReflectionTexture: register(t1);
//Texture2D <float4> ReflectionMap: register(t0);

struct PS_IN
{
    float4 InPos: SV_POSITION;
    float2 InTex: TEXCOORD;
    float2 InRef: TEXCOORD1;
    float4 InCol: COLOR0;
};

float4 main(PS_IN input): SV_TARGET
{
    float4 res;
    float4 mul;
    float2 tcRef;
    float4 res1 = ImageTexture.Sample(ImageSampler, input.InTex) * input.InCol;
    float4 res2 = ReflectionTexture.Sample(ImageSampler, input.InRef + input.InTex);
    mul.r = 0.5;
    mul.g = 0.5;
    mul.b = 0.5;
    mul.a = 0.5;
    res = res1 + res2;
    res = res * mul;
    res.a = res1.a;
    res.r = input.InRef.x; //<-----should be filled with random stuff... not working
    res.b = input.InRef.y; //<-----should be filled with random stuff... not working
    return res;
}
Here is my D3D11_INPUT_ELEMENT_DESC (sorry, it is in Pascal, but I like Pascal):
const
  CanvasVertexLayout: array[0..3] of D3D11_INPUT_ELEMENT_DESC =
    ((SemanticName: 'POSITION';
      SemanticIndex: 0;
      Format: DXGI_FORMAT_R32G32_FLOAT;
      InputSlot: 0;
      AlignedByteOffset: 0;
      InputSlotClass: D3D11_INPUT_PER_VERTEX_DATA;
      InstanceDataStepRate: 0),
     (SemanticName: 'TEXCOORD';
      SemanticIndex: 0;
      Format: DXGI_FORMAT_R32G32_FLOAT;
      InputSlot: 0;
      AlignedByteOffset: 8;
      InputSlotClass: D3D11_INPUT_PER_VERTEX_DATA;
      InstanceDataStepRate: 0),
     (SemanticName: 'TEXCOORD';
      SemanticIndex: 1;
      Format: DXGI_FORMAT_R32G32_FLOAT;
      InputSlot: 0;
      AlignedByteOffset: 16;
      InputSlotClass: D3D11_INPUT_PER_VERTEX_DATA;
      InstanceDataStepRate: 0),
     (SemanticName: 'COLOR';
      SemanticIndex: 0;
      Format: DXGI_FORMAT_R8G8B8A8_UNORM;
      InputSlot: 0;
      AlignedByteOffset: 24;
      InputSlotClass: D3D11_INPUT_PER_VERTEX_DATA;
      InstanceDataStepRate: 0)
    );
And here's the vertex struct:
TVertexEntry = packed record
  X, Y: Single;
  U, V: Single;
  U2, V2: Single;
  Color: LongWord;
end;
Since the COLOR semantic follows the TEXCOORD semantics, my best guess is that the problem is with the shader and not the Pascal code... but since I'm new to this kind of stuff, I'm obviously lost.
Any insight is appreciated.
Answering my own question. Since I'm new to shaders in general, maybe this will help some other newbs.
I was assuming that all I needed to do was add a second set of UV coordinates to the vertex format and add a D3D11_INPUT_ELEMENT_DESC for it. However, there is also a vertex shader involved, more or less a pass-through, and that vertex shader needs to be aware of the new UV coordinates and let them pass through. I was just making a 2D engine, so I didn't think I'd even have to mess with vertex shaders... go figure. So I modified the vertex shader, and this was the result:
void main(
    float2 InPos: POSITION0,
    float2 InTex: TEXCOORD0,
    float2 InTex2: TEXCOORD1, //<--added
    float4 InCol: COLOR0,
    out float4 OutPos: SV_POSITION,
    out float2 OutTex: TEXCOORD2,
    out float2 OutTex2: TEXCOORD3, //<--added
    out float4 OutCol: COLOR0)
{
    OutPos = float4(InPos, 0.0, 1.0);
    OutTex = InTex;
    OutCol = InCol;
    OutTex2 = InTex2; //<--added
}
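One caveat with this pass-through as posted: Direct3D links shader stages by semantic name and index, so the TEXCOORD2/TEXCOORD3 outputs here only reach a pixel shader whose inputs use the same indices; the PS_IN struct shown earlier declares TEXCOORD/TEXCOORD1, so presumably it was updated to match.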

Direct3D11, some part of the model always in front of the others, probably about depth

I'm a new D3D programmer.
When I tried to render a model, I got a strange problem. As you can see in the picture (not reproduced here), some parts of the model always appear in front of the others.
The model vertices contain only the following data:
{
    float x, y, z;
    float r, g, b;
    float u, v;
}
I tried to render it in OpenGL and WebGL (http://nalol.azurewebsites.net/) and it works well, but in D3D11 I get this strange problem.
I searched Google and found something about depth, but I don't know how to deal with it.
The following are some parts of my code:
HLSL file
struct vout
{
    float4 position : SV_POSITION;
    float3 normal : NORMAL;
    float2 texcoord : TEXCOORD;
};

vout vshader(float3 position : POSITION, float3 normals : NORMAL, float2 texcoords : TEXCOORD)
{
    vout output;
    output.position = float4(position, 1);
    output.normal = normals;
    output.texcoord = texcoords;
    return output;
}

Texture2D shaderTexture;
SamplerState SampleType;

float3 pshader(float3 position : POSITION, float3 normals : NORMAL, float2 texcoords : TEXCOORD) : SV_TARGET
{
    return shaderTexture.Sample(SampleType, texcoords);
}
vertex struct
struct lol_skn_vertex {
    float position[3];
    char bone_index[4];     // for bones and animation, not used here
    float bone_weights[4];  // for bones and animation, not used here
    float normals[3];
    float texcoords[2];
};
input layout object
D3D11_INPUT_ELEMENT_DESC ied[] =
{
    // As the answer below explains, R32G32_FLOAT feeds only x and y
    // to POSITION (and NORMAL), leaving z at 0.
    {"POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0},
    {"NORMAL", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 32, D3D11_INPUT_PER_VERTEX_DATA, 0},
    {"TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 44, D3D11_INPUT_PER_VERTEX_DATA, 0},
};
render function
void RenderFrame(void)
{
    FLOAT ColorRGBA[4] = {0.0f, 0.2f, 0.4f, 1.0f};
    d3d11_device_context->ClearRenderTargetView(d3d11_view_rt_backbuffer, ColorRGBA);
    d3d11_device_context->ClearDepthStencilView(d3d11_view_ds, D3D11_CLEAR_DEPTH | D3D11_CLEAR_STENCIL, 1.f, 0);

    update();

    UINT stride = sizeof(lol_skn_vertex);
    UINT offset = 0;
    d3d11_device_context->IASetVertexBuffers(0, 1, &vertex_buffer, &stride, &offset);
    d3d11_device_context->IASetIndexBuffer(index_buffer, DXGI_FORMAT_R16_UINT, 0);
    d3d11_device_context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
    d3d11_device_context->DrawIndexed(skn.num_indices, 0, 0);

    // switch the back buffer and the front buffer
    dxgi_swapchain->Present(0, 0);
}
buffer update function
void update() {
    // copy the vertices into the buffer
    D3D11_MAPPED_SUBRESOURCE ms;
    d3d11_device_context->Map(vertex_buffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &ms); // map the buffer
    memcpy(ms.pData, skn_vertex_buffer, sizeof(lol_skn_vertex) * skn.num_vertices); // copy the data

    // rotate the model around Y based on the current time
    SYSTEMTIME SystemTime;
    GetSystemTime(&SystemTime);
    float angle = (float)SystemTime.wMilliseconds / 1000 + SystemTime.wSecond;
    D3DXMATRIX x;
    D3DXMatrixRotationY(&x, angle);
    // use D3DXVECTOR4 for the transform
    D3DXVec4TransformArray((D3DXVECTOR4 *)ms.pData, sizeof(lol_skn_vertex), (D3DXVECTOR4 *)ms.pData, sizeof(lol_skn_vertex), &x, skn.num_vertices);

    // unmap the buffer
    d3d11_device_context->Unmap(vertex_buffer, 0);
}
At last I solved the problem.
I made two very stupid mistakes.
First: in the input layout object, I used DXGI_FORMAT_R32G32_FLOAT for the position, which only contains x and y, so the shader always got 0 for z.
Second: my model data is not normalized (it ranges from -50 to 50), so I had used D3D11_RASTERIZER_DESC to disable DepthClip, and then forgot about it.
Fixing these two problems made everything work.
And great thanks to Gnietschow :)
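For anyone hitting the same wall, the two fixes translate to roughly the following sketch, using the question's variable names (d3d11_device is assumed to be the device behind d3d11_device_context; note that the NORMAL element has the same two-component problem as POSITION, since lol_skn_vertex stores three floats for it):
// Fix 1: three-component formats for three-component attributes.
D3D11_INPUT_ELEMENT_DESC ied[] =
{
    {"POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0,  0, D3D11_INPUT_PER_VERTEX_DATA, 0},
    {"NORMAL",   0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 32, D3D11_INPUT_PER_VERTEX_DATA, 0},
    {"TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT,    0, 44, D3D11_INPUT_PER_VERTEX_DATA, 0},
};

// Fix 2: leave depth clipping at its default (enabled) so the depth
// test can reject hidden fragments; the vertex data then has to be
// scaled so that z falls inside the view volume.
D3D11_RASTERIZER_DESC rd = {};
rd.FillMode = D3D11_FILL_SOLID;
rd.CullMode = D3D11_CULL_BACK;
rd.DepthClipEnable = TRUE;
ID3D11RasterizerState* rs = nullptr;
d3d11_device->CreateRasterizerState(&rd, &rs);
d3d11_device_context->RSSetState(rs);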
