I've written a basic 2D pixel shader, and I can't seem to get it to work. If I draw with the effect active, then nothing draws to the screen. But if I disable it, then the texture draws to the screen as expected.
My aim is to be able to draw an arbitrary texture to the screen, then have this pixel shader "carve" circular hunks of pixels out of it, for use in an overlay system for ranges and such.
Here's my pixel shader code:
sampler TextureSampler : register(s0);
//A list of positions for circles. They are specified in texture space rather than screen space.
float2 PositionData[64];
//A matching list of radii. These are specified in pixels, though.
float Radii[64];
//how much of the array is filled with data.
int DataSize;
//the size of the texture being drawn.
float2 TextureSize;
float4 RenderSolidCircles(float2 texCoord : TEXCOORD0) : COLOR
{
    float opacityAcc = 1;
    float2 screenSpaceTexCoord = texCoord * TextureSize;
    for (int i = 0; i < DataSize; i++)
    {
        float2 properPosCoordinate = PositionData[i] * TextureSize;
        float dist = length(screenSpaceTexCoord - properPosCoordinate) - Radii[i];
        if (dist < 0)
        {
            opacityAcc -= min(abs(dist), 1);
        }
    }
    opacityAcc = max(0, opacityAcc);
    float4 outPix = tex2D(TextureSampler, texCoord);
    outPix.a *= opacityAcc;
    return outPix;
}
technique SolidCircles
{
    pass P0
    {
        PixelShader = compile ps_3_0 RenderSolidCircles();
    }
}
float4 PassThrough(float2 texCoord : TEXCOORD0) : COLOR
{
    return tex2D(TextureSampler, texCoord);
}
technique PassThrough
{
    pass P0
    {
        PixelShader = compile ps_3_0 PassThrough();
    }
}
Here's the ASM version of the SolidCircles technique:
//
// Generated by Microsoft (R) D3DX9 Shader Compiler 9.15.779.0000
//
// Parameters:
//
// int DataSize;
// float2 PositionData[64];
// float Radii[64];
// sampler2D TextureSampler;
// float2 TextureSize;
//
//
// Registers:
//
// Name Reg Size
// -------------- ----- ----
// PositionData c0 64
// Radii c64 64
// DataSize c128 1
// TextureSize c129 1
// TextureSampler s0 1
//
//
// Default values:
//snipped comments here
ps_3_0
def c130, 1, 0, -1, 2
def c131, 3, 4, 5, 6
def c132, 7, 8, 9, 10
def c133, 11, 12, 13, 14
def c134, 15, 16, 17, 18
def c135, 19, 20, 21, 22
def c136, 23, 24, 25, 26
def c137, 27, 28, 29, 30
def c138, 31, 32, 33, 34
def c139, 35, 36, 37, 38
def c140, 39, 40, 41, 42
def c141, 43, 44, 45, 46
def c142, 47, 48, 49, 50
def c143, 51, 52, 53, 54
def c144, 55, 56, 57, 58
def c145, 59, 60, 61, 62
def c146, 63, 0, 0, 0
dcl_texcoord v0.xy // texCoord<0,1>
dcl_2d s0
#line 22 "C:\Users\RCIX\Documents\Visual Studio 2008\Projects\2DFXFilesTest\2DFXFilesTest\Content\OverlayFx.fx"
mov r0.w, c130.x // opacityAcc<0>
mul r2.xy, v0, c129 // screenSpaceTexCoord<0,1>
mov r5.w, -c128.x
add r0.z, r5.w, c130.y
cmp r12.w, r0.z, c130.y, c130.x
mul r11.w, r12.w, c130.x
if_ne r11.w, -r11.w
mov r13.xy, c129 // ::TextureSize<0,1>
mul r12.xy, r13, c0 // properPosCoordinate<0,1>
mov r12.xy, -r12
add r11.xy, r2, r12
mul r16.xy, r11, r11
add r11.z, r16.x, r16.y
rsq r10.w, r11.z
rcp r8.w, r10.w
mov r9.w, -c64.x
add r4.w, r8.w, r9.w // dist<0>
add r7.w, r4.w, c130.y
cmp r6.w, r7.w, c130.y, c130.x
mov r3.w, -r4.w
mov r5.z, -r3.w
add r1.w, r4.w, r5.z
cmp r15.w, r1.w, r4.w, r3.w
add r14.w, r15.w, c130.z
cmp r2.w, r14.w, c130.x, r15.w
mov r2.w, -r2.w
add r13.w, r2.w, c130.x // opacityAcc<0>
mov r6.w, -r6.w
cmp r0.w, r6.w, r0.w, r13.w // opacityAcc<0>
#line 24
endif
//snipped 63 blocks of unrolled loop code
#line 33
mov r1.w, -r0.w
add r15.w, r1.w, c130.y
cmp r14.w, r15.w, c130.y, r0.w // opacityAcc<0>
texld r0, v0, s0 // outPix<0,1,2,3>
mul r2.x, r14.w, r0.w // outPix<3>
mov oC0.xyz, r0 // ::RenderSolidCircles<0,1,2>
mov oC0.w, r2.x // ::RenderSolidCircles<3>
// approximately 1866 instruction slots used (1 texture, 1865 arithmetic)
and here's the relevant portion of my Draw function in my Game class:
GraphicsDevice.Clear(Color.CornflowerBlue);
overlayEffect.Parameters["PositionData"].SetValue(Positions.ToArray());
overlayEffect.Parameters["Radii"].SetValue(Radii.ToArray());
overlayEffect.Parameters["DataSize"].SetValue(64);
overlayEffect.Parameters["TextureSize"].SetValue(new Vector2(500));
spriteBatch.Begin(SpriteBlendMode.AlphaBlend,
                  SpriteSortMode.Immediate,
                  SaveStateMode.None);
overlayEffect.Begin();
overlayEffect.CurrentTechnique.Passes[0].Begin();
spriteBatch.Draw(pixTex, new Rectangle(0, 0, 500, 500), Color.White);
spriteBatch.End();
overlayEffect.CurrentTechnique.Passes[0].End();
overlayEffect.End();
base.Draw(gameTime);
Finally, here's my function that builds the list of positions and radii:
private void RebuildPositionsList()
{
    spriteBatch = new SpriteBatch(GraphicsDevice);
    Positions = new List<Vector2>();
    Radii = new List<float>();
    for (int i = 0; i < 64; i++)
    {
        Positions.Add(
            new Vector2(
                (float)r.NextDouble(),
                (float)r.NextDouble())
            );
        Radii.Add(((float)r.NextDouble() * 100) + 40);
    }
}
The lines that make my texture:
pixTex = new Texture2D(GraphicsDevice, 1, 1);
pixTex.SetData<Color>(new Color[] { new Color(0f, 0f, 0f, 1f) });
Positions and Radii are Lists of Vector2s and floats respectively, each of size 64. pixTex is a 1x1 solid black texture.
Why does the shader not work?
So in the comments for this answer it was determined that yet another problem here is that the vertex shader SpriteBatch uses in XNA 3.1 is vs_1_1. And strictly speaking you cannot mix SM 3.0 vertex or pixel shaders with shaders of different versions. It seems that, in practice, most cards will let you get away with it, but apparently RCIX's card (a Radeon HD 4850) will not.
(This is why it's worth having the DirectX debug runtimes on hand (from the SDK), as they will warn you about things like this. You can use DebugView to view its output.)
There are a number of solutions to this issue:
1) By far the easiest solution is to upgrade to XNA 4.0 (the downsides are the breaking changes and the fact that it's currently only in beta). In this version of XNA you can easily specify your own vertex shader for SpriteBatch.
2) You could use a custom vertex shader with SpriteBatch in XNA 3.1, it's just not as easy. A good starting point would be the source code for the vertex shader used by XNA (up until XNA 4.0, see above).
3) Finally: perhaps you could just make your shader use ps_2_0? Do you really need 64 cutouts per texture?
OK, first of all you need to apply the two fixes in my other answer (end the batch before ending the effect, and (from comments) use SpriteBlendMode.AlphaBlend).
Now - the problem in your shader. It's actually working - but possibly not in the way you expect. It seems you're confusing the screen-space coordinates your sprite is drawn at (width = 500, height = 500), with the texture-space coordinates that your pixel shader works in (width = 1, height = 1).
So first of all - when you cut holes in your sprite you need to do it in texture-space, like so:
Positions.Add(new Vector2(0.5f, 0.5f));
Radii.Add(0.25f);
Positions.Add(new Vector2(0.25f, 0.25f));
Radii.Add(0.1f);
And second of all, what looks like an attempt at anti-aliasing is causing your cutouts to be more of a fade-out. It needs to take into account the size of the texture as it is drawn on screen. The easiest fix is to change this line:
opacityAcc -= min(abs(dist), 1);
To this:
opacityAcc -= min(abs(dist * 500), 1);
Of course - this assumes that your sprite is drawn at 500 by 500. You should pass the actual value in as a shader parameter.
If you're going to draw your sprite non-square, then you'll need to do a little extra maths to make the coordinate systems "line up". I'll leave that as an exercise.
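For instance, here is a minimal sketch of that parameterization (SpriteSize is a hypothetical new effect parameter, set from the draw code the same way TextureSize is):

//on-screen size of the sprite in pixels (hypothetical new parameter)
float2 SpriteSize;
//the fix line from above, inside RenderSolidCircles, assuming a square sprite:
opacityAcc -= min(abs(dist) * SpriteSize.x, 1);

You would then set it alongside the other parameters, e.g. overlayEffect.Parameters["SpriteSize"].SetValue(new Vector2(500)).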
Before calling effect.End() you need to call spriteBatch.End().
The reason for this (and the fact that it is "fixed" in XNA 4.0) is described in this article on Shawn Hargreaves' blog. (This entry may also be worth reading.)
Basically (in XNA 3.1): SpriteSortMode.Immediate is not as immediate as you might expect it to be. You need to call spriteBatch.End() to actually push the final batch of sprites to the GPU, before you end your effect.
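Using the names from the question, the required ordering looks like this:

spriteBatch.Begin(SpriteBlendMode.AlphaBlend,
                  SpriteSortMode.Immediate,
                  SaveStateMode.None);
overlayEffect.Begin();
overlayEffect.CurrentTechnique.Passes[0].Begin();
spriteBatch.Draw(pixTex, new Rectangle(0, 0, 500, 500), Color.White);
spriteBatch.End(); // flush the sprites to the GPU while the effect is still active
overlayEffect.CurrentTechnique.Passes[0].End();
overlayEffect.End();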
The Sprite Effects sample shows how to correctly apply effects to sprites.
Updated with more explanation around my confusion
(This is how a non-graphics developer imagines the rendering process!)
I specify a 2x2 square to be drawn by way of two triangles. I'm not going to talk about the triangles anymore; the square is easier to reason about. Let's say the square gets drawn in one piece.
I have not specified any units for my drawing. The only places in my code where I do something like that are: the canvas size (set to 1x1 in my case) and the viewport (I always set this to the dimensions of my output texture).
Then I call draw().
What happens is this: regardless of the size of my texture (be it 1x1 or 10000x10000), all my texels are filled with the data (color) that I returned from my frag shader. This works perfectly every time.
So now I'm trying to explain this to myself:
The GPU is only concerned with coloring the pixels.
A pixel is the smallest unit the GPU deals with (colors).
Depending on how many pixels my 2x2 square is mapped to, I should be running into one of the following 3 cases:
The number of pixels (to be colored) and my output texture dimensions match one to one: in this ideal case, for each pixel there would be one value assigned to my output texture. Very clear to me.
The number of pixels is fewer than my output texture's dimensions. In this case, I should expect some of the output texels to have the exact same value (the color of the pixel they fall under). For instance, if the GPU ends up drawing 16x16 pixels and my texture is 64x64, then I'd have blocks of 4x4 texels which get the same value. I have not observed such a case regardless of the size of my texture, which means we never end up with fewer pixels (really hard to imagine; let's keep going).
The number of pixels ends up being more than the number of texels. In this case the GPU would have to decide which value to assign to each texel. Would it average the pixel colors? If the GPU is coloring 64x64 pixels and my output texture is 16x16, then I'd expect each texel to get the average color of the 4x4 pixels it contains. In this case my texture would be completely filled with values I didn't specifically intend for it (averaged out, say); however this has not been the case.
I didn't even talk about how many times my frag shader gets called because it didn't matter. The results would be deterministic anyway.
So considering that I have never run into the 2nd and 3rd cases, where the values in my texels are not what I expected, the only conclusion I can come up with is that the whole assumption of the GPU trying to render pixels is actually wrong. When I assign an output texture to it (which is supposed to stretch over my 2x2 square all the time), the GPU will happily oblige and call my frag shader once for each texel. Somewhere along the line the pixels get colored too.
But the above lunatic explanation also fails to answer why I end up with no values or incorrect values in my texels if I stretch my geometry to 1x1 or 4x4 instead of 2x2.
Hopefully the above fantastic narration of the GPU coloring process has given you clues as to where I'm getting this wrong.
Original Post:
We're using WebGL for general computation. As such we create a rectangle and draw 2 triangles in it. Ultimately what we want is the data inside the texture mapped to this geometry.
What I don't understand is if I change the rectangle from (-1,-1):(1,1) to say (-0.5,-0.5):(0.5,0.5) suddenly data is dropped from the texture bound to the framebuffer.
I'd appreciate it if someone could help me understand the correlation. The only places where the real dimensions of the output texture come into play are the calls to viewport() and readPixels().
Below are relevant pieces of code for you to see what I'm doing:
... // canvas is created with size: 1x1
... // context attributes passed to canvas.getContext()
contextAttributes = {
  alpha: false,
  depth: false,
  antialias: false,
  stencil: false,
  preserveDrawingBuffer: false,
  premultipliedAlpha: false,
  failIfMajorPerformanceCaveat: true
};
... // default geometry
// Sets of x,y,z (for rectangle) and s,t coordinates (for texture)
return new Float32Array([
  -1.0,  1.0, 0.0,   0.0, 1.0,  // upper left
  -1.0, -1.0, 0.0,   0.0, 0.0,  // lower left
   1.0,  1.0, 0.0,   1.0, 1.0,  // upper right
   1.0, -1.0, 0.0,   1.0, 0.0   // lower right
]);
...
const geometry = this.createDefaultGeometry();
gl.bindBuffer(gl.ARRAY_BUFFER, buffer);
gl.bufferData(gl.ARRAY_BUFFER, geometry, gl.STATIC_DRAW);
... // binding to the vertex shader attribs
gl.vertexAttribPointer(positionHandle, 3, gl.FLOAT, false, 20, 0);
gl.vertexAttribPointer(textureCoordHandle, 2, gl.FLOAT, false, 20, 12);
gl.enableVertexAttribArray(positionHandle);
gl.enableVertexAttribArray(textureCoordHandle);
... // setting up framebuffer; I set the viewport to output texture dimensions (I think this is absolutely needed but not sure)
gl.bindTexture(gl.TEXTURE_2D, texture);
gl.bindFramebuffer(gl.FRAMEBUFFER, this.framebuffer);
gl.framebufferTexture2D(
    gl.FRAMEBUFFER,       // The target is always a FRAMEBUFFER.
    gl.COLOR_ATTACHMENT0, // We are providing the color buffer.
    gl.TEXTURE_2D,        // This is a 2D image texture.
    texture,              // The texture.
    0);                   // 0, we aren't using MIPMAPs
gl.viewport(0, 0, width, height);
... // reading from output texture
gl.bindTexture(gl.TEXTURE_2D, texture);
gl.framebufferTexture2D(
gl.FRAMEBUFFER, gl.COLOR_ATTACHMENT0, gl.TEXTURE_2D, texture,
0);
gl.readPixels(0, 0, width, height, gl.RED, gl.FLOAT, buffer);
new answer
I'm just saying the same thing yet again (3rd time?)
Copied from below
WebGL is destination based. That means it's going to iterate over the pixels of the line/point/triangle it's drawing and for each pixel call the fragment shader and ask "what value should I store here?"
It's destination based. It's going to draw each pixel exactly once. For that pixel it's going to ask "what color should I make this?"
destination based loop
for (let i = start; i < end; ++i) {
  fragmentShaderFunction(); // must set gl_FragColor
  destinationTextureOrCanvas[i] = gl_FragColor;
}
You can see in the loop above there is no setting any random destination. There is no setting any part of destination twice. It's just going to run from start to end and exactly once for each pixel in the destination between start and end ask what color it should make that pixel.
How do you set start and end? Again, to make it simple let's assume a 200x1 texture so we can ignore Y. It works like this:
vertexShaderFunction(); // must set gl_Position
const start = clipspaceToArrayspaceViaViewport(viewport, gl_Position.x);
vertexShaderFunction(); // must set gl_Position
const end = clipspaceToArrayspaceViaViewport(viewport, gl_Position.x);
for (let i = start; i < end; ++i) {
  fragmentShaderFunction(); // must set gl_FragColor
  texture[i] = gl_FragColor;
}
see below for clipspaceToArrayspaceViaViewport
What is viewport? viewport is what you set when you called `gl.viewport(x, y, width, height)`.
So, set gl_Position.x to -1 and +1, viewport.x to 0, and viewport.width to 200 (the width of the texture): then start will be 0 and end will be 200.
Set gl_Position.x to .25 and .75, viewport.x to 0, and viewport.width to 200 (the width of the texture): then start will be 125 and end will be 175.
I honestly feel like this answer is leading you down the wrong path. It's not remotely this complicated. You don't have to understand any of this to use WebGL IMO.
The simple answer is
You set gl.viewport to the sub rectangle you want to affect in your destination (canvas or texture it doesn't matter)
You make a vertex shader that somehow sets gl_Position to clip space coordinates (they go from -1 to +1) across the texture
Those clip space coordinates get converted to viewport space. It's basic math to map one range to another range, but it's mostly not important. It seems intuitive that -1 will draw to the viewport.x pixel and +1 will draw to the viewport.x + viewport.width - 1 pixel. That's what "maps from clip space to the viewport settings" means.
It's most common for the viewport settings to be (x = 0, y = 0, width = width of destination texture or canvas, height = height of destination texture or canvas)
So that just leaves what you set gl_Position to. Those values are in clip space just like it explains in this article.
You can make it simple, if you want, by converting from pixel space to clip space just like it explains in this article:
zeroToOne = someValueInPixels / destinationDimensions;
zeroToTwo = zeroToOne * 2.0;
clipspace = zeroToTwo - 1.0;
gl_Position = clipspace;
If you continue the articles they'll also show adding a value (translation) and multiplying by a value (scale)
Using just those 2 things and a unit square (0 to 1) you can choose any rectangle on the screen. Want to affect pixels 123 to 127? That's 5 units, so scale = 5, translation = 123. Then apply the math above to convert from pixels to clip space and you'll get the rectangle you want.
If you continue further through those articles you'll eventually get to the point where that math is done with matrices, but you can do that math however you want. It's like asking "how do I compute the value 3". Well, 1 + 1 + 1, or 3 + 0, or 9 / 3, or (100 - 50 + 20 * 2) / 30, or (7^2 - 19) / 10, or ????
I can't tell you how to set gl_Position. I can only tell you to make up whatever math you want, set it in *clip space*, and then give an example of converting from pixels to clip space (see above) as just one example of some possible math.
old answer
I get that this might not be clear; I don't know how else to help. WebGL draws lines, points, or triangles to a 2D array. That 2D array is either the canvas, a texture (as a framebuffer attachment) or a renderbuffer (as a framebuffer attachment).
The size of the area is defined by the size of the canvas, texture, renderbuffer.
You write a vertex shader. When you call gl.drawArrays(primitiveType, offset, count) you're telling WebGL to call your vertex shader count times. Assuming primitiveType is gl.TRIANGLES then for every 3 vertices generated by your vertex shader WebGL will draw a triangle. You specify that triangle by setting gl_Position in clip space.
Assuming gl_Position.w is 1, clip space goes from -1 to +1 in X and Y across the destination canvas/texture/renderbuffer. (gl_Position.x and gl_Position.y are divided by gl_Position.w, which is not really important for your case.)
To convert back to actually pixels your X and Y are converted based on the settings of gl.viewport. Let's just do X
pixelX = ((clipspace.x / clipspace.w) * .5 + .5) * viewport.width + viewport.x
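For example, with clipspace.x = 0.5, clipspace.w = 1, and a viewport of (x = 0, width = 200), that works out to pixelX = (0.5 * .5 + .5) * 200 + 0 = 150.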
WebGL is destination based. That means it's going to iterate over the pixels of the line/point/triangle it's drawing and for each pixel call the fragment shader and ask "what value should I store here?"
Let's translate that to JavaScript in 1D. Let's assume you have an 1D array
const dst = new Array(100);
Let's make a function that takes a start and an end and sets the values in between:
function setRange(dst, start, end, value) {
  for (let i = start; i < end; ++i) {
    dst[i] = value;
  }
}
You can fill the entire 100 element array with 123
const dst = new Array(100);
setRange(dst, 0, 100, 123);
To set the last half of the array to 456
const dst = new Array(100);
setRange(dst, 50, 100, 456);
Let's change that to use clip-space-like coordinates:
function setClipspaceRange(dst, clipStart, clipEnd, value) {
  const start = clipspaceToArrayspace(dst, clipStart);
  const end = clipspaceToArrayspace(dst, clipEnd);
  for (let i = start; i < end; ++i) {
    dst[i] = value;
  }
}

function clipspaceToArrayspace(array, clipspaceValue) {
  // convert clipspace value (-1 to +1) to (0 to 1)
  const zeroToOne = clipspaceValue * .5 + .5;
  // convert zeroToOne value to array space
  return Math.floor(zeroToOne * array.length);
}
This function now works just like the previous one except it takes clip space values instead of array indices.
// fill entire array with 123
const dst = new Array(100);
setClipspaceRange(dst, -1, +1, 123);
Set the last half of the array to 456
setClipspaceRange(dst, 0, +1, 456);
Now abstract one more time. Instead of using the array's length use a setting
// viewport looks like `{ x: number, width: number }`
function setClipspaceRangeViaViewport(dst, viewport, clipStart, clipEnd, value) {
  const start = clipspaceToArrayspaceViaViewport(viewport, clipStart);
  const end = clipspaceToArrayspaceViaViewport(viewport, clipEnd);
  for (let i = start; i < end; ++i) {
    dst[i] = value;
  }
}

function clipspaceToArrayspaceViaViewport(viewport, clipspaceValue) {
  // convert clipspace value (-1 to +1) to (0 to 1)
  const zeroToOne = clipspaceValue * .5 + .5;
  // convert zeroToOne value to array space
  return Math.floor(zeroToOne * viewport.width) + viewport.x;
}
Now to fill the entire array with 123
const dst = new Array(100);
const viewport = { x: 0, width: 100 };
setClipspaceRangeViaViewport(dst, viewport, -1, 1, 123);
To set the last half of the array to 456 there are now 2 ways. One way is just like the previous example, using 0 to +1:
setClipspaceRangeViaViewport(dst, viewport, 0, 1, 456);
You can also set the viewport to start halfway through the array:
const halfViewport = { x: 50, width: 50 };
setClipspaceRangeViaViewport(dst, halfViewport, -1, +1, 456);
I don't know if that was helpful or not.
The only other thing to add: instead of a fixed value, replace it with a function that gets called every iteration to supply the value.
function setClipspaceRangeViaViewport(dst, viewport, clipStart, clipEnd, fragmentShaderFunction) {
  const start = clipspaceToArrayspaceViaViewport(viewport, clipStart);
  const end = clipspaceToArrayspaceViaViewport(viewport, clipEnd);
  for (let i = start; i < end; ++i) {
    dst[i] = fragmentShaderFunction();
  }
}
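As a quick usage sketch, here is the "last half" example again, with a trivial function standing in for the fragment shader:

// fill the last half of a 100-element "texture" with a per-"pixel" computed value
const dst = new Array(100);
const viewport = { x: 0, width: 100 };
setClipspaceRangeViaViewport(dst, viewport, 0, +1, () => 456);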
Note this is the exact same thing that is said in this article and clarified somewhat in this article.
I perform a capture of the Direct3D back buffer. When I download the pixels, the image frame is flipped along its vertical axis. Is it possible to "tell" D3D to flip the frame when copying the resource, or when creating the target ID3D11Texture2D?
Here is how I do it:
The texture into which I copy the frame buffer is created like this:
D3D11_TEXTURE2D_DESC description =
{
    desc.BufferDesc.Width, desc.BufferDesc.Height, 1, 1,
    DXGI_FORMAT_R8G8B8A8_UNORM,
    { 1, 0 }, // DXGI_SAMPLE_DESC
    D3D11_USAGE_STAGING, // transfer from GPU to CPU
    0, D3D11_CPU_ACCESS_READ, 0
};
D3D11_SUBRESOURCE_DATA data = { buffer, desc.BufferDesc.Width * PIXEL_SIZE, 0 };
device->CreateTexture2D(&description, &data, &pNewTexture);
Then on each frame I do:
pSwapChain->GetBuffer(0, __uuidof(ID3D11Texture2D), reinterpret_cast< void** >(&pSurface));
pContext->CopyResource(pNewTexture, pSurface);
D3D11_MAPPED_SUBRESOURCE resource;
pContext->Map(pNewTexture, 0, D3D11_MAP_READ , 0, &resource);
//reading from resource.pData
//...
PS: I don't have control of the rendering pipeline. I hook an external app with this code.
Also, I don't want to mess with the pixel buffer on the CPU, like doing a reversed copy in a loop, etc. Low copy latency is a high priority.
UPDATE:
I also tried this:
D3D11_BOX box;
box.left = 0;
box.right = desc.BufferDesc.Width;
box.top = desc.BufferDesc.Height;
box.bottom = 0;
box.front = 0;
box.back = 1;
pContext->CopySubresourceRegion(pNewTexture, 0, 0, 0, 0, pSurface, 0, &box);
This causes the frame to come out empty.
Create a texture with D3D11_USAGE_DEFAULT, with CPUAccessFlags=0 and BindFlags=D3D11_BIND_SHADER_RESOURCE. CopyResource the swapchain's backbuffer to it. Create another texture with D3D11_BIND_RENDER_TARGET. Set it as a render target, set a pixel shader and draw a flipped quad using the first texture. Now you should be able to CopyResource the second texture to the staging texture that you use now. This should be faster than copying flipped image data using the CPU. However, this solution takes more resources on the GPU and might be hard to set up in a hook.
All Direct3D mapped resources should be processed scanline-by-scanline, so just reverse the copy:
auto ptr = reinterpret_cast<const uint8_t*>(resource.pData)
         + (desc.BufferDesc.Height - 1) * resource.RowPitch;
for (unsigned int y = 0; y < desc.BufferDesc.Height; ++y)
{
    // do something with the data in ptr
    // which is desc.BufferDesc.Width * BytesPerPixel(desc.Format) bytes
    // i.e. DXGI_FORMAT_R8G8B8A8_UNORM would be desc.BufferDesc.Width * 4
    ptr -= resource.RowPitch;
}
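To make the "do something" concrete, here is a hedged sketch that copies the mapped rows into a tightly packed, top-down buffer (it assumes DXGI_FORMAT_R8G8B8A8_UNORM, i.e. 4 bytes per pixel, the desc/resource variables from above, and the standard <vector>/<cstring> headers):

const UINT bytesPerRow = desc.BufferDesc.Width * 4;
std::vector<uint8_t> flipped(size_t(bytesPerRow) * desc.BufferDesc.Height);
auto src = reinterpret_cast<const uint8_t*>(resource.pData)
         + size_t(desc.BufferDesc.Height - 1) * resource.RowPitch;
uint8_t* dst = flipped.data();
for (UINT y = 0; y < desc.BufferDesc.Height; ++y)
{
    std::memcpy(dst, src, bytesPerRow); // one scanline at a time
    dst += bytesPerRow;                 // walk forward in the packed copy
    src -= resource.RowPitch;           // walk backward through the mapped rows
}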
For lots of examples of working with Direct3D resources, see DirectXTex.
Hello, everyone!
I've been trying to write a script that uses GLSL to render a Mandelbrot set, but something weird is happening.
I call the effect function like this:
vec4 effect( vec4 color, Image texture, vec2 texture_coords, vec2 screen_coords){
But, when I try to use the texture_coords values, say, like this:
vec2 c = vec2((texture_coords[0]-WD/2)/100, (texture_coords[1]-HT/2)/100);
It returns the same value for every pixel; if, on the other hand, I use screen_coords instead, it works, but I'm afraid that if I drag the window around it might mess with the results.
Why am I unable to retrieve texture_coords?
More insight on the program and the problems here
UPDATE
I have reworked the code, now it looks like this:
vec4 effect( vec4 color, Image texture, vec2 texture_coords, vec2 window_coords)
{
    vec2 c = vec2( ( MinRe + window_coords[0] * ( MaxRe - MinRe ) / ( width + 1 ) ),
                   ( MaxIm - window_coords[1] * ( MaxIm - MinIm ) / ( height + 1 ) ) );
    vec2 z = c;
    vec2 zn = vec2(0.0, 0.0);
    int n_iter = 0;
    while( (z[0]*z[0] + z[1]*z[1] < 4) && (n_iter < max_iter)) {
        zn[0] = z[0]*z[0] - z[1]*z[1] + c[0];
        zn[1] = 2* z[0]*z[1] + c[1];
        z[0] = zn[0];
        z[1] = zn[1];
        n_iter++;
    }
    // (the rest of the function is omitted in the question)
}
Which works beautifully. But when I use texture_coords instead of window_coords, the code returns the same value for every pixel, despite the fact that the texture I'm using is the same size as the window.
The problem is that some drawable objects of love.graphics don't set any texture coordinates if you don't load an image. So, instead of using love.graphics.rectangle, you should use a Mesh:
A 2D polygon mesh used for drawing arbitrary textured shapes
In order to add a mesh object, you can add this to the load function:
function love.load()
    width, height = love.graphics.getDimensions( )
    local vertices = {
        {
            -- top-left corner
            0, 0, -- position of the vertex
            0, 0, -- texture coordinate at the vertex position
            255, 0, 0, -- color of the vertex
        },
        {
            -- top-right corner
            width, 0,
            1, 0, -- texture coordinates are in the range of [0, 1]
            0, 255, 0
        },
        {
            -- bottom-right corner
            width, height,
            1, 1,
            0, 0, 255
        },
        {
            -- bottom-left corner
            0, height,
            0, 1,
            255, 255, 0
        },
    }
    -- the Mesh DrawMode "fan" works well for 4-vertex Meshes.
    mesh = love.graphics.newMesh(vertices, "fan")
    -- ... other stuff here ...
end
and in the draw function:
function love.draw()
    -- ...
    love.graphics.draw(mesh,0,0)
    -- ...
end
The complete code, considering your previous question and my answer to that, with some lines added to manage the coordinate transformations, becomes:
function love.load()
    width, height = love.graphics.getDimensions( )
    local vertices = {
        {
            -- top-left corner
            0, 0, -- position of the vertex
            0, 0, -- texture coordinate at the vertex position
            255, 0, 0, -- color of the vertex
        },
        {
            -- top-right corner
            width, 0,
            1, 0, -- texture coordinates are in the range of [0, 1]
            0, 255, 0
        },
        {
            -- bottom-right corner
            width, height,
            1, 1,
            0, 0, 255
        },
        {
            -- bottom-left corner
            0, height,
            0, 1,
            255, 255, 0
        },
    }
    mesh = love.graphics.newMesh(vertices, "fan")
    GLSLShader = love.graphics.newShader[[
        vec4 black = vec4(0.0, 0.0, 0.0, 1.0);
        vec4 white = vec4(1.0, 1.0, 1.0, 1.0);
        extern int max_iter;
        extern vec2 size;
        extern vec2 left_top;
        vec4 clr(int n){
            if(n == max_iter){return black;}
            float m = float(n)/float(max_iter);
            float r = float(mod(n,256))/32;
            float g = float(128 - mod(n+64,127))/255;
            float b = float(127 + mod(n,64))/255;
            if (r > 1.0) {r = 1.0;}
            else{
                if(r<0){r = 0;}
            }
            if (g > 1.0) {g = 1.0;}
            else{
                if(g<0){g = 0;}
            }
            if (b > 1.0) {b = 1.0;}
            else{
                if(b<0){b = 0;}
            }
            return vec4(r, g, b, 1.0);
        }
        vec4 effect( vec4 color, Image texture, vec2 texture_coords, vec2 window_coords){
            vec2 c = vec2(texture_coords[0]*size[0] + left_top[0],texture_coords[1]*size[1] - left_top[1]);
            vec2 z = vec2(0.0,0.0);
            vec2 zn = vec2(0.0,0.0);
            int n_iter = 0;
            while ( (z[0]*z[0] + z[1]*z[1] < 4) && (n_iter < max_iter) ) {
                zn[0] = z[0]*z[0] - z[1]*z[1] + c[0];
                zn[1] = 2*z[0]*z[1] + c[1];
                z[0] = zn[0];
                z[1] = zn[1];
                n_iter++;
            }
            return clr(n_iter);
        }
    ]]
end
function love.draw()
    center_x = -0.5
    center_y = 0.0
    size_x = 3
    size_y = size_x*height/width
    GLSLShader:send("left_top",{center_x-size_x*0.5,center_y+size_y*0.5})
    GLSLShader:send("size",{size_x,size_y})
    GLSLShader:sendInt("max_iter",1024)
    love.graphics.setShader(GLSLShader)
    love.graphics.draw(mesh,0,0)
    love.graphics.setShader()
end
But it's somewhat misleading, because my texture was the size of the window, and it didn't work
Well, let's investigate that. You didn't exactly provide a lot of information, but let's look anyway.
(texture_coords[0]-WD/2)/100
What is that? Well, we know what texture_coords is. From the Love2D wiki:
The location inside the texture to get pixel data from. Texture coordinates are usually normalized to the range of (0, 0) to (1, 1), with the top-left corner being (0, 0).
So you subtract from this texture coordinate WD/2. You didn't bother mentioning what that WD value was. But regardless, you divide the result by 100.
So, what exactly is WD? Let's see if algebra can help:
val = (texture_coords[0]-WD/2)/100
val * 100 = texture_coords[0] - WD / 2
(val * 100) - texture_coords[0] = -WD / 2
-2 * ((val * 100) - texture_coords[0]) = WD
So, what is WD? Well, from this equation, I can determine... nothing. This equation seems to be gibberish.
I'm guessing you intend for WD to mean "width" (seriously, it's three more characters; you couldn't type that out?). Presumably, the texture's width. If so... the equation remains gibberish.
You're taking a value that ranges from [0, 1], then subtracting half of the texture width from it. What does that mean? Why divide by 100? Since the texture width is probably much larger than the largest value from texture_coords (aka: 1), the result of this is going to be basically -WD/200.
And unless you're rendering to a floating-point image, that's going to get clamped to the valid color range: [0, 1]. So all your values come out to be the same color: black.
Since you're talking about Mandelbrot and so forth, I suspect you're trying to generate values on the range [-1, 1] or whatever. And your equation might do that... if texture_coords weren't normalized texture coordinates on the range [0, 1]. You know, exactly like the Wiki says they are.
If you want to turn texture coordinates into the [-1, 1] range, it's really much simpler. This is why we use normalized texture coordinates:
vec2 c = (2 * texture_coord) - 1; //Vector math is good.
If you want that to be the [-100, 100] range, just multiply the result by 100.
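Putting that together, a minimal sketch of the effect function with this mapping (the Mandelbrot iteration itself is elided; the 100 factor is just the example range above):

vec4 effect(vec4 color, Image texture, vec2 texture_coords, vec2 screen_coords)
{
    // map normalized [0, 1] texture coords to [-1, 1], then scale to [-100, 100]
    vec2 c = ((2.0 * texture_coords) - 1.0) * 100.0;
    // ... feed c into the Mandelbrot iteration as before ...
    return vec4(c, 0.0, 1.0); // placeholder return so the sketch compiles
}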
I'm trying to write a pixel shader - I'd like to use Texture.SampleCmpLevelZero as this is usable in loop constructs where Texture.Sample is not.
I've constructed a texture and can sample it fine with Texture.Sample, but switching to SampleCmpLevelZero works for the first few frames, then goes blank, then rarely but intermittently renders correctly.
My scene is static (and the texture data too) - I'm rendering one quad and there is no camera movement of any kind - I can reproduce this reliably by just changing the single line in the PS shader function.
Has anyone seen this?
Thanks
SamplerState sampPointClamp
{
    Filter = MIN_MAG_MIP_POINT;
    AddressU = Clamp;
    AddressV = Clamp;
};
SamplerComparisonState ShadowSampler
{
    // sampler state
    Filter = MIN_MAG_MIP_POINT;
    AddressU = Clamp;
    AddressV = Clamp;
    // sampler comparison state
    ComparisonFunc = LESS;
    //ComparisonFilter = COMPARISON_MIN_MAG_MIP_POINT;
};
texture2D tex;
// (PS_IN was not shown in the question; a minimal definition would be:)
struct PS_IN
{
    float4 pos : SV_Position;
    float2 uv : TEXCOORD0;
};
//on the fly full screen quad
PS_IN VS(uint id : SV_VertexID)
{
    PS_IN ret;
    ret.uv = float2( id & 1, (id & 2) >> 1 );
    ret.pos = float4( ret.uv * float2( 2.0f, -2.0f ) + float2( -1.0f, 1.0f), 0.0f, 1.0f );
    return ret;
}
float4 PS( PS_IN input ) : SV_Target
{
    //return float4(tex.SampleCmpLevelZero(ShadowSampler, input.uv, 0), 0, 0, 1); // Does not work properly
    return float4(tex.Sample(sampPointClamp, input.uv).x, 0, 0, 1); // Works fine
}
Sample should work in loops just fine:
float4 PSColUV(COLUV_PIXEL input) : SV_Target
{
    float4 output = 0;
    for (int i = 0; i < 4; i++)
    {
        float f = float(i) / 256.0;
        float2 uv = input.UV + float2(i,i);
        output += g_txDiffuse.Sample(g_samLinear, uv);
    }
    return input.Col * output/4.0;
}
produces:
ps_4_0
dcl_sampler s0, mode_default
dcl_resource_texture2d (float,float,float,float) t0
dcl_input_ps linear v1.xyzw
dcl_input_ps linear v2.xy
dcl_output o0.xyzw
dcl_temps 3
0: mov r0.xyzw, l(0,0,0,0)
1: mov r1.x, l(0)
2: loop
3: ige r1.y, r1.x, l(4)
4: breakc_nz r1.y
5: itof r1.y, r1.x
6: add r1.yz, r1.yyyy, v2.xxyx
7: sample r2.xyzw, r1.yzyy, t0.xyzw, s0
8: add r0.xyzw, r0.xyzw, r2.xyzw
9: iadd r1.x, r1.x, l(1)
10: endloop
11: mul r0.xyzw, r0.xyzw, v1.xyzw
12: mul o0.xyzw, r0.xyzw, l(0.250000, 0.250000, 0.250000, 0.250000)
13: ret
Also, you do realise that you're doing a PCF lookup rather than a normal texture sample? This won't give you the data in the texture; rather, it's going to compare all the texel subsamples (e.g. 4 in bilinear) with your reference value (0), calculate 0 or 1 depending on whether they're LESS or GREATEREQUAL to your reference value, then filter those boolean values into a number between 0 and 1.
Reply to comment:

thanks - I think Sample can't be in a loop with a variable length or a length not known at compile time (?). The error I got was "error X4014: cannot have divergent gradient operations inside loops error: There was an error compiling expression". On your other point - I do want an exact sample - I thought that was what I'm getting - I'm just trying to do some procedural texture generation using the texture buffer as a table of values to let me compute the true texel value based on (u,v) etc. – AnonDev
http://msdn.microsoft.com/en-gb/library/windows/desktop/bb219848%28v=vs.85%29.aspx
"Interaction of Per-Pixel Flow Control With Screen Gradients"
Remember that pixels are executed in (at minimum) a 2x2 block. You can't have control flow that would cause some pixels to sample whilst others do not, nor can you have calculations inside control flow that would cause a sample operation to get different gradients.
(Well, you can, but you need to use SampleGrad for that. But! That's not what you want in this instance.)
You say "exact" sample. Do you mean that your resource only has a single mip map and you want to get each texel in the resource without filtering? (i.e. you were doing a point filter?). Given your explanation of the texture being a table of values, then I don't see why you would need the texture to be a mipchain, and only the top level contains useful info. In which case you can use SampleLevel() with a LOD of 0. This means there will be no divergence in the derivatives, as the sample op isn't using derivatives!
This is the same reason SampleCmpLevelZero works but SampleCmp will not :) If you are point sampling, then another good candidate would be Load(), as you give it exact texel positions as you can even use it on buffers. So if your texture look-up positions are based of the pixel (X,Y) for instance, then you can pass these straight into Load (after accounting for the half texel offset..).
Anyway, you really don't want to be using SampleCmp/LevelZero. It does the wrong thing that you're after! It's used for shadow maps and so on. Use SampleLevel with a LOD of 0 instead.
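A minimal sketch of both alternatives, using the declarations from the question (texelX and texelY are hypothetical integer texel coordinates you would compute yourself):

// SampleLevel with an explicit LOD needs no derivatives, so it is loop-safe
float v = tex.SampleLevel(sampPointClamp, input.uv, 0).x;
// Load fetches one exact texel: int3(x, y, mip); no sampler, no filtering
float v2 = tex.Load(int3(texelX, texelY, 0)).x;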
The problem was:
SamplerComparisonState ShadowSampler
{
    // sampler state
    Filter = MIN_MAG_MIP_POINT;
    AddressU = Clamp;
    AddressV = Clamp;
    // sampler comparison state
    ComparisonFunc = LESS;
    //ComparisonFilter = COMPARISON_MIN_MAG_MIP_POINT;
};
It looks like there was a time when ComparisonFilter existed as an attribute (it still turns up in the docs, e.g. the 3/5/2013 build of http://msdn.microsoft.com/en-gb/library/windows/desktop/bb509644(v=vs.85).aspx), but it will not compile if present.
I fixed the above behaviour by changing the Filter attribute to the value COMPARISON_MIN_MAG_MIP_POINT - at that point it all worked.
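For reference, the sampler state after that fix would look something like this:

SamplerComparisonState ShadowSampler
{
    // the comparison variant is now selected through the Filter value itself
    Filter = COMPARISON_MIN_MAG_MIP_POINT;
    AddressU = Clamp;
    AddressV = Clamp;
    ComparisonFunc = LESS;
};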
I'm trying to draw up to 200,000 squares on the screen - a lot of squares, basically. I believe I'm just issuing way too many draw calls, and it's crippling the performance of the app. The squares only update when I press a button, so I don't necessarily have to update them every frame.
Here's the code I have now:
- (void)glkViewControllerUpdate:(GLKViewController *)controller
{
    //static float transY = 0.0f;
    //float y = sinf(transY)/2.0f;
    //transY += 0.175f;
    GLKMatrix4 modelview = GLKMatrix4MakeTranslation(0, 0, -5.f);
    effect.transform.modelviewMatrix = modelview;
    //GLfloat ratio = self.view.bounds.size.width/self.view.bounds.size.height;
    GLKMatrix4 projection = GLKMatrix4MakeOrtho(0, 768, 1024, 0, 0.1f, 20.0f);
    effect.transform.projectionMatrix = projection;
    _isOpenGLViewReady = YES;
}
- (void)glkView:(GLKView *)view drawInRect:(CGRect)rect
{
    if(_model.updateView && _isOpenGLViewReady)
    {
        glClear(GL_COLOR_BUFFER_BIT);
        [effect prepareToDraw];
        int pixelSize = _model.pixelSize;
        if(!_model.isReady)
            return;
        //NSLog(@"UPDATING: %d, %d", _model.rows, _model.columns);
        for(int i = 0; i < _model.rows; i++)
        {
            for(int ii = 0; ii < _model.columns; ii++)
            {
                ColorModel *color = [_model getColorAtRow:i andColumn:ii];
                CGRect rect = CGRectMake(ii * pixelSize, i*pixelSize, pixelSize, pixelSize);
                //[self drawRectWithRect:rect withColor:c];
                GLubyte squareColors[] = {
                    color.red, color.green, color.blue, 255,
                    color.red, color.green, color.blue, 255,
                    color.red, color.green, color.blue, 255,
                    color.red, color.green, color.blue, 255
                };
                //NSLog(@"Drawing color with red: %d", color.red);
                int xVal = rect.origin.x;
                int yVal = rect.origin.y;
                int width = rect.size.width;
                int height = rect.size.height;
                GLfloat squareVertices[] = {
                    xVal, yVal, 1,
                    xVal + width, yVal, 1,
                    xVal, yVal + height, 1,
                    xVal + width, yVal + height, 1
                };
                glEnableVertexAttribArray(GLKVertexAttribPosition);
                glEnableVertexAttribArray(GLKVertexAttribColor);
                glVertexAttribPointer(GLKVertexAttribPosition, 3, GL_FLOAT, GL_FALSE, 0, squareVertices);
                glVertexAttribPointer(GLKVertexAttribColor, 4, GL_UNSIGNED_BYTE, GL_TRUE, 0, squareColors);
                glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
                glDisableVertexAttribArray(GLKVertexAttribPosition);
                glDisableVertexAttribArray(GLKVertexAttribColor);
            }
        }
        _model.updateView = YES;
    }
}
First, do you really need to draw 200,000 squares? Your viewport only has 786,000 pixels total. You might be able to reduce the number of drawn objects without significantly impacting the overall quality of your scene.
That said, if these are smaller squares, you could draw them as points with a pixel size large enough to cover your square's area. That would require setting gl_PointSize in your vertex shader to the appropriate pixel width. You could then generate your coordinates and send them all to be drawn at once as GL_POINTS. That should remove the overhead of the extra geometry of the triangles and the individual draw calls you are using here.
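A hedged sketch of that vertex shader (this assumes a hand-written shader program rather than GLKBaseEffect; pointSizeInPixels is an illustrative uniform):

attribute vec4 position;
attribute vec4 color;
uniform mat4 modelViewProjectionMatrix;
uniform float pointSizeInPixels; // edge length of each square, in pixels
varying lowp vec4 vColor;

void main()
{
    gl_Position = modelViewProjectionMatrix * position;
    gl_PointSize = pointSizeInPixels; // each point rasterizes as a square this wide
    vColor = color;
}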
Even if you don't use points, it's still a good idea to calculate all of the triangle geometry you need first, then send all that in a single draw call. This will significantly reduce your OpenGL ES API call overhead.
One other thing you could look into would be to use vertex buffer objects to store this geometry. If the geometry is static, you can avoid sending it on each drawn frame, or only update a part of it that has changed. Even if you just change out the data each frame, I believe using a VBO for dynamic geometry has performance advantages on the modern iOS devices.
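A hedged sketch of the single-draw-call idea, reusing the names from the question (the fill loop is elided; two triangles, i.e. six vertices, per square):

GLsizei vertexCount = _model.rows * _model.columns * 6;
GLfloat *positions = malloc(sizeof(GLfloat) * 3 * vertexCount);
GLubyte *colors = malloc(sizeof(GLubyte) * 4 * vertexCount);
// ... fill positions/colors in the nested row/column loops, 6 vertices per square ...
glEnableVertexAttribArray(GLKVertexAttribPosition);
glEnableVertexAttribArray(GLKVertexAttribColor);
glVertexAttribPointer(GLKVertexAttribPosition, 3, GL_FLOAT, GL_FALSE, 0, positions);
glVertexAttribPointer(GLKVertexAttribColor, 4, GL_UNSIGNED_BYTE, GL_TRUE, 0, colors);
glDrawArrays(GL_TRIANGLES, 0, vertexCount); // one draw call for all squares
glDisableVertexAttribArray(GLKVertexAttribPosition);
glDisableVertexAttribArray(GLKVertexAttribColor);
free(positions);
free(colors);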
Can you not try to optimize it somehow? I'm not terribly familiar with graphics-type stuff, but if you are drawing 200,000 squares, it seems unlikely that all of them are actually visible. Could you not add some sort of isVisible tag to your mySquare class that determines whether or not the square you want to draw is actually visible? Then the obvious next step is to modify your draw function so that if the square isn't visible, you don't draw it.
Or are you asking for someone to try to improve the current code you have? Because if your performance is as bad as you say, I don't think making small changes to the above code will solve your problem. You'll have to rethink how you're doing your drawing.
It looks like what your code is actually trying to do is take a _model.rows × _model.columns 2D image and draw it upscaled by _model.pixelSize. If -[ColorModel getColorAtRow:andColumn:] is retrieving 3 bytes at a time from an array of color values, then you may want to consider uploading that array of color values into an OpenGL texture as GL_RGB/GL_UNSIGNED_BYTE data and letting the GPU scale up all of your pixels at once.
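If the Core Animation route fits, here is a hedged sketch of the CGImage wrapping (rgbaBytes, rows, columns, and imageView are illustrative names; it assumes 4 bytes per pixel with the alpha byte ignored):

// wrap the color array in a bitmap context, then hand the image to a layer
CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB();
CGContextRef ctx = CGBitmapContextCreate(rgbaBytes, columns, rows, 8, columns * 4,
                                         colorSpace, kCGImageAlphaNoneSkipLast);
CGImageRef image = CGBitmapContextCreateImage(ctx);
imageView.layer.contents = (__bridge id)image;
imageView.layer.magnificationFilter = kCAFilterNearest; // crisp, blocky upscaling
CGImageRelease(image);
CGContextRelease(ctx);
CGColorSpaceRelease(colorSpace);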
Alternatively, if scaling up the contents of your ColorModel is the only reason that you’re using OpenGL ES and GLKit, you might be better off wrapping your color values into a CGImage and allowing UIKit and Core Animation do the drawing for you. How often do the color values in the ColorModel get updated?