XNA Projected texture in two directions (one is opposite direction) - xna

I created Projector with:
Matrix.CreateLookAt(position, direction, Vector3.Up);
Matrix.CreatePerspectiveFieldOfView(MathHelper.ToRadians(45), 1, 1, 2);
I pass the product of these matrices to the shader (where it is called View), and then in the shader I do:
// Transforms Position by the projector matrix View and samples the projected texture.
float4 proj(float3 Position)
float4 texCoord = mul(float4(Position, 1.0), View);
// Perspective divide by w, then remap from [-1, 1] to [0, 1]; y is negated before the remap.
texCoord.x = ( (texCoord.x / texCoord.w)/2) + 0.5;
texCoord.y = (-(texCoord.y / texCoord.w)/2) + 0.5;
// `shape` is the sampler passed to tex2D; it is declared outside this snippet.
return tex2D(shape, texCoord.xy);
The UVW addressing of the texture is clamped. I use it in the light stage of deferred shading. Resulting image (red arrow is the correct direction):
What should I do to make it go only in correct direction?
The problem was back projection, which was simply solved by returning 0 for positions whose projected z is negative:
// Transforms Position by the projector matrix View and samples the projected texture,
// returning 0 for positions whose transformed z is negative.
float4 proj(float3 Position)
float4 texCoord = mul(float4(Position, 1.0), View);
// Early out: a negative z after the transform is treated as back projection.
if(texCoord.z < 0)
return 0;
// Perspective divide by w, then remap from [-1, 1] to [0, 1]; y is negated before the remap.
texCoord.x = ( (texCoord.x / texCoord.w)/2) + 0.5;
texCoord.y = (-(texCoord.y / texCoord.w)/2) + 0.5;
// `shape` is the sampler passed to tex2D; it is declared outside this snippet.
return tex2D(shape, texCoord.xy);

Adding Light Falloff for multiple Point Lights

I'm currently trying to add multiple point lights to my game. What I have done appears to be mostly working, except for a small problem with blending light falloff. Here are two images to show you what's happening. In the first one, light falloff is commented out. Both point lights appear correctly.
And here's the second image, where I have light falloff enabled. You will see that only light #2 is "mostly" visible. There are traces of light #1, but for the most part, light #1 appears to be overridden by light #2's falloff. In other words, each consecutive light's falloff overrides the light from previous lights.
Does anyone know how to add falloff for multiple point lights? I'm sure I'm doing something slightly wrong, and that's why the lights are not properly accumulated.
Here's my shader:
// Per-vertex input consumed by VertexShaderMain.
struct Vertex
float4 pos : POSITION;
float2 tex : TEXTURE;
float3 norm : NORMAL;
// Output of VertexShaderMain and input of PixelShaderMain.
struct PixelShaderArgs
float4 pos : SV_POSITION;
// Texture coordinates copied from Vertex.tex.
float2 col : TEXTURE;
float3 norm : NORMAL;
// Vertex position multiplied by World.
float3 worldPos : POSITION;
// One point-light entry of the light[] array in cbPerFrame.
struct PointLightShaderArgs
float3 pos;
// Passed to Falloff() as the distance at which the falloff reaches zero.
float radius;
float intensity;
float3 padding;
float4 ambient;
float4 diffuse;
Texture2D ShaderTexture : register(t0);
SamplerState Sampler : register(s0);
float4x4 localMatrix : register(b0);
cbuffer ShaderDataBuffer : register(b1)
// Read by convertUVToPixel as the texture width (x) and height (y).
float2 TextureResolution;
cbuffer cbPerFrame : register(b3)
PointLightShaderArgs light[8];
cbuffer WorldPositionBuffer : register(b4)
float4x4 World;
// Vertex shader: transforms the position by localMatrix and forwards the texture
// coordinates plus the normal and position multiplied by World.
PixelShaderArgs VertexShaderMain(Vertex vertex)
PixelShaderArgs output;
output.pos = mul(vertex.pos, localMatrix);
output.col = vertex.tex;
// NOTE(review): a float3 normal is multiplied by the float4x4 World here; confirm the implicit conversion is what is intended.
output.norm = mul(vertex.norm, World);
output.worldPos = mul(vertex.pos, World);
return output;
// Converts (u, v) texture coordinates into integer texel coordinates by scaling with
// TextureResolution, flooring, and taking the result modulo the texture size.
int2 convertUVToPixel(float u, float v)
int width = TextureResolution.x;
int height = TextureResolution.y;
int xCoordinate = floor(u * width);
int yCoordinate = floor(v * height);
return int2(xCoordinate % width, yCoordinate % height);
// Linear falloff factor: 1 at distance 0, decreasing to 0 at distance == radius, clamped to [0, 1].
float Falloff(float distance, float radius)
return clamp(1.0f - (distance / radius), 0.0, 1.0);
// Coefficients of the attenuation denominator: CONSTANT + LINEAR * d + QUADRATIC * d * d.
// With the values below the denominator is always 1, i.e. no distance attenuation is applied.
#define ATTENUATION_CONSTANT 1.0f // constant term coefficient
#define ATTENUATION_LINEAR 0.0f // linear term coefficient (0 = term disabled)
#define ATTENUATION_QUADRATIC 0.0f // quadratic term coefficient (0 = term disabled)
// Pixel shader: accumulates the diffuse contribution of the point lights into fragColor.
float4 PixelShaderMain(PixelShaderArgs pixelShaderArgs) : SV_Target
float u = pixelShaderArgs.col.x;
float v = pixelShaderArgs.col.y;
// Lighting
float3 fragColor = float3(0.0f, 0.0f, 0.0f);
// Texel fetch through Load with integer texel coordinates at mip level 0.
float4 diffuse = ShaderTexture.Load(int3(convertUVToPixel(u, v), 0));
// Only the first two entries of light[] are evaluated.
for (int i = 0; i < 2; i++)
float3 ambient = diffuse * light[i].ambient;
pixelShaderArgs.norm = normalize(pixelShaderArgs.norm);
float3 lightToPixelVec = light[i].pos - pixelShaderArgs.worldPos;
float distance = length(lightToPixelVec);
// Cosine of the angle between the normalized normal and the unit vector from the surface to the light.
float luminosity = dot(lightToPixelVec / distance, pixelShaderArgs.norm);
// This local is not read anywhere in this snippet.
float intensity = 1.00f;
if (luminosity > 0.0f)
// Do lighting attenuation
fragColor += luminosity * diffuse * light[i].diffuse;
fragColor /= ATTENUATION_CONSTANT + (ATTENUATION_LINEAR * distance) + (ATTENUATION_QUADRATIC * (distance * distance));
fragColor *= light[i].intensity; // multiply the final result by the intensity.
// fragColor is the running sum over the lights, so this factor also scales what earlier lights added.
fragColor *= Falloff(distance, light[i].radius); // This is what's causing the problem!!
//fragColor = saturate(fragColor + ambient);
return float4(fragColor, diffuse.a);
I figured this out. The solution was to move the falloff calculation up and inline it with the following line: fragColor += luminosity * diffuse * light[i].diffuse * Falloff(distance,light[i].radius);
This results in correct falloff blending, as shown in this picture:
and another picture showing three overlapped point lights:
Here's the updated shader (A lot of changes were made from the first one because I'm actually posting this answer late)
// Per-vertex input consumed by VertexShaderMain.
struct Vertex
float4 pos : POSITION;
float2 tex : TEXTURE;
float3 norm : NORMAL;
// Output of VertexShaderMain and input of PixelShaderMain.
struct PixelShaderArgs
float4 pos : SV_POSITION;
// Texture coordinates copied from Vertex.tex.
float2 col : TEXTURE;
float3 norm : NORMAL;
// Vertex position multiplied by World.
float3 worldPos : POSITION;
// One point-light entry of the light[] array in cbPerFrame.
struct PointLightShaderArgs
float3 pos;
// Passed to Falloff() as the distance at which the falloff reaches zero.
float radius;
float intensity;
float3 padding;
float4 ambient;
float4 diffuse;
Texture2D ShaderTexture : register(t0);
SamplerState Sampler : register(s0);
float4x4 localMatrix : register(b0);
cbuffer ShaderDataBuffer : register(b1)
// Read by convertUVToPixel as the texture width (x) and height (y).
float2 TextureResolution;
cbuffer cbPerFrame : register(b3)
// The pixel shader loop iterates over all 32 entries.
PointLightShaderArgs light[32];
cbuffer WorldPositionBuffer : register(b4)
float4x4 World;
// Vertex shader: transforms the position by localMatrix and forwards the texture
// coordinates plus the normal and position multiplied by World.
PixelShaderArgs VertexShaderMain(Vertex vertex)
PixelShaderArgs output;
output.pos = mul(vertex.pos, localMatrix);
output.col = vertex.tex;
// NOTE(review): a float3 normal is multiplied by the float4x4 World here; confirm the implicit conversion is what is intended.
output.norm = mul(vertex.norm, World);
output.worldPos = mul(vertex.pos, World);
return output;
// Converts (u, v) texture coordinates into integer texel coordinates by scaling with
// TextureResolution, flooring, and taking the result modulo the texture size.
int2 convertUVToPixel(float u, float v)
int width = TextureResolution.x;
int height = TextureResolution.y;
int xCoordinate = floor(u * width);
int yCoordinate = floor(v * height);
return int2(xCoordinate % width, yCoordinate % height);
// Linear falloff factor: 1 at distance 0, decreasing to 0 at distance == radius, clamped to [0, 1].
float Falloff(float distance, float radius)
return clamp(1.0f - (distance / radius), 0.0, 1.0);
// Coefficients of the attenuation denominator: CONSTANT + LINEAR * d + QUADRATIC * d * d.
// With the values below the denominator is always 1, i.e. no distance attenuation is applied.
#define ATTENUATION_CONSTANT 1.0f // constant term coefficient
#define ATTENUATION_LINEAR 0.0f // linear term coefficient (0 = term disabled)
#define ATTENUATION_QUADRATIC 0.0f // quadratic term coefficient (0 = term disabled)
// Pixel shader (revised): each light's diffuse term is multiplied by its own Falloff
// before it is added to fragColor.
float4 PixelShaderMain(PixelShaderArgs pixelShaderArgs) : SV_Target
float u = pixelShaderArgs.col.x;
float v = pixelShaderArgs.col.y;
// Lighting
float3 fragColor = float3(0.0f, 0.0f, 0.0f);
// Texel fetch through Load with integer texel coordinates at mip level 0.
float4 diffuse = ShaderTexture.Load(int3(convertUVToPixel(u, v), 0));
// Iterates over all 32 entries of light[].
for (int i = 0; i < 32; i++)
float3 ambient = diffuse * light[i].ambient;
pixelShaderArgs.norm = normalize(pixelShaderArgs.norm);
float3 lightToPixelVec = light[i].pos - pixelShaderArgs.worldPos;
float distance = length(lightToPixelVec);
// Cosine of the angle between the normalized normal and the unit vector from the surface to the light.
float luminosity = dot(lightToPixelVec / distance, pixelShaderArgs.norm);
// This local is not read anywhere in this snippet.
float intensity = 1.00f;
if (luminosity > 0.0f)
// Do lighting attenuation
// The falloff factor multiplies only this light's term, so it no longer scales earlier lights.
fragColor += luminosity * diffuse * light[i].diffuse * Falloff(distance,light[i].radius);
// NOTE(review): the next two statements still modify the accumulated fragColor rather than only this light's term; confirm that is intended.
fragColor /= ATTENUATION_CONSTANT + (ATTENUATION_LINEAR * distance) + (ATTENUATION_QUADRATIC * (distance * distance));
fragColor *= light[i].intensity; // multiply the final result by the intensity.
fragColor = saturate(fragColor + ambient);
return float4(fragColor, diffuse.a);

Metal equivalent to OpenGL mix

I'm trying to understand what the equivalent of the OpenGL mix function is in Metal. This is the OpenGL code I'm trying to convert:
// Rounded-box distance value: returns -r wherever |p| <= b - r on both axes and
// grows with the distance of |p| from that inner box elsewhere.
float udRoundBox( vec2 p, vec2 b, float r )
return length(max(abs(p)-b+r,0.0))-r;
// Colors each fragment red or black from the rounded-box value computed around the
// centre of the viewport; the corner radius is animated over time.
// iTime, iResolution and PI are defined outside this snippet.
void mainImage( out vec4 fragColor, in vec2 fragCoord )
// setup
// t oscillates between 0.0 and 0.4 as the sine term moves between -1 and 1.
float t = 0.2 + 0.2 * sin(mod(iTime, 2.0 * PI) - 0.5 * PI);
float iRadius = min(iResolution.x, iResolution.y) * (0.05 + t);
vec2 halfRes = 0.5 * iResolution.xy;
// compute box
float b = udRoundBox( fragCoord.xy - halfRes, halfRes, iRadius );
// colorize (red / black )
vec3 c = mix( vec3(1.0,0.0,0.0), vec3(0.0,0.0,0.0), smoothstep(0.0,1.0,b) );
fragColor = vec4( c, 1.0 );
I was able to convert part of it so far:
// Rounded-box distance value: returns -r wherever |p| <= b - r on both axes and
// grows with the distance of |p| from that inner box elsewhere.
float udRoundBox( float2 p, float2 b, float r )
return length(max(abs(p)-b+r,0.0))-r;
// First conversion attempt: derives the radius and half-resolution from the current
// sample coordinate itself rather than from the image size.
float4 cornerRadius(sampler_h src) {
float2 greenCoord = src.coord(); // this is already in relative coords; no need to divide by image size
float t = 0.5;
float iRadius = min(greenCoord.x, greenCoord.y) * (t);
float2 halfRes = float2(greenCoord.x * 0.5, greenCoord.y * 0.5);
// Since halfRes is half of greenCoord, the point passed to udRoundBox equals halfRes.
float b = udRoundBox( float2(greenCoord.x - halfRes.x, greenCoord.y - halfRes.y), halfRes, iRadius );
float3 c = mix(float3(1.0,0.0,0.0), float3(0.0,0.0,0.0), smoothstep(0.0,1.0,b) );
return float4(c, 1.0);
But it's producing a green screen. I'm trying to achieve a corner radius on a video like so:
The mix function is an implementation of linear interpolation, more frequently referred to as a Lerp function.
You can use linear interpolation where you have a value, let's say t and you want to know how that value maps within a certain range.
For example if I have three values:
a = 0
b = 1
t = 0.5
I could call mix(a,b,t) and my result would be 0.5. That is because the mix function expects a start range value, an end range value and a factor by which to interpolate, so I get 0.5 which is halfway between 0 and 1.
Looking at the documentation Metal has an implementation of mix that does a linear interpolation.
The problem is, that greenCoord (which was only a good variable name for the other question you asked, by the way) is the relative coordinate of the current pixel and has nothing to do with the absolute input resolution.
If you want a replacement for your iResolution, use src.size() instead.
And it seems you need your input coordinates in absolute (pixel) units. You can achieve that by adding a destination parameter to the inputs of your kernel like so:
// Revised kernel: takes the pixel position from the destination and the resolution
// from the sampler size, then colors the pixel red or black from the rounded-box value.
float4 cornerRadius(sampler src, destination dest) {
const float2 destCoord = dest.coord(); // pixel position in the output buffer in absolute coordinates
// Stands in for iResolution from the GLSL original.
const float2 srcSize = src.size();
const float t = 0.5;
const float radius = min(srcSize.x, srcSize.y) * t;
const float2 halfRes = 0.5 * srcSize;
// Box centred on the image: the coordinate is shifted by half the size before evaluation.
const float b = udRoundBox(destCoord - halfRes, halfRes, radius);
const float3 c = mix(float3(1.0,0.0,0.0), float3(0.0,0.0,0.0), smoothstep(0.0,1.0,b) );
return float4(c, 1.0);

HLSL: Gaussian Blur Effect

I'm trying to achieve a gaussian blur using post-processing. I have two render passes; first pass renders the scene and the second is used for the effect.
This is my pixel shader code:
// Tap offsets and weights for a 9-tap kernel: the centre tap plus four taps on each side.
// weight[0] plus twice the remaining weights sums to 1.
const float offset[] = {
0.0, 1.0, 2.0, 3.0, 4.0
const float weight[] = {
0.2270270270, 0.1945945946, 0.1216216216,
0.0540540541, 0.0162162162
// Centre tap.
ppColour = SceneTexture.Sample(PointSample, ppIn.UV) * weight[0];
float3 FragmentColor = float3(0.0f, 0.0f, 0.0f);
// NOTE(review): the offsets are added to ppIn.UV unscaled; if UV is normalized to [0, 1],
// whole-number offsets would need to be divided by the texture size - confirm.
for (int i = 1; i < 5; i++) {
// Taps offset along the y component of the UV (labelled "Horizontal-pass" in the original post).
FragmentColor +=
SceneTexture.Sample(PointSample, ppIn.UV + float2(0.0f, offset[i]))*weight[i] +
SceneTexture.Sample(PointSample, ppIn.UV - float2(0.0f, offset[i]))*weight[i];
// Taps offset along the x component of the UV (labelled "Vertical-pass" in the original post).
FragmentColor +=
SceneTexture.Sample(PointSample, ppIn.UV + float2(offset[i], 0.0f))*weight[i] +
SceneTexture.Sample(PointSample, ppIn.UV - float2(offset[i], 0.0f))*weight[i];
// Both directions are summed into the same result within a single pass.
ppColour += FragmentColor;
// NOTE(review): this is a parenthesized comma expression, not a float4(...) constructor; confirm the intended return value.
return (ppColour,1.0);
I get a string-y look, as seen:
What am I doing wrong?
I think you need to render the horizontal and vertical passes separately, using shader code such as the one below but with a different direction for each pass (see the dir uniform variable). So you need 3 steps:
Render the scene to texture A using the default shader
Render texture A to texture B using the Gaussian blur shader horizontally (dir={1.0,0.0})
Render texture B to the screen using the same Gaussian blur shader vertically (dir={0.0,1.0})
// Blur direction for this pass; the taps are placed along dir on both sides of the centre.
uniform vec2 dir;
// Tap offsets and weights for a 9-tap kernel: the centre tap plus four taps on each side.
const float offset[] = {0.0, 1.0, 2.0, 3.0, 4.0};
const float weight[] = {
0.2270270270, 0.1945945946, 0.1216216216,
0.0540540541, 0.0162162162
// Centre tap.
ppColour = SceneTexture.Sample(PointSample, ppIn.UV) * weight[0];
float3 FragmentColor = float3(0.0f, 0.0f, 0.0f);
//(1.0, 0.0) -> horizontal blur
//(0.0, 1.0) -> vertical blur
float hstep = dir.x;
float vstep = dir.y;
// NOTE(review): the offsets are added to ppIn.UV unscaled; if UV is normalized to [0, 1],
// whole-number offsets would need to be divided by the texture size - confirm.
for (int i = 1; i < 5; i++) {
FragmentColor +=
SceneTexture.Sample(PointSample, ppIn.UV + float2(hstep*offset[i], vstep*offset[i]))*weight[i] +
SceneTexture.Sample(PointSample, ppIn.UV - float2(hstep*offset[i], vstep*offset[i]))*weight[i];
ppColour += FragmentColor;
// NOTE(review): this is a parenthesized comma expression, not a float4(...) constructor; confirm the intended return value.
return (ppColour,1.0);
See Efficient Gaussian Blur with Linear Sampling

DX 11 Compute Shader\SharpDX Deferred Tiled lighting, Point light problems

I have just finished porting my engine from XNA to SharpDX(DX11).
Everything is going really well and I have conquered most of my issues without having to ask for help, but now I'm really stuck. Maybe I just need another set of eyes to look over my code, I don't know, but here it is.
I'm implementing tile based lighting (point lights only for now), I'm basing my code off the Intel sample because it's not as messy as the ATI one.
So my problem is that the lights move with the camera, I have looked all over the place to find a fix and I have tried everything (am I crazy?).
I just made sure all my normal and light vectors are in view space and normalized (still the same).
I have tried with the inverse View, inverse Projection, a mix of the both and a few other bits from over the net but I can't fix it.
So here is my CPU code:
Dim viewSpaceLPos As Vector3 = Vector3.Transform(New Vector3(pointlight.PosRad.X, pointlight.PosRad.Y, pointlight.PosRad.Z), Engine.Camera.EyeTransform)
Dim lightMatrix As Matrix = Matrix.Scaling(pointlight.PosRad.W) * Matrix.Translation(New Vector3(pointlight.PosRad.X, pointlight.PosRad.Y, pointlight.PosRad.Z))
Here is my CS shader code:
// Tiled point-light compute shader. Each thread handles the pixel at dispatchThreadID.xy:
// it reconstructs a view-space position from the depth buffer, contributes to the group's
// shared min/max Z, helps cull the light list against the group's tile frustum, and then
// shades its pixel with the lights that survived.
// NOTE(review): no GroupMemoryBarrierWithGroupSync() call appears in this snippet between the
// shared-memory writes (sTileNumLights, sMinZ, sMaxZ, sTileLightIndices) and the reads that
// follow them; confirm that the real shader synchronizes the group at those points.
void TileLightingCS(uint3 dispatchThreadID : SV_DispatchThreadID, uint3 GroupID : SV_GroupID, uint3 GroupThreadID : SV_GroupThreadID)
int2 globalCoords = dispatchThreadID.xy;
// Flattened index of this thread within its group.
uint groupIndex = GroupThreadID.y * GROUP_WIDTH + GroupThreadID.x;
// Starts as min = FrameBufferCamNearFar.x and max = FrameBufferCamNearFar.y; only updated for valid pixels below.
float minZSample = FrameBufferCamNearFar.x;
float maxZSample = FrameBufferCamNearFar.y;
float2 gbufferDim;
DepthBuffer.GetDimensions(gbufferDim.x, gbufferDim.y);
// Maps the pixel centre to [-1, 1] in x and [1, -1] in y.
float2 screenPixelOffset = float2(2.0f, -2.0f) / gbufferDim;
float2 positionScreen = (float2(globalCoords)+0.5f) * screenPixelOffset.xy + float2(-1.0f, 1.0f);
float depthValue = DepthBuffer[globalCoords].r;
// NOTE(review): Projection._43 / (depth - Projection._33) inverts a projection-matrix depth (z/w).
// The post states that depth is written as -viewSpaceZ / FarClip; confirm the stored value
// matches what this reconstruction expects.
float3 positionView = ComputePositionViewFromZ(positionScreen, Projection._43 / (depthValue - Projection._33));
// Avoid shading skybox/background or otherwise invalid pixels
float viewSpaceZ = positionView.z;
bool validPixel = viewSpaceZ >= FrameBufferCamNearFar.x && viewSpaceZ < FrameBufferCamNearFar.y;
[flatten] if (validPixel)
minZSample = min(minZSample, viewSpaceZ);
maxZSample = max(maxZSample, viewSpaceZ);
// How many total lights?
uint totalLights, dummy;
InputBuffer.GetDimensions(totalLights, dummy);
// Initialize shared memory light list and Z bounds
if (groupIndex == 0)
sTileNumLights = 0;
sMinZ = 0x7F7FFFFF; // Max float
sMaxZ = 0;
// The float Z values are reinterpreted as uint so the integer interlocked min/max can be used.
if (maxZSample >= minZSample) {
InterlockedMin(sMinZ, asuint(minZSample));
InterlockedMax(sMaxZ, asuint(maxZSample));
float minTileZ = asfloat(sMinZ);
float maxTileZ = asfloat(sMaxZ);
// Work out scale/bias from [0, 1]
float2 tileScale = float2(FrameBufferCamNearFar.zw) * rcp(float(2 * GROUP_WIDTH));
float2 tileBias = tileScale - float2(GroupID.xy);
// Now work out composite projection matrix
// Relevant matrix columns for this tile frusta
float4 c1 = float4(Projection._11 * tileScale.x, 0.0f, tileBias.x, 0.0f);
float4 c2 = float4(0.0f, -Projection._22 * tileScale.y, tileBias.y, 0.0f);
float4 c4 = float4(0.0f, 0.0f, 1.0f, 0.0f);
// Derive frustum planes
float4 frustumPlanes[6];
// Sides
frustumPlanes[0] = c4 - c1;
frustumPlanes[1] = c4 + c1;
frustumPlanes[2] = c4 - c2;
frustumPlanes[3] = c4 + c2;
// Near/far
frustumPlanes[4] = float4(0.0f, 0.0f, 1.0f, -minTileZ);
frustumPlanes[5] = float4(0.0f, 0.0f, -1.0f, maxTileZ);
// Normalize frustum planes (near/far already normalized)
[unroll] for (uint i = 0; i < 4; ++i)
frustumPlanes[i] *= rcp(length(frustumPlanes[i].xyz));
// Cull lights for this tile
// Each thread starts at its own groupIndex and strides by the group size.
for (uint lightIndex = groupIndex; lightIndex < totalLights; lightIndex += (GROUP_WIDTH * GROUP_HEIGHT))
PointLight light = InputBuffer[lightIndex];
// The position is used as stored (the View transform is commented out) and is tested against
// planes built from view-space Z, so the buffer has to hold view-space positions already.
float3 lightVS = light.PosRad.xyz;// mul(float4(light.Pos.xyz, 1), View);
// Cull: point light sphere vs tile frustum
bool inFrustum = true;
for (uint i = 0; i < 6; ++i)
// The signed plane distance is compared against the negated radius stored in PosRad.w.
float d = dot(frustumPlanes[i], float4(lightVS, 1.0f));
inFrustum = inFrustum && (d >= -light.PosRad.w);
if (inFrustum)
// Append this light's index to the shared per-tile list.
uint listIndex;
InterlockedAdd(sTileNumLights, 1, listIndex);
sTileLightIndices[listIndex] = lightIndex;
uint numLights = sTileNumLights;
// Only shade pixels inside the bounds given by FrameBufferCamNearFar.zw.
if (all(globalCoords < FrameBufferCamNearFar.zw))
float4 NormalMap = NormalBuffer[globalCoords];
float3 normal = DecodeNormal(NormalMap);
if (numLights > 0)
float3 lit = float3(0.0f, 0.0f, 0.0f);
for (uint tileLightIndex = 0; tileLightIndex < numLights; ++tileLightIndex)
PointLight light = InputBuffer[sTileLightIndices[tileLightIndex]];
// The light position is subtracted from the view-space pixel position directly,
// so it is again treated as a view-space position.
float3 lDir = light.PosRad.xyz - positionView;
lDir = normalize(lDir);
// N.L term only; no distance attenuation is applied in this snippet.
float3 nl = saturate(dot(lDir, normal));
lit += ((light.Color.xyz * light.Color.a) * nl) * 0.1f;
PointLightColor[globalCoords] = float4(lit, 1);
PointLightColor[globalCoords] = 0;
So I know the culling works because there are lights drawn, they just move with the camera.
Could it be a handedness issue?
Am I setting my CPU light code up right?
Have I messed my spaces up?
What am I missing?
Am I reconstructing my position from depth wrong? (don't think it's this because the culling works)
ps. I write depth out like this:
VS shader
float4 viewSpacePos = mul(float4(input.Position,1), WV);
output.Depth=viewSpacePos.z ;
PS Shader
-input.Depth.x / FarClip

SSAOeffect.fx from the Screen Space Ambient Occlusion XNA 3.1 example doesn't work in XNA 4.0

I'm trying to convert the Screen Space Ambient Occlusion example from XNA 3.1 to XNA 4.0. I've fixed all the problems in the source, except this strange problem in a shader file. I've gone through and fixed all the obvious problems with the shader as guided by Shawn Hargreaves' blog, but when it compiles it uses up 620 instruction slots, which is well over the 512 instruction slot limit. How could this have worked in XNA 3.1, but not in XNA 4.0?
The changes from the 3.1 copy of the file are very minimal, and only consisted of renaming a few functions. Below is the full shader source in its current form. I'll be very grateful for any help in reducing the number of instruction slots this compiles to.
// Scales each sample ray in the pixel shader.
float sampleRadius;
// Scales the depth difference used for the occlusion term.
float distanceScale;
// Used in the pixel shader to project the offset sample positions to texture coordinates.
float4x4 Projection;
// Combined with the sign of the vertex position to build the per-vertex view direction.
float3 cornerFustrum;
// Vertex shader output / pixel shader input.
struct VS_OUTPUT
float4 pos : POSITION;
float2 TexCoord : TEXCOORD0;
float3 viewDirection : TEXCOORD1;
// Vertex shader: passes the position through unchanged and derives the texture
// coordinate and view direction from the sign of the position's x and y.
VS_OUTPUT VertexShaderFunction(
float4 Position : POSITION, float2 TexCoord : TEXCOORD0)
Out.pos = Position;
// Snap x and y to -1, 0 or +1.
Position.xy = sign(Position.xy);
// Remap from [-1, 1] to [0, 1] with y flipped; the TexCoord input parameter is not used.
Out.TexCoord = (float2(Position.x, -Position.y) + float2( 1.0f, 1.0f ) ) * 0.5f;
float3 corner = float3(-cornerFustrum.x * Position.x,
cornerFustrum.y * Position.y, cornerFustrum.z);
Out.viewDirection = corner;
return Out;
// Read in the pixel shader as .rgb for the normal and .a for the depth.
texture depthTexture;
// Read in the pixel shader as the vector each sample ray is reflected about.
texture randomTexture;
sampler2D depthSampler = sampler_state
Texture = <depthTexture>;
sampler2D RandNormal = sampler_state
Texture = <randomTexture>;
// SSAO pixel shader: offsets the reconstructed position along 16 reflected sample rays,
// compares the depth stored at each projected sample with the pixel's own depth, and
// returns the averaged result as a grey value.
float4 PixelShaderFunction(VS_OUTPUT IN) : COLOR0
// Fixed table of 16 sample directions (w is always 0 and is not used below).
float4 samples[16] =
float4(0.355512, -0.709318, -0.102371, 0.0 ),
float4(0.534186, 0.71511, -0.115167, 0.0 ),
float4(-0.87866, 0.157139, -0.115167, 0.0 ),
float4(0.140679, -0.475516, -0.0639818, 0.0 ),
float4(-0.0796121, 0.158842, -0.677075, 0.0 ),
float4(-0.0759516, -0.101676, -0.483625, 0.0 ),
float4(0.12493, -0.0223423, -0.483625, 0.0 ),
float4(-0.0720074, 0.243395, -0.967251, 0.0 ),
float4(-0.207641, 0.414286, 0.187755, 0.0 ),
float4(-0.277332, -0.371262, 0.187755, 0.0 ),
float4(0.63864, -0.114214, 0.262857, 0.0 ),
float4(-0.184051, 0.622119, 0.262857, 0.0 ),
float4(0.110007, -0.219486, 0.435574, 0.0 ),
float4(0.235085, 0.314707, 0.696918, 0.0 ),
float4(-0.290012, 0.0518654, 0.522688, 0.0 ),
float4(0.0975089, -0.329594, 0.609803, 0.0 )
// Texture-coordinate offset with a hard-coded 1600x1200 resolution.
IN.TexCoord.x += 1.0/1600.0;
IN.TexCoord.y += 1.0/1200.0;
// NOTE(review): the result of normalize is not assigned, so IN.viewDirection is left unchanged.
normalize (IN.viewDirection);
float depth = tex2D(depthSampler, IN.TexCoord).a;
// Position of this pixel along the view direction, scaled by the stored depth.
float3 se = depth * IN.viewDirection;
float3 randNormal = tex2D( RandNormal, IN.TexCoord * 200.0 ).rgb;
// Only referenced by the commented-out lines inside the loop.
float3 normal = tex2D(depthSampler, IN.TexCoord).rgb;
float finalColor = 0.0f;
// 16 iterations, each with two texture-independent math steps and one depth fetch.
for (int i = 0; i < 16; i++)
float3 ray = reflect(samples[i].xyz,randNormal) * sampleRadius;
//if (dot(ray, normal) < 0)
// ray += normal * sampleRadius;
float4 sample = float4(se + ray, 1.0f);
// Project the offset position and remap x/w, y/w from [-1, 1] to [0, 1].
float4 ss = mul(sample, Projection);
float2 sampleTexCoord = 0.5f * ss.xy/ss.w + float2(0.5f, 0.5f);
// Same hard-coded 1600x1200 offset as above.
sampleTexCoord.x += 1.0/1600.0;
sampleTexCoord.y += 1.0/1200.0;
float sampleDepth = tex2D(depthSampler, sampleTexCoord).a;
if (sampleDepth == 1.0)
finalColor ++;
// Occlusion grows with how much deeper the sampled depth is than this pixel's depth.
float occlusion = distanceScale* max(sampleDepth - depth, 0.0f);
finalColor += 1.0f / (1.0f + occlusion * occlusion * 0.1);
// Average over the 16 samples, written to all three colour channels.
return float4(finalColor/16, finalColor/16, finalColor/16, 1.0f);
// Single-pass technique; both shaders are compiled for shader model 3.0.
technique SSAO
pass P0
VertexShader = compile vs_3_0 VertexShaderFunction();
PixelShader = compile ps_3_0 PixelShaderFunction();
XNA 4.0 enforces the 512 instruction limit (which the Xbox 360 has and the HiDef profile enforces as a minimum), whereas XNA 3.1 didn't.
On the plus side, any graphics card that can run the XNA HiDef profile shouldn't fall over, whereas had XNA allowed any number of instructions, it may have done.
Since you have a loop in your code, you could try forcing the compiler to use loop instructions if it's currently unrolling it (not familiar with this myself).
If you are looking for an XNA 4 SSAO that has open source, check this link out : Deferred Rendering with SSAO Normals
Reduce the number of samples in the shader from 16 to 8.
