I have next function:
float4 blur(float rad, texture2d<float> tex2D, sampler sampler2D, float2 textureCoordinate){
float width = tex2D.get_width();
float height = tex2D.get_height();
float weight = 1 / ((2 * rad + 1) * (2 * rad + 1));
float4 blured_color = float4(0,0,0,0);
for(int i = -1 * rad; i <= rad; i++){
for (int j = -1 * rad; j <= rad; j++){
blured_color += tex2D.sample(sampler2D, textureCoordinate + float2(i/width, j/height)) * weight;
}
}
return blured_color;
}
It blurs given fragment.
My problem is that, when I call this function it doesn't work properly - it just make picture darker. But when I write the same code without wrapping it in function it works okay:
fragment float4 blured_background_fragment(VertexOut interpolated [[ stage_in ]],
texture2d<float> tex2D [[ texture(0) ]],
sampler sampler2D [[ sampler(0) ]])
{
float4 color = tex2D.sample(sampler2D, interpolated.textureCoordinate);
float3 color3 = float3(color[0] , color[1] , color[2]);
if (is_skin(color3) && !(interpolated.color[0] == 1 && interpolated.color[1] == 1 && interpolated.color[2] == 1)){
float width = tex2D.get_width();
float height = tex2D.get_height();
float rad = 13;
float weight = 1 / ((2 * rad + 1) * (2 * rad + 1));
float4 blured_color = float4(0,0,0,0);
for(int i = -1 * rad; i <= rad; i++){
for (int j = -1 * rad; j <= rad; j++){
blured_color += tex2D.sample(sampler2D, interpolated.textureCoordinate + float2(i/width, j/height)) * weight;
}
}
// Here I try to call this blur function
// float4 blured_color = blur(13, tex2D, sampler2D, interpolated.textureCoordinate);
return blured_color * 0.43 + color * 0.57;
}
else{
return tex2D.sample(sampler2D, interpolated.textureCoordinate);
}
}
Related
I got following message while profiling in the xcode. It suggest that i am using 32 bit floating point. In any case there is no issue for me to use 16 bit precision floating point. but If i do that it shows error.
how can i improve the performance following is the shader function. For texture sampling can we use half4.
struct VertexOutCalculatedTextureBezier {
float4 pos[[position]];
float4 color;
float2 textureCoordinates;
int8_t index;
};
struct BezierCalParameters
{
float2 a;
float angle;
float size;
int8_t index;
};
Vertex shader function which is suggested while profiling
vertex VertexOutCalculatedTextureBezier bezier_calculated_vertex_texture(constant BezierCalParameters *allParams[[buffer(0)]],
uint vertexId [[vertex_id]],
uint instanceId [[instance_id]])
{
BezierCalParameters params = allParams[instanceId];
VertexOutCalculatedTextureBezier vo;
vo.pos.xy = params.a;
if(vertexId % 4 == 0){
vo.pos.x = params.a.x + params.size * sinpi(params.angle) ;
vo.pos.y = params.a.y - params.size * cospi(params.angle);
vo.textureCoordinates = float2(0,0);
vo.index = params.index ;
}else if(vertexId % 4 == 1){
vo.pos.x = params.a.x + params.size * cospi(params.angle);
vo.pos.y = params.a.y + params.size * sinpi(params.angle);
vo.textureCoordinates = float2(0,1);;
vo.index = params.index ;
}
else if(vertexId % 4 == 2){
vo.pos.x = params.a.x - params.size * cospi(params.angle) ;
vo.pos.y = params.a.y - params.size * sinpi(params.angle);
vo.textureCoordinates = float2(1,0);
vo.index = params.index ;
}
else if(vertexId % 4 == 3){
vo.pos.x = params.a.x - params.size * sinpi(params.angle);
vo.pos.y = params.a.y + params.size * cospi(params.angle) ;
vo.textureCoordinates = float2(1,1);
vo.index = params.index ;
}
vo.pos.zw = float2(0, 1);
return vo;
}
The fragment function corresponding to this function
fragment half4 bezier_fragment_calculated_texture(VertexOutCalculatedTextureBezier params[[stage_in]],
texture2d<float, access::sample>texture [[texture(0)]],
texture2d<float, access::sample> texture2 [[texture(1)]],
texture2d<float, access::sample> texture3 [[texture(2)]])
{
constexpr sampler defaultSampler;
if(params.index == 0) {
float4 color =texture.sample(defaultSampler, params.textureCoordinates) * float4(1,0,0,0.68);
return half4(color);
}else if(params.index == 1) {
float4 color =texture2.sample(defaultSampler, params.textureCoordinates) * float4(1,0,0,0.68);
return half4(color);
}else if(params.index == 2) {
float4 color =texture3.sample(defaultSampler, params.textureCoordinates) * float4(1,0,0,0.68);
return half4(color);
}
float4 color =texture.sample(defaultSampler, params.textureCoordinates) * float4(1,0,0,0.68);
return half4(color);
}
Fragment Shader
vertex VertexOutBezier bezier_vertex(constant BezierParameters *allParams[[buffer(0)]],
// constant GlobalParameters& globalParams[[buffer(1)]],
uint vertexId [[vertex_id]],
uint instanceId [[instance_id]])
{
float t = (float) vertexId / 300;
rint(t);
BezierParameters params = allParams[instanceId];
float lineWidth = (1 - (((float) (vertexId % 2)) * 2.0)) * params.lineThickness;
float2 a = params.a;
float2 b = params.b;
float nt = 1.0f - t;
float nt_2 = nt * nt;
float nt_3 = nt_2 * nt;
float t_2 = t * t;
float t_3 = t_2 * t;
float2 point = a * nt_3 + params.p1 * nt_2 * t + params.p2 * nt * t_2 + b * t_3;
float2 tangent = -3.0 * a * nt_2 + params.p1 * (1.0 - 4.0 * t + 3.0 * t_2) + params.p2 * (2.0 * t - 3.0 * t_2) + 3 * b * t_2;
tangent = normalize(float2(-tangent.y, tangent.x));
VertexOutBezier vo;
vo.pos.xy = point + (tangent * (lineWidth / 3.0f));
vo.pos.zw = float2(0, 1);
vo.color = params.color ;
return vo;
}
My Fragment shader is
fragment float4 bezier_fragment(VertexOutBezier params[[stage_in]],
texture2d<float> texture [[texture(0)]]
)
{
constexpr sampler defaultSampler;
float4 canvasColor = texture.sample(defaultSampler, params.pos.xy);
return canvasColor;
}
Here i expect to get the pixel color of the texture. But here it is only getting single color. It is not getting the color of the texture according to its position.
Even when I do this in fragment I am getting the single color it is not varying with coordinates
fragment float4 bezier_fragment(VertexOutBezier params[[stage_in]],
texture2d<float> texture [[texture(0)]]
)
{
constexpr sampler defaultSampler;
float4 canvasColor = params.color * params.pos.x;
return canvasColor;
}
If I do this in Vertex Shader I got color varying according position of x
vo.pos.xy = point + (tangent * (lineWidth / 3.0f));
vo.pos.zw = float2(0, 1);
vo.color = params.color * vo.pos.x;
What is the Issue in fragment Shader. I cannot get the coordinates from Vertex Shader
Please make sure the VertexOutBezier.pos.xy value is normalization ( 0 ~ 1.0) ,due to the defaultSampler only receive normalization position value, if always return a single may be the position is beyond the bounds.
I am rather new to HLSL and I am struggling with implementing a grass shader.
In the geometry shader I create quads which will display the grass blades. However when I try blending in the pixelshader things get weird. Sometimes it ignores everything which is behind the quad. I'm assuming it's a problem with the depth stencil.
this is the result:
Here is my shader:
//************
// VARIABLES *
//************
cbuffer cbPerObject
{
float4x4 m_MatrixWorldViewProj : WORLDVIEWPROJECTION;
float4x4 m_MatrixWorld : WORLD;
float4x4 gMatrixViewInverse : VIEWINVERSE;
float3 m_LightDir = { 2.0f,-5.0f,0.0f };
}
RasterizerState FrontCulling
{
CullMode = NONE;
};
SamplerState samLinear
{
Filter = MIN_MAG_MIP_LINEAR;
AddressU = Wrap;// of Mirror of Clamp of Border
AddressV = Wrap;// of Mirror of Clamp of Border
};
BlendState EnableBlending
{
BlendEnable[0] = TRUE;
SrcBlend = SRC_ALPHA;
DestBlend = INV_SRC_ALPHA;
BlendOp = ADD;
SrcBlendAlpha = ZERO;
DestBlendAlpha = ZERO;
BlendOpAlpha = ADD;
RenderTargetWriteMask[0] = 0x0F;
};
DepthStencilState EnableDepth
{
// Depth test parameters
DepthEnable = true;
DepthWriteMask = all;
DepthFunc = less;
StencilEnable = false;
};
Texture2D m_TextureDiffuse<
string UIName = "Diffuse Texture";
string UIWidget = "Texture";
string ResourceName = "Grass.dds";
>;
Texture2D m_TextureDiffuseBlade<
string UIName = "Diffuse Texture Blade";
string UIWidget = "Texture";
string ResourceName = "GrassBladeDiffuse.dds";
>;
Texture2D m_PerlinNoise<
string UIName = "Perlin Texture";
string UIWidget = "Texture";
string ResourceName = "Perlin.dds";
>;
float gGrassHeight
<
string UIName = "Grass Height";
string UIWidget = "slider";
float UIMin = 0;
float UIMax = 10.0f;
float UIStep = 0.01;
> = 0.6f;
float gGrassHeightRandom
<
string UIName = "Grass Height Random";
string UIWidget = "slider";
float UIMin = 0;
float UIMax = 1.0f;
float UIStep = 0.01;
> = 1.0f;
float gGrassBend
<
string UIName = "Grass Bend";
string UIWidget = "slider";
float UIMin = 0;
float UIMax = 1.0f;
float UIStep = 0.01;
> = 1.0f;
int gGrassBlades
<
string UIName = "Grass Blades";
string UIWidget = "slider";
int UIMin = 1;
int UIMax = 5.0f;
int UIStep = 1;
> = 5;
float gGrassBladesSize
<
string UIName = "Grass Blades Size";
string UIWidget = "slider";
float UIMin = 0;
float UIMax = 1.0f;
float UIStep = 0.01;
> = 0.2f;
float gGrassSpread<
string UIName = "Grass Spread";
> = 5.0f;
float gTime;
//**********
// STRUCTS *
//**********
struct VS_DATA
{
float3 Position : POSITION;
float3 Normal : NORMAL;
float2 TexCoord : TEXCOORD;
};
struct GS_DATA
{
float4 Position : SV_POSITION;
float3 Normal : NORMAL;
float2 TexCoord : TEXCOORD0;
bool Blade : FALSE;
};
//****************
// VERTEX SHADER *
//****************
VS_DATA MainVS(VS_DATA vsData)
{
return vsData;
}
//******************
// GEOMETRY SHADER *
//******************
void CreateVertex(inout TriangleStream<GS_DATA> triStream, float3 pos, float3 normal, float2 texCoord, bool blade = true)
{
//Step 1. Create a GS_DATA object
GS_DATA temp = (GS_DATA)0;
//Step 2. Transform the position using the WVP Matrix and assign it to (GS_DATA object).Position (Keep in mind: float3 -> float4)
temp.Position = mul(float4(pos, 1), m_MatrixWorldViewProj);
//Step 3. Transform the normal using the World Matrix and assign it to (GS_DATA object).Normal (Only Rotation, No translation!)
temp.Normal = mul(normal, (float3x3)m_MatrixWorld);
//Step 4. Assign texCoord to (GS_DATA object).TexCoord
temp.TexCoord = texCoord;
//set if blade or not
temp.Blade = blade;
//Step 5. Append (GS_DATA object) to the TriangleStream parameter (TriangleStream::Append(...))
triStream.Append(temp);
}
float3x3 AngleAxis3x3(float angle, float3 axis)
{
float c, s;
sincos(angle, s, c);
float t = 1 - c;
float x = axis.x;
float y = axis.y;
float z = axis.z;
return float3x3(
t * x * x + c, t * x * y - s * z, t * x * z + s * y,
t * x * y + s * z, t * y * y + c, t * y * z - s * x,
t * x * z - s * y, t * y * z + s * x, t * z * z + c
);
}
[maxvertexcount(5*6*3 +3)]
//[instance(16)]
void GrassGenerator(triangle VS_DATA vertices[3], inout TriangleStream<GS_DATA> triStream)//, uint InstanceID : SV_GSInstanceID)
{
float3 basePoint, top;
//Step 1. Calculate The basePoint
basePoint = (vertices[0].Position + vertices[1].Position + vertices[2].Position) / 3;
//Step 2. Calculate The normal of the basePoint
float3 normal = normalize((vertices[0].Normal + vertices[1].Normal + vertices[2].Normal) / 3);
//orignal vertex
CreateVertex(triStream, vertices[0].Position, vertices[0].Normal, vertices[0].TexCoord, false);
CreateVertex(triStream, vertices[1].Position, vertices[1].Normal, vertices[1].TexCoord, false);
CreateVertex(triStream, vertices[2].Position, vertices[2].Normal, vertices[2].TexCoord, false);
triStream.RestartStrip();
float3 left, right, grassnormal;
for (int j = 0; j < gGrassBlades; j++)
{
float3 position = basePoint + float3(m_PerlinNoise.SampleLevel(samLinear, vertices[j].TexCoord, 0).y - 0.5f, m_PerlinNoise.SampleLevel(samLinear, vertices[j].TexCoord, 0).z - 0.5f, 0)*gGrassSpread;
top = position + (gGrassHeight * normal);
float3 grassDirection = float3(1, 0, 0) * gGrassBladesSize;
float xAngle = 0.0f;
for (int i = 0; i < 3; i++)
{
float3x3 rotation = AngleAxis3x3(xAngle, normal);
grassDirection = mul(grassDirection, rotation);
//Step 5. Calculate The Normal of the grass
float3 leftEdge, rightEdge;
leftEdge = (position - grassDirection) - top;
rightEdge = (position + grassDirection) - top;
grassnormal = normalize(cross(leftEdge, rightEdge));
//Create Spike Geometry
CreateVertex(triStream, top - grassDirection, grassnormal, float2(0, 0));
CreateVertex(triStream, position - grassDirection, grassnormal, float2(0, 1));
CreateVertex(triStream, position + grassDirection, grassnormal, float2(1, 1));
triStream.RestartStrip();
CreateVertex(triStream, top + grassDirection, grassnormal, float2(1, 0));
CreateVertex(triStream, position + grassDirection, grassnormal, float2(1, 1));
CreateVertex(triStream, top - grassDirection, grassnormal, float2(0, 0));
triStream.RestartStrip();
static const float PI = 3.14159265f;
xAngle = 2 * PI / 3;
}
}
}
//***************
// PIXEL SHADER *
//***************
float4 MainPS(GS_DATA input) : SV_TARGET
{
input.Normal = -normalize(input.Normal);
float alpha;
float3 color;
if (input.Blade) {
alpha = m_TextureDiffuseBlade.Sample(samLinear,input.TexCoord).a;
color = m_TextureDiffuseBlade.Sample(samLinear,input.TexCoord).rgb;
}
else {
alpha = m_TextureDiffuse.Sample(samLinear,input.TexCoord).a;
color = m_TextureDiffuse.Sample(samLinear,input.TexCoord).rgb;
}
float s = max(dot(m_LightDir, input.Normal), 0.4f);
return float4(color*s,alpha);
}
//*************
// TECHNIQUES *
//*************
technique10 DefaultTechnique
{
pass p0 {
SetDepthStencilState(EnableDepth, 0);
SetBlendState(EnableBlending, float4(0.0f, 0.0f, 0.0f, 0.0f), 0xFFFFFFFF);
SetRasterizerState(FrontCulling);
SetVertexShader(CompileShader(vs_4_0, MainVS()));
SetGeometryShader(CompileShader(gs_5_0, GrassGenerator()));
SetPixelShader(CompileShader(ps_4_0, MainPS()));
}
}
I've been porting Shadertoy shaders to Metal in order to learn how to write Metal shaders. I don't think I'm doing it correctly as I have been writing every one of my shaders as a compute shader, rather than vertex/fragment shaders. This has worked for quite a few shaders I've ported, almost 20. However some ports are extremely slow, and others include functions that aren't available.
Here is one of the shaders that is tripping me up:
https://www.shadertoy.com/view/4t2SRh
The fwidth() call in render() and mainImage() is not allowed within a metal compute shader. Metal Shader Language does however have fwidth(), but it can only be called within a fragment shader.
Here is my attempt at porting to a compute shader:
#include <metal_stdlib>
using namespace metal;
float float_mod(float f1, float f2) {
return f1-f2 * floor(f1/f2);
}
float sdfCircle(float2 center, float radius, float2 coord )
{
float2 offset = coord - center;
return sqrt((offset.x * offset.x) + (offset.y * offset.y)) - radius;
}
float sdfEllipse(float2 center, float a, float b, float2 coord)
{
float a2 = a * a;
float b2 = b * b;
return (b2 * (coord.x - center.x) * (coord.x - center.x) +
a2 * (coord.y - center.y) * (coord.y - center.y) - a2 * b2)/(a2 * b2);
}
float sdfLine(float2 p0, float2 p1, float width, float2 coord)
{
float2 dir0 = p1 - p0;
float2 dir1 = coord - p0;
float h = clamp(dot(dir0, dir1)/dot(dir0, dir0), 0.0, 1.0);
return (length(dir1 - dir0 * h) - width * 0.5);
}
float sdfUnion( const float a, const float b )
{
return min(a, b);
}
float sdfDifference( const float a, const float b)
{
return max(a, -b);
}
float sdfIntersection( const float a, const float b )
{
return max(a, b);
}
float anti(float d) {
return fwidth(d) * 1.0;
}
float4 render(float d, float3 color, float stroke)
{
//stroke = fwidth(d) * 2.0;
float anti = fwidth(d) * 1.0;
float4 strokeLayer = float4(float3(0.05), 1.0-smoothstep(-anti, anti, d - stroke));
float4 colorLayer = float4(color, 1.0-smoothstep(-anti, anti, d));
if (stroke < 0.000001) {
return colorLayer;
}
return float4(mix(strokeLayer.rgb, colorLayer.rgb, colorLayer.a), strokeLayer.a);
}
kernel void compute(texture2d<float, access::write> output [[texture(0)]],
texture2d<float, access::sample> input [[texture(1)]],
constant float &timer [[buffer(0)]],
uint2 gid [[thread_position_in_grid]])
{
float4 fragColor;
int width = output.get_width();
int height = output.get_height();
float2 resolution = float2(width,height);
float2 uv = float2(gid) / resolution;
float size = min(resolution.x, resolution.y);
float pixSize = 1.0 / size;
float stroke = pixSize * 1.5;
float2 center = float2(0.5, 0.5 * resolution.y/resolution.x);
float a = sdfEllipse(float2(0.5, center.y*2.0-0.34), 0.25, 0.25, uv);
float b = sdfEllipse(float2(0.5, center.y*2.0+0.03), 0.8, 0.35, uv);
b = sdfIntersection(a, b);
float4 layer1 = render(b, float3(0.32, 0.56, 0.53), fwidth(b) * 2.0);
// Draw strips
float4 layer2 = layer1;
float t, r0, r1, r2, e, f;
float2 sinuv = float2(uv.x, (sin(uv.x*40.0)*0.02 + 1.0)*uv.y);
for (float i = 0.0; i < 10.0; i++) {
t = float_mod(timer + 0.3 * i, 3.0) * 0.2;
r0 = (t - 0.15) / 0.2 * 0.9 + 0.1;
r1 = (t - 0.15) / 0.2 * 0.1 + 0.9;
r2 = (t - 0.15) / 0.2 * 0.15 + 0.85;
e = sdfEllipse(float2(0.5, center.y*2.0+0.37-t*r2), 0.7*r0, 0.35*r1, sinuv);
f = sdfEllipse(float2(0.5, center.y*2.0+0.41-t), 0.7*r0, 0.35*r1, sinuv);
f = sdfDifference(e, f);
f = sdfIntersection(f, b);
float4 layer = render(f, float3(1.0, 0.81, 0.27), 0.0);
layer2 = mix(layer2, layer, layer.a);
}
// Draw the handle
float bottom = 0.08;
float handleWidth = 0.01;
float handleRadius = 0.04;
float d = sdfCircle(float2(0.5-handleRadius+0.5*handleWidth, bottom), handleRadius, uv);
float c = sdfCircle(float2(0.5-handleRadius+0.5*handleWidth, bottom), handleRadius-handleWidth, uv);
d = sdfDifference(d, c);
c = uv.y - bottom;
d = sdfIntersection(d, c);
c = sdfLine(float2(0.5, center.y*2.0-0.05), float2(0.5, bottom), handleWidth, uv);
d = sdfUnion(d, c);
c = sdfCircle(float2(0.5, center.y*2.0-0.05), 0.01, uv);
d = sdfUnion(c, d);
c = sdfCircle(float2(0.5-handleRadius*2.0+handleWidth, bottom), handleWidth*0.5, uv);
d = sdfUnion(c, d);
float4 layer0 = render(d, float3(0.404, 0.298, 0.278), stroke);
float2 p = (2.0*float2(gid).xy-resolution.xy)/min(resolution.y,resolution.x);
float3 bcol = float3(1.0,0.8,0.7-0.07*p.y)*(1.0-0.25*length(p));
fragColor = float4(bcol, 1.0);
fragColor.rgb = mix(fragColor.rgb, layer0.rgb, layer0.a);
fragColor.rgb = mix(fragColor.rgb, layer1.rgb, layer1.a);
fragColor.rgb = mix(fragColor.rgb, layer2.rgb, layer2.a);
fragColor.rgb = pow(fragColor.rgb, float3(1.0/2.2));
output.write(fragColor,gid);
}
This doesn't compile, as fwidth() is not available. However, if I do get rid of fwidth(), it will compile... but of course not draw the right thing.
I was wondering if there is a better way to port this to a fragment/vertex shader, so that I can use MSL's fwidth() ? Or is writing it as a compute shader fine, and I should find a different way around using fwidth() ?
I've got a pixelshader (below) that i'm using with XNA. On my laptop (crappy graphics card) it runs a little jerky, but ok. I've just tried running it on the xbox and it's horrible!
There's nothing to the game (it's just a fractal renderer) so it's got to be the pixel shader causing the issues. I also think it's the PS code because i've lowered the iterations and it's ok. I've also checked, and the GC delta is zero.
Are there any HLSL functions that are no-no's on the xbox?? I must be doing something wrong here, performance can't be that bad!
#include "FractalBase.fxh"
float ZPower;
float3 Colour;
float3 ColourScale;
float ComAbs(float2 Arg)
{
return sqrt(Arg.x * Arg.x + Arg.y * Arg.y);
}
float2 ComPow(float2 Arg, float Power)
{
float Mod = pow(Arg.x * Arg.x + Arg.y * Arg.y, Power / 2);
float Ang = atan2(Arg.y, Arg.x) * Power;
return float2(Mod * cos(Ang), Mod * sin(Ang));
}
float4 FractalPixelShader(float2 texCoord : TEXCOORD0, uniform float Iterations) : COLOR0
{
float2 c = texCoord.xy;
float2 z = 0;
float i;
float oldBailoutTest = 0;
float bailoutTest = 0;
for(i = 0; i < Iterations; i++)
{
z = ComPow(z, ZPower) + c;
bailoutTest = z.x * z.x + z.y * z.y;
if(bailoutTest >= ZPower * ZPower)
{
break;
}
oldBailoutTest = bailoutTest;
}
float normalisedIterations = i / Iterations;
float factor = (bailoutTest - oldBailoutTest) / (ZPower * ZPower - oldBailoutTest);
float4 Result = normalisedIterations + (1 / factor / Iterations);
Result = (i >= Iterations - 1) ? float4(0.0, 0.0, 0.0, 1.0) : float4(Result.x * Colour.r * ColourScale.x, Result.y * Colour.g * ColourScale.y, Result.z * Colour.b * ColourScale.z, 1);
return Result;
}
technique Technique1
{
pass
{
VertexShader = compile vs_3_0 SpriteVertexShader();
PixelShader = compile ps_3_0 FractalPixelShader(128);
}
}
Below is FractalBase.fxh:
float4x4 MatrixTransform : register(vs, c0);
float2 Pan;
float Zoom;
float Aspect;
void SpriteVertexShader(inout float4 Colour : COLOR0,
inout float2 texCoord : TEXCOORD0,
inout float4 position : SV_Position)
{
position = mul(position, MatrixTransform);
// Convert the position into from screen space into complex coordinates
texCoord = (position) * Zoom * float2(1, Aspect) - float2(Pan.x, -Pan.y);
}
EDIT I did try removing the conditional by using lots of lerps, however when i did that i got loads of artifacts (and not the kind that "belong in a museum"!). I changed things around, and fixed a few logic errors, however the key was to multiply the GreaterThan result by 1 + epsilon, to account for rounding errors just making 0.9999 = 0 (integer). See the fixed code below:
#include "FractalBase.fxh"
float ZPower;
float3 Colour;
float3 ColourScale;
float ComAbs(float2 Arg)
{
return sqrt(Arg.x * Arg.x + Arg.y * Arg.y);
}
float2 ComPow(float2 Arg, float Power)
{
float Mod = pow(Arg.x * Arg.x + Arg.y * Arg.y, Power / 2);
float Ang = atan2(Arg.y, Arg.x) * Power;
return float2(Mod * cos(Ang), Mod * sin(Ang));
}
float GreaterThan(float x, float y)
{
return ((x - y) / (2 * abs(x - y)) + 0.5) * 1.001;
}
float4 FractalPixelShader(float2 texCoord : TEXCOORD0, uniform float Iterations) : COLOR0
{
float2 c = texCoord.xy;
float2 z = 0;
int i;
float oldBailoutTest = 0;
float bailoutTest = 0;
int KeepGoing = 1;
int DoneIterations = Iterations;
int Bailout = 0;
for(i = 0; i < Iterations; i++)
{
z = lerp(z, ComPow(z, ZPower) + c, KeepGoing);
bailoutTest = lerp(bailoutTest, z.x * z.x + z.y * z.y, KeepGoing);
Bailout = lerp(Bailout, GreaterThan(bailoutTest, ZPower * ZPower), -abs(Bailout) + 1);
KeepGoing = lerp(KeepGoing, 0.0, Bailout);
DoneIterations = lerp(DoneIterations, min(i, DoneIterations), Bailout);
oldBailoutTest = lerp(oldBailoutTest, bailoutTest, KeepGoing);
}
float normalisedIterations = DoneIterations / Iterations;
float factor = (bailoutTest - oldBailoutTest) / (ZPower * ZPower - oldBailoutTest);
float4 Result = normalisedIterations + (1 / factor / Iterations);
Result = (DoneIterations >= Iterations - 1) ? float4(0.0, 0.0, 0.0, 1.0) : float4(Result.x * Colour.r * ColourScale.x, Result.y * Colour.g * ColourScale.y, Result.z * Colour.b * ColourScale.z, 1);
return Result;
}
technique Technique1
{
pass
{
VertexShader = compile vs_3_0 SpriteVertexShader();
PixelShader = compile ps_3_0 FractalPixelShader(128);
}
}
The xbox has a pretty large block size, so branching on the xbox isn't always so great. Also the compiler isn't always the most effective at emitting dynamic branches which your code seems to use.
Look into the branch attribute: http://msdn.microsoft.com/en-us/library/bb313972%28v=xnagamestudio.31%29.aspx
Also, if you move the early bailout, does the PC become more more similar to the Xbox?
Keep in mind that modern graphic cards are actually quite a bit faster then the Xenon unit by now.