I want to input control points through the tessellation stages and output them as bent lines.
I expand the lines into billboarded quads in the geometry shader.
Right now I input a bunch of random vertices with a control point number of 4.
I assume the bending is done in the domain shader so I use one of the uv coordinates unique for that stage as a t value for a bezier function that takes in 4 world position coordinates.
However the lines remain straight. And I don't know what I am missing.
My code looks like this:
Domain Shader:
float3 bezier( float3 p0, float3 p1, float3 p2, float3 p3, float u)
{
float B0 = (1. - u) * (1. - u) * (1. - u);
float B1 = 3. * u * (1. - u) * (1. - u);
float B2 = 3. * u * u * (1. - u);
float B3 = u * u * u;
float3 p = B0 * p0 + B1 * p1 + B2 * p2 + B3 * p3;
return p;
}
float t = uv.x;
float3 pos = bezier(inp[0].worldPos, inp[1].worldPos, inp[2].worldPos, inp[3].worldPos, t);
Could the problem be that the vertex points I input are not forming curves? Right now I just take a mesh such as a plane and take the vertices from there.
The detail factor in the hull shader is 16. Density factor varies by distance.
I don't know what else is relevant. If you need more information let me know. I hope I made the question clear, I have googled it but can't seem to find the error in my own code.
See the SimpleBezier sample:
float4 BernsteinBasis(float t)
{
float invT = 1.0f - t;
return float4(invT * invT * invT,
3.0f * t * invT * invT,
3.0f * t * t * invT,
t * t * t);
}
float4 dBernsteinBasis(float t)
{
float invT = 1.0f - t;
return float4(-3 * invT * invT,
3 * invT * invT - 6 * t * invT,
6 * t * invT - 3 * t * t,
3 * t * t);
}
float3 EvaluateBezier(const OutputPatch< HS_OUTPUT, OUTPUT_PATCH_SIZE > BezPatch,
float4 BasisU,
float4 BasisV)
{
float3 value = float3(0, 0, 0);
value = BasisV.x * (BezPatch[0].pos * BasisU.x + BezPatch[1].pos * BasisU.y + BezPatch[2].pos * BasisU.z + BezPatch[3].pos * BasisU.w);
value += BasisV.y * (BezPatch[4].pos * BasisU.x + BezPatch[5].pos * BasisU.y + BezPatch[6].pos * BasisU.z + BezPatch[7].pos * BasisU.w);
value += BasisV.z * (BezPatch[8].pos * BasisU.x + BezPatch[9].pos * BasisU.y + BezPatch[10].pos * BasisU.z + BezPatch[11].pos * BasisU.w);
value += BasisV.w * (BezPatch[12].pos * BasisU.x + BezPatch[13].pos * BasisU.y + BezPatch[14].pos * BasisU.z + BezPatch[15].pos * BasisU.w);
return value;
}
[domain("quad")]
DS_OUTPUT BezierDS(HS_CONSTANT_DATA_OUTPUT input,
float2 UV : SV_DomainLocation,
const OutputPatch< HS_OUTPUT, OUTPUT_PATCH_SIZE > BezPatch)
{
float4 BasisU = BernsteinBasis(UV.x);
float4 BasisV = BernsteinBasis(UV.y);
float4 dBasisU = dBernsteinBasis(UV.x);
float4 dBasisV = dBernsteinBasis(UV.y);
float3 worldPos = EvaluateBezier(BezPatch, BasisU, BasisV);
float3 tangent = EvaluateBezier(BezPatch, dBasisU, BasisV);
float3 biTangent = EvaluateBezier(BezPatch, BasisU, dBasisV);
float3 normal = normalize(cross(tangent, biTangent));
DS_OUTPUT output;
output.pos = mul(float4(worldPos, 1), g_mViewProjection);
output.worldPos = worldPos;
output.normal = normal;
return output;
}
https://github.com/microsoft/Xbox-ATG-Samples/tree/master/PCSamples/IntroGraphics/SimpleBezierPC
https://github.com/microsoft/Xbox-ATG-Samples/tree/master/PCSamples/IntroGraphics/SimpleBezierPC12
Related
I want to port a example from Shadertoy to the iPhone with GLKit(Here).However, the effect is not very good when the program runs. The rendering effect is not smooth. Even the time interval was almost four seconds. I suspect it's because of the complexity of the operation in fragment shader. So I was wondering if there was a way to render without getting stuck, thank you.
fragment shader:
precision highp float;
varying highp vec2 TextureCoordsVarying;
uniform vec2 SceneSize;
uniform float elapsedTime;
#define TAU 6.2831852
#define MOD3 vec3(.1031,.11369,.13787)
#define BLACK_COL vec3(16,21,25)/255.
vec3 hash33(vec3 p3)
{
p3 = fract(p3 * MOD3);
p3 += dot(p3, p3.yxz+19.19);
return -1.0 + 2.0 * fract(vec3((p3.x + p3.y)*p3.z, (p3.x+p3.z)*p3.y, (p3.y+p3.z)*p3.x));
}
float simplex_noise(vec3 p)
{
const float K1 = 0.333333333;
const float K2 = 0.166666667;
vec3 i = floor(p + (p.x + p.y + p.z) * K1);
vec3 d0 = p - (i - (i.x + i.y + i.z) * K2);
vec3 e = step(vec3(0.0), d0 - d0.yzx);
vec3 i1 = e * (1.0 - e.zxy);
vec3 i2 = 1.0 - e.zxy * (1.0 - e);
vec3 d1 = d0 - (i1 - 1.0 * K2);
vec3 d2 = d0 - (i2 - 2.0 * K2);
vec3 d3 = d0 - (1.0 - 3.0 * K2);
vec4 h = max(0.6 - vec4(dot(d0, d0), dot(d1, d1), dot(d2, d2), dot(d3, d3)), 0.0);
vec4 n = h * h * h * h * vec4(dot(d0, hash33(i)), dot(d1, hash33(i + i1)), dot(d2, hash33(i + i2)), dot(d3, hash33(i + 1.0)));
return dot(vec4(31.316), n);
}
void main() {
vec2 uv = 2.0 * TextureCoordsVarying.xy - 1.0;
if (SceneSize.x > SceneSize.y) {
uv.x *= SceneSize.x/SceneSize.y;
}else{
uv.y *= SceneSize.y/SceneSize.x;
}
float a = sin(atan(uv.y, uv.x));
float am = abs(a-.5)/4.;
float l = length(uv);
float t = elapsedTime;
float m1 = clamp(.1/smoothstep(.0, 1.75, l), 0., 1.);
float m2 = clamp(.1/smoothstep(.42, 0., l), 0., 1.);
float s1 = (simplex_noise(vec3(uv*2., 1. + t*.525))*(max(1.0 - l*1.75, 0.)) + .9);
float s2 = (simplex_noise(vec3(uv*1., 15. + t*.525))*(max(.0 + l*1., .025)) + 1.25);
float s3 = (simplex_noise(vec3(vec2(am, am*100. + t*3.)*.15, 30. + t*.525))*(max(.0 + l*1., .25)) + 1.5);
s3 *= smoothstep(0.0, .3345, l);
float sh = smoothstep(0.15, .35, l);
float m = m1*m1*m2 * ((s1*s2*s3) * (1.-l)) * sh;
//m = clamp(m, 0., 1.);
vec3 col = mix(BLACK_COL, (0.5 + 0.5*cos(t+uv.xyx*3.+vec3(0,2,4))), m);
gl_FragColor = vec4(col, 1.);
}
Fragment Shader
vertex VertexOutBezier bezier_vertex(constant BezierParameters *allParams[[buffer(0)]],
// constant GlobalParameters& globalParams[[buffer(1)]],
uint vertexId [[vertex_id]],
uint instanceId [[instance_id]])
{
float t = (float) vertexId / 300;
rint(t);
BezierParameters params = allParams[instanceId];
float lineWidth = (1 - (((float) (vertexId % 2)) * 2.0)) * params.lineThickness;
float2 a = params.a;
float2 b = params.b;
float nt = 1.0f - t;
float nt_2 = nt * nt;
float nt_3 = nt_2 * nt;
float t_2 = t * t;
float t_3 = t_2 * t;
float2 point = a * nt_3 + params.p1 * nt_2 * t + params.p2 * nt * t_2 + b * t_3;
float2 tangent = -3.0 * a * nt_2 + params.p1 * (1.0 - 4.0 * t + 3.0 * t_2) + params.p2 * (2.0 * t - 3.0 * t_2) + 3 * b * t_2;
tangent = normalize(float2(-tangent.y, tangent.x));
VertexOutBezier vo;
vo.pos.xy = point + (tangent * (lineWidth / 3.0f));
vo.pos.zw = float2(0, 1);
vo.color = params.color ;
return vo;
}
My Fragment shader is
fragment float4 bezier_fragment(VertexOutBezier params[[stage_in]],
texture2d<float> texture [[texture(0)]]
)
{
constexpr sampler defaultSampler;
float4 canvasColor = texture.sample(defaultSampler, params.pos.xy);
return canvasColor;
}
Here i expect to get the pixel color of the texture. But here it is only getting single color. It is not getting the color of the texture according to its position.
Even when I do this in fragment I am getting the single color it is not varying with coordinates
fragment float4 bezier_fragment(VertexOutBezier params[[stage_in]],
texture2d<float> texture [[texture(0)]]
)
{
constexpr sampler defaultSampler;
float4 canvasColor = params.color * params.pos.x;
return canvasColor;
}
If I do this in Vertex Shader I got color varying according position of x
vo.pos.xy = point + (tangent * (lineWidth / 3.0f));
vo.pos.zw = float2(0, 1);
vo.color = params.color * vo.pos.x;
What is the Issue in fragment Shader. I cannot get the coordinates from Vertex Shader
Please make sure the VertexOutBezier.pos.xy value is normalization ( 0 ~ 1.0) ,due to the defaultSampler only receive normalization position value, if always return a single may be the position is beyond the bounds.
Using Metal I am drawing line using Bezier Curves using four points. I am using nearly 1500 triangles for the lines. The line is Pixellated. How can i reduce pixellated.
vertex VertexOutBezier bezier_vertex(constant BezierParameters *allParams[[buffer(0)]],
constant GlobalParameters& globalParams[[buffer(1)]],
uint vertexId [[vertex_id]],
uint instanceId [[instance_id]])
{
float t = (float) vertexId / globalParams.elementsPerInstance;
rint(t);
BezierParameters params = allParams[instanceId];
float lineWidth = (1 - (((float) (vertexId % 2)) * 2.0)) * params.lineThickness;
float2 a = params.a;
float2 b = params.b;
float cx = distance(a , b);
float2 p1 = params.p1 * 3.0; // float2 p1 = params.p1 * 3.0;
float2 p2 = params.p2 * 3.0; // float2 p2 = params.p2 * 3.0;
float nt = 1.0f - t;
float nt_2 = nt * nt;
float nt_3 = nt_2 * nt;
float t_2 = t * t;
float t_3 = t_2 * t;
// Calculate a single point in this Bezier curve:
float2 point = a * nt_3 + p1 * nt_2 * t + p2 * nt * t_2 + b * t_3;
float2 tangent = -3.0 * a * nt_2 + p1 * (1.0 - 4.0 * t + 3.0 * t_2) + p2 * (2.0 * t - 3.0 * t_2) + 3 * b * t_2;
tangent = (float2(-tangent.y , tangent.x ));
VertexOutBezier vo;
vo.pos.xy = point + (tangent * (lineWidth / 2.0f));
vo.pos.zw = float2(0, 1);
vo.color = params.color;
return vo;
}
You need to enable MSAA (multisample anti-aliasing). How you do this depends on your exact Metal view configuration, but the easiest way is if you're using MTKView. To enable MSAA in an MTKView, all you have to do is:
metalView.sampleCount = 4
Then, when you configure your MTLRenderPipelineDescriptor before calling makeRenderPipelineState(), add the following:
pipelineDescriptor.sampleCount = 4
This should greatly improve the quality of your curves and reduce pixelation. It does come with a performance cost however, as the GPU has to do substantially more work to render your frame.
I've been porting Shadertoy shaders to Metal in order to learn how to write Metal shaders. I don't think I'm doing it correctly as I have been writing every one of my shaders as a compute shader, rather than vertex/fragment shaders. This has worked for quite a few shaders I've ported, almost 20. However some ports are extremely slow, and others include functions that aren't available.
Here is one of the shaders that is tripping me up:
https://www.shadertoy.com/view/4t2SRh
The fwidth() call in render() and mainImage() is not allowed within a metal compute shader. Metal Shader Language does however have fwidth(), but it can only be called within a fragment shader.
Here is my attempt at porting to a compute shader:
#include <metal_stdlib>
using namespace metal;
float float_mod(float f1, float f2) {
return f1-f2 * floor(f1/f2);
}
float sdfCircle(float2 center, float radius, float2 coord )
{
float2 offset = coord - center;
return sqrt((offset.x * offset.x) + (offset.y * offset.y)) - radius;
}
float sdfEllipse(float2 center, float a, float b, float2 coord)
{
float a2 = a * a;
float b2 = b * b;
return (b2 * (coord.x - center.x) * (coord.x - center.x) +
a2 * (coord.y - center.y) * (coord.y - center.y) - a2 * b2)/(a2 * b2);
}
float sdfLine(float2 p0, float2 p1, float width, float2 coord)
{
float2 dir0 = p1 - p0;
float2 dir1 = coord - p0;
float h = clamp(dot(dir0, dir1)/dot(dir0, dir0), 0.0, 1.0);
return (length(dir1 - dir0 * h) - width * 0.5);
}
float sdfUnion( const float a, const float b )
{
return min(a, b);
}
float sdfDifference( const float a, const float b)
{
return max(a, -b);
}
float sdfIntersection( const float a, const float b )
{
return max(a, b);
}
float anti(float d) {
return fwidth(d) * 1.0;
}
float4 render(float d, float3 color, float stroke)
{
//stroke = fwidth(d) * 2.0;
float anti = fwidth(d) * 1.0;
float4 strokeLayer = float4(float3(0.05), 1.0-smoothstep(-anti, anti, d - stroke));
float4 colorLayer = float4(color, 1.0-smoothstep(-anti, anti, d));
if (stroke < 0.000001) {
return colorLayer;
}
return float4(mix(strokeLayer.rgb, colorLayer.rgb, colorLayer.a), strokeLayer.a);
}
kernel void compute(texture2d<float, access::write> output [[texture(0)]],
texture2d<float, access::sample> input [[texture(1)]],
constant float &timer [[buffer(0)]],
uint2 gid [[thread_position_in_grid]])
{
float4 fragColor;
int width = output.get_width();
int height = output.get_height();
float2 resolution = float2(width,height);
float2 uv = float2(gid) / resolution;
float size = min(resolution.x, resolution.y);
float pixSize = 1.0 / size;
float stroke = pixSize * 1.5;
float2 center = float2(0.5, 0.5 * resolution.y/resolution.x);
float a = sdfEllipse(float2(0.5, center.y*2.0-0.34), 0.25, 0.25, uv);
float b = sdfEllipse(float2(0.5, center.y*2.0+0.03), 0.8, 0.35, uv);
b = sdfIntersection(a, b);
float4 layer1 = render(b, float3(0.32, 0.56, 0.53), fwidth(b) * 2.0);
// Draw strips
float4 layer2 = layer1;
float t, r0, r1, r2, e, f;
float2 sinuv = float2(uv.x, (sin(uv.x*40.0)*0.02 + 1.0)*uv.y);
for (float i = 0.0; i < 10.0; i++) {
t = float_mod(timer + 0.3 * i, 3.0) * 0.2;
r0 = (t - 0.15) / 0.2 * 0.9 + 0.1;
r1 = (t - 0.15) / 0.2 * 0.1 + 0.9;
r2 = (t - 0.15) / 0.2 * 0.15 + 0.85;
e = sdfEllipse(float2(0.5, center.y*2.0+0.37-t*r2), 0.7*r0, 0.35*r1, sinuv);
f = sdfEllipse(float2(0.5, center.y*2.0+0.41-t), 0.7*r0, 0.35*r1, sinuv);
f = sdfDifference(e, f);
f = sdfIntersection(f, b);
float4 layer = render(f, float3(1.0, 0.81, 0.27), 0.0);
layer2 = mix(layer2, layer, layer.a);
}
// Draw the handle
float bottom = 0.08;
float handleWidth = 0.01;
float handleRadius = 0.04;
float d = sdfCircle(float2(0.5-handleRadius+0.5*handleWidth, bottom), handleRadius, uv);
float c = sdfCircle(float2(0.5-handleRadius+0.5*handleWidth, bottom), handleRadius-handleWidth, uv);
d = sdfDifference(d, c);
c = uv.y - bottom;
d = sdfIntersection(d, c);
c = sdfLine(float2(0.5, center.y*2.0-0.05), float2(0.5, bottom), handleWidth, uv);
d = sdfUnion(d, c);
c = sdfCircle(float2(0.5, center.y*2.0-0.05), 0.01, uv);
d = sdfUnion(c, d);
c = sdfCircle(float2(0.5-handleRadius*2.0+handleWidth, bottom), handleWidth*0.5, uv);
d = sdfUnion(c, d);
float4 layer0 = render(d, float3(0.404, 0.298, 0.278), stroke);
float2 p = (2.0*float2(gid).xy-resolution.xy)/min(resolution.y,resolution.x);
float3 bcol = float3(1.0,0.8,0.7-0.07*p.y)*(1.0-0.25*length(p));
fragColor = float4(bcol, 1.0);
fragColor.rgb = mix(fragColor.rgb, layer0.rgb, layer0.a);
fragColor.rgb = mix(fragColor.rgb, layer1.rgb, layer1.a);
fragColor.rgb = mix(fragColor.rgb, layer2.rgb, layer2.a);
fragColor.rgb = pow(fragColor.rgb, float3(1.0/2.2));
output.write(fragColor,gid);
}
This doesn't compile, as fwidth() is not available. However, if I do get rid of fwidth(), it will compile... but of course not draw the right thing.
I was wondering if there is a better way to port this to a fragment/vertex shader, so that I can use MSL's fwidth() ? Or is writing it as a compute shader fine, and I should find a different way around using fwidth() ?
I've got a pixelshader (below) that i'm using with XNA. On my laptop (crappy graphics card) it runs a little jerky, but ok. I've just tried running it on the xbox and it's horrible!
There's nothing to the game (it's just a fractal renderer) so it's got to be the pixel shader causing the issues. I also think it's the PS code because i've lowered the iterations and it's ok. I've also checked, and the GC delta is zero.
Are there any HLSL functions that are no-no's on the xbox?? I must be doing something wrong here, performance can't be that bad!
#include "FractalBase.fxh"
float ZPower;
float3 Colour;
float3 ColourScale;
float ComAbs(float2 Arg)
{
return sqrt(Arg.x * Arg.x + Arg.y * Arg.y);
}
float2 ComPow(float2 Arg, float Power)
{
float Mod = pow(Arg.x * Arg.x + Arg.y * Arg.y, Power / 2);
float Ang = atan2(Arg.y, Arg.x) * Power;
return float2(Mod * cos(Ang), Mod * sin(Ang));
}
float4 FractalPixelShader(float2 texCoord : TEXCOORD0, uniform float Iterations) : COLOR0
{
float2 c = texCoord.xy;
float2 z = 0;
float i;
float oldBailoutTest = 0;
float bailoutTest = 0;
for(i = 0; i < Iterations; i++)
{
z = ComPow(z, ZPower) + c;
bailoutTest = z.x * z.x + z.y * z.y;
if(bailoutTest >= ZPower * ZPower)
{
break;
}
oldBailoutTest = bailoutTest;
}
float normalisedIterations = i / Iterations;
float factor = (bailoutTest - oldBailoutTest) / (ZPower * ZPower - oldBailoutTest);
float4 Result = normalisedIterations + (1 / factor / Iterations);
Result = (i >= Iterations - 1) ? float4(0.0, 0.0, 0.0, 1.0) : float4(Result.x * Colour.r * ColourScale.x, Result.y * Colour.g * ColourScale.y, Result.z * Colour.b * ColourScale.z, 1);
return Result;
}
technique Technique1
{
pass
{
VertexShader = compile vs_3_0 SpriteVertexShader();
PixelShader = compile ps_3_0 FractalPixelShader(128);
}
}
Below is FractalBase.fxh:
float4x4 MatrixTransform : register(vs, c0);
float2 Pan;
float Zoom;
float Aspect;
void SpriteVertexShader(inout float4 Colour : COLOR0,
inout float2 texCoord : TEXCOORD0,
inout float4 position : SV_Position)
{
position = mul(position, MatrixTransform);
// Convert the position into from screen space into complex coordinates
texCoord = (position) * Zoom * float2(1, Aspect) - float2(Pan.x, -Pan.y);
}
EDIT I did try removing the conditional by using lots of lerps, however when i did that i got loads of artifacts (and not the kind that "belong in a museum"!). I changed things around, and fixed a few logic errors, however the key was to multiply the GreaterThan result by 1 + epsilon, to account for rounding errors just making 0.9999 = 0 (integer). See the fixed code below:
#include "FractalBase.fxh"
float ZPower;
float3 Colour;
float3 ColourScale;
float ComAbs(float2 Arg)
{
return sqrt(Arg.x * Arg.x + Arg.y * Arg.y);
}
float2 ComPow(float2 Arg, float Power)
{
float Mod = pow(Arg.x * Arg.x + Arg.y * Arg.y, Power / 2);
float Ang = atan2(Arg.y, Arg.x) * Power;
return float2(Mod * cos(Ang), Mod * sin(Ang));
}
float GreaterThan(float x, float y)
{
return ((x - y) / (2 * abs(x - y)) + 0.5) * 1.001;
}
float4 FractalPixelShader(float2 texCoord : TEXCOORD0, uniform float Iterations) : COLOR0
{
float2 c = texCoord.xy;
float2 z = 0;
int i;
float oldBailoutTest = 0;
float bailoutTest = 0;
int KeepGoing = 1;
int DoneIterations = Iterations;
int Bailout = 0;
for(i = 0; i < Iterations; i++)
{
z = lerp(z, ComPow(z, ZPower) + c, KeepGoing);
bailoutTest = lerp(bailoutTest, z.x * z.x + z.y * z.y, KeepGoing);
Bailout = lerp(Bailout, GreaterThan(bailoutTest, ZPower * ZPower), -abs(Bailout) + 1);
KeepGoing = lerp(KeepGoing, 0.0, Bailout);
DoneIterations = lerp(DoneIterations, min(i, DoneIterations), Bailout);
oldBailoutTest = lerp(oldBailoutTest, bailoutTest, KeepGoing);
}
float normalisedIterations = DoneIterations / Iterations;
float factor = (bailoutTest - oldBailoutTest) / (ZPower * ZPower - oldBailoutTest);
float4 Result = normalisedIterations + (1 / factor / Iterations);
Result = (DoneIterations >= Iterations - 1) ? float4(0.0, 0.0, 0.0, 1.0) : float4(Result.x * Colour.r * ColourScale.x, Result.y * Colour.g * ColourScale.y, Result.z * Colour.b * ColourScale.z, 1);
return Result;
}
technique Technique1
{
pass
{
VertexShader = compile vs_3_0 SpriteVertexShader();
PixelShader = compile ps_3_0 FractalPixelShader(128);
}
}
The xbox has a pretty large block size, so branching on the xbox isn't always so great. Also the compiler isn't always the most effective at emitting dynamic branches which your code seems to use.
Look into the branch attribute: http://msdn.microsoft.com/en-us/library/bb313972%28v=xnagamestudio.31%29.aspx
Also, if you move the early bailout, does the PC become more more similar to the Xbox?
Keep in mind that modern graphic cards are actually quite a bit faster then the Xenon unit by now.