From dcc5b4f6b005a2c89bb4e77bca4cfe8705734021 Mon Sep 17 00:00:00 2001 From: Marshall Mohror Date: Fri, 22 Oct 2021 23:09:29 -0500 Subject: Presentation: Only use FP16 in scaling shaders on supported devices in Vulkan --- src/video_core/host_shaders/fidelityfx_fsr.comp | 106 ++++++++++++------------ 1 file changed, 54 insertions(+), 52 deletions(-) (limited to 'src/video_core/host_shaders/fidelityfx_fsr.comp') diff --git a/src/video_core/host_shaders/fidelityfx_fsr.comp b/src/video_core/host_shaders/fidelityfx_fsr.comp index cbb601580..6b97f789d 100644 --- a/src/video_core/host_shaders/fidelityfx_fsr.comp +++ b/src/video_core/host_shaders/fidelityfx_fsr.comp @@ -28,80 +28,82 @@ // THE SOFTWARE. layout( push_constant ) uniform constants { - u32vec2 input_size; + uvec4 Const0; + uvec4 Const1; + uvec4 Const2; + uvec4 Const3; }; -uvec4 Const0; -uvec4 Const1; -uvec4 Const2; -uvec4 Const3; +layout(set=0,binding=0) uniform sampler2D InputTexture; +layout(set=0,binding=1,rgba16f) uniform image2D OutputTexture; #define A_GPU 1 #define A_GLSL 1 -#define A_HALF -#include "ffx_a.h" +#ifndef YUZU_USE_FP16 + #include "ffx_a.h" -f16vec4 LinearToSRGB(f16vec4 linear) { - bvec4 selector = greaterThan(linear, f16vec4(0.00313066844250063)); - f16vec4 low = linear * float16_t(12.92); - f16vec4 high = float16_t(1.055) * pow(linear, f16vec4(1 / 2.4)) - float16_t(0.055); - return mix(low, high, selector); -} - -f16vec4 SRGBToLinear(f16vec4 srgb) { - bvec4 selector = greaterThan(srgb, f16vec4(0.0404482362771082)); - f16vec4 low = srgb * float16_t(1.0 / 12.92); - f16vec4 high = pow((srgb + float16_t(0.055)) * float16_t(1.0 / 1.055), f16vec4(2.4)); - return mix(low, high, selector); -} + #if USE_EASU + #define FSR_EASU_F 1 + AF4 FsrEasuRF(AF2 p) { AF4 res = textureGather(InputTexture, p, 0); return res; } + AF4 FsrEasuGF(AF2 p) { AF4 res = textureGather(InputTexture, p, 1); return res; } + AF4 FsrEasuBF(AF2 p) { AF4 res = textureGather(InputTexture, p, 2); return res; } + #endif + #if USE_RCAS + #define FSR_RCAS_F 1 + AF4 FsrRcasLoadF(ASU2 p) { return texelFetch(InputTexture, ASU2(p), 0); } + void FsrRcasInputF(inout AF1 r, inout AF1 g, inout AF1 b) {} + #endif +#else + #define A_HALF + #include "ffx_a.h" -#if USE_EASU - #define FSR_EASU_H 1 - f16vec4 FsrEasuRH(vec2 p) { f16vec4 res = f16vec4(textureGather(InputTexture, p, 0)); return res; } - f16vec4 FsrEasuGH(vec2 p) { f16vec4 res = f16vec4(textureGather(InputTexture, p, 1)); return res; } - f16vec4 FsrEasuBH(vec2 p) { f16vec4 res = f16vec4(textureGather(InputTexture, p, 2)); return res; } -#endif -#if USE_RCAS - #define FSR_RCAS_H 1 - f16vec4 FsrRcasLoadH(ASW2 p) { return f16vec4(texelFetch(InputTexture, ASU2(p), 0)); } - void FsrRcasInputH(inout float16_t r, inout float16_t g, inout float16_t b) {} + #if USE_EASU + #define FSR_EASU_H 1 + AH4 FsrEasuRH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 0)); return res; } + AH4 FsrEasuGH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 1)); return res; } + AH4 FsrEasuBH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 2)); return res; } + #endif + #if USE_RCAS + #define FSR_RCAS_H 1 + AH4 FsrRcasLoadH(ASW2 p) { return AH4(texelFetch(InputTexture, ASU2(p), 0)); } + void FsrRcasInputH(inout AH1 r,inout AH1 g,inout AH1 b){} + #endif #endif #include "ffx_fsr1.h" -void CurrFilter(u32vec2 pos) { - // For debugging +void CurrFilter(AU2 pos) { #if USE_BILINEAR - vec2 pp = (vec2(pos) * vec2_AU2(Const0.xy) + vec2_AU2(Const0.zw)) * vec2_AU2(Const1.xy) + vec2(0.5, -0.5) * vec2_AU2(Const1.zw); - imageStore(OutputTexture, ivec2(pos), textureLod(InputTexture, pp, 0.0)); + AF2 pp = (AF2(pos) * AF2_AU2(Const0.xy) + AF2_AU2(Const0.zw)) * AF2_AU2(Const1.xy) + AF2(0.5, -0.5) * AF2_AU2(Const1.zw); + imageStore(OutputTexture, ASU2(pos), textureLod(InputTexture, pp, 0.0)); #endif #if USE_EASU - f16vec3 c; - FsrEasuH(c, pos, Const0, Const1, Const2, Const3); - imageStore(OutputTexture, ivec2(pos), f16vec4(c, 1)); + #ifndef YUZU_USE_FP16 + AF3 c; + FsrEasuF(c, pos, Const0, Const1, Const2, Const3); + imageStore(OutputTexture, ASU2(pos), AF4(c, 1)); + #else + AH3 c; + FsrEasuH(c, pos, Const0, Const1, Const2, Const3); + imageStore(OutputTexture, ASU2(pos), AH4(c, 1)); + #endif #endif #if USE_RCAS - f16vec3 c; - FsrRcasH(c.r, c.g, c.b, pos, Const0); - imageStore(OutputTexture, ivec2(pos), f16vec4(c, 1)); + #ifndef YUZU_USE_FP16 + AF3 c; + FsrRcasF(c.r, c.g, c.b, pos, Const0); + imageStore(OutputTexture, ASU2(pos), AF4(c, 1)); + #else + AH3 c; + FsrRcasH(c.r, c.g, c.b, pos, Const0); + imageStore(OutputTexture, ASU2(pos), AH4(c, 1)); + #endif #endif - } layout(local_size_x=64) in; void main() { - -#if USE_EASU || USE_BILINEAR - vec2 ires = vec2(input_size); - vec2 tres = textureSize(InputTexture, 0); - vec2 ores = imageSize(OutputTexture); - FsrEasuCon(Const0, Const1, Const2, Const3, ires.x, ires.y, tres.x, tres.y, ores.x, ores.y); -#endif -#if USE_RCAS - FsrRcasCon(Const0, 0.25f); -#endif - // Do remapping of local xy in workgroup for a more PS-like swizzle pattern. AU2 gxy = ARmp8x8(gl_LocalInvocationID.x) + AU2(gl_WorkGroupID.x << 4u, gl_WorkGroupID.y << 4u); CurrFilter(gxy); -- cgit v1.2.3