summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/CMakeLists.txt2
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h4
-rw-r--r--src/video_core/dirty_flags.h2
-rw-r--r--src/video_core/host_shaders/CMakeLists.txt26
-rw-r--r--src/video_core/host_shaders/fidelityfx_fsr.comp116
-rw-r--r--src/video_core/host_shaders/fxaa.frag76
-rw-r--r--src/video_core/host_shaders/fxaa.vert38
-rw-r--r--src/video_core/host_shaders/opengl_present_scaleforce.frag130
-rw-r--r--src/video_core/host_shaders/present_bicubic.frag67
-rw-r--r--src/video_core/host_shaders/present_gaussian.frag70
-rw-r--r--src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp11
-rw-r--r--src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp10
-rw-r--r--src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp11
-rw-r--r--src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp10
-rw-r--r--src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag7
-rw-r--r--src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag5
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp13
-rw-r--r--src/video_core/renderer_opengl/gl_compute_pipeline.cpp80
-rw-r--r--src/video_core/renderer_opengl/gl_graphics_pipeline.cpp106
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp94
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.cpp5
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp15
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp322
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h50
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp185
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h11
-rw-r--r--src/video_core/renderer_vulkan/blit_image.cpp61
-rw-r--r--src/video_core/renderer_vulkan/blit_image.h23
-rw-r--r--src/video_core/renderer_vulkan/pipeline_helper.h73
-rw-r--r--src/video_core/renderer_vulkan/vk_blit_screen.cpp698
-rw-r--r--src/video_core/renderer_vulkan/vk_blit_screen.h38
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp38
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h6
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pipeline.cpp49
-rw-r--r--src/video_core/renderer_vulkan/vk_fsr.cpp553
-rw-r--r--src/video_core/renderer_vulkan/vk_fsr.h54
-rw-r--r--src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp83
-rw-r--r--src/video_core/renderer_vulkan/vk_graphics_pipeline.h4
-rw-r--r--src/video_core/renderer_vulkan/vk_master_semaphore.h4
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp134
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.cpp10
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.h5
-rw-r--r--src/video_core/renderer_vulkan/vk_state_tracker.h8
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp391
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.h86
-rw-r--r--src/video_core/surface.cpp74
-rw-r--r--src/video_core/surface.h6
-rw-r--r--src/video_core/texture_cache/image_base.cpp10
-rw-r--r--src/video_core/texture_cache/image_base.h15
-rw-r--r--src/video_core/texture_cache/image_info.cpp22
-rw-r--r--src/video_core/texture_cache/image_info.h4
-rw-r--r--src/video_core/texture_cache/image_view_base.cpp13
-rw-r--r--src/video_core/texture_cache/image_view_base.h4
-rw-r--r--src/video_core/texture_cache/texture_cache.h495
-rw-r--r--src/video_core/texture_cache/texture_cache_base.h43
-rw-r--r--src/video_core/texture_cache/types.h7
-rw-r--r--src/video_core/texture_cache/util.cpp12
-rw-r--r--src/video_core/texture_cache/util.h3
-rw-r--r--src/video_core/textures/texture.cpp29
-rw-r--r--src/video_core/video_core.cpp11
-rw-r--r--src/video_core/video_core.h2
-rw-r--r--src/video_core/vulkan_common/vulkan_device.h8
62 files changed, 3968 insertions, 574 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 6aac7f305..91a30fef7 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -132,6 +132,8 @@ add_library(video_core STATIC
renderer_vulkan/vk_descriptor_pool.h
renderer_vulkan/vk_fence_manager.cpp
renderer_vulkan/vk_fence_manager.h
+ renderer_vulkan/vk_fsr.cpp
+ renderer_vulkan/vk_fsr.h
renderer_vulkan/vk_graphics_pipeline.cpp
renderer_vulkan/vk_graphics_pipeline.h
renderer_vulkan/vk_master_semaphore.cpp
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index d350c9b36..43bed63ac 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -853,12 +853,14 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
}
if constexpr (USE_MEMORY_MAPS) {
auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
+ runtime.PreCopyBarrier();
for (auto& [copy, buffer_id] : downloads) {
// Have in mind the staging buffer offset for the copy
copy.dst_offset += download_staging.offset;
const std::array copies{copy};
- runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies);
+ runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies, false);
}
+ runtime.PostCopyBarrier();
runtime.Finish();
for (const auto& [copy, buffer_id] : downloads) {
const Buffer& buffer = slot_buffers[buffer_id];
diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h
index f0d545f90..d63ad5a35 100644
--- a/src/video_core/dirty_flags.h
+++ b/src/video_core/dirty_flags.h
@@ -29,6 +29,8 @@ enum : u8 {
ColorBuffer6,
ColorBuffer7,
ZetaBuffer,
+ RescaleViewports,
+ RescaleScissors,
VertexBuffers,
VertexBuffer0,
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index 20d748c12..d779a967a 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -1,3 +1,11 @@
+set(FIDELITYFX_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/externals/FidelityFX-FSR/ffx-fsr)
+
+set(GLSL_INCLUDES
+ fidelityfx_fsr.comp
+ ${FIDELITYFX_INCLUDE_DIR}/ffx_a.h
+ ${FIDELITYFX_INCLUDE_DIR}/ffx_fsr1.h
+)
+
set(SHADER_FILES
astc_decoder.comp
block_linear_unswizzle_2d.comp
@@ -5,14 +13,25 @@ set(SHADER_FILES
convert_depth_to_float.frag
convert_float_to_depth.frag
full_screen_triangle.vert
+ fxaa.frag
+ fxaa.vert
opengl_copy_bc4.comp
opengl_present.frag
opengl_present.vert
+ opengl_present_scaleforce.frag
pitch_unswizzle.comp
+ present_bicubic.frag
+ present_gaussian.frag
vulkan_blit_color_float.frag
vulkan_blit_depth_stencil.frag
+ vulkan_fidelityfx_fsr_easu_fp16.comp
+ vulkan_fidelityfx_fsr_easu_fp32.comp
+ vulkan_fidelityfx_fsr_rcas_fp16.comp
+ vulkan_fidelityfx_fsr_rcas_fp32.comp
vulkan_present.frag
vulkan_present.vert
+ vulkan_present_scaleforce_fp16.frag
+ vulkan_present_scaleforce_fp32.frag
vulkan_quad_indexed.comp
vulkan_uint8.comp
)
@@ -76,7 +95,7 @@ foreach(FILENAME IN ITEMS ${SHADER_FILES})
OUTPUT
${SPIRV_HEADER_FILE}
COMMAND
- ${GLSLANGVALIDATOR} -V ${QUIET_FLAG} ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE}
+ ${GLSLANGVALIDATOR} -V ${QUIET_FLAG} -I"${FIDELITYFX_INCLUDE_DIR}" ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE}
MAIN_DEPENDENCY
${SOURCE_FILE}
)
@@ -84,9 +103,12 @@ foreach(FILENAME IN ITEMS ${SHADER_FILES})
endif()
endforeach()
+set(SHADER_SOURCES ${SHADER_FILES})
+list(APPEND SHADER_SOURCES ${GLSL_INCLUDES})
+
add_custom_target(host_shaders
DEPENDS
${SHADER_HEADERS}
SOURCES
- ${SHADER_FILES}
+ ${SHADER_SOURCES}
)
diff --git a/src/video_core/host_shaders/fidelityfx_fsr.comp b/src/video_core/host_shaders/fidelityfx_fsr.comp
new file mode 100644
index 000000000..6b97f789d
--- /dev/null
+++ b/src/video_core/host_shaders/fidelityfx_fsr.comp
@@ -0,0 +1,116 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+//!#version 460 core
+#extension GL_ARB_separate_shader_objects : enable
+#extension GL_ARB_shading_language_420pack : enable
+#extension GL_GOOGLE_include_directive : enable
+#extension GL_EXT_shader_explicit_arithmetic_types : require
+
+// FidelityFX Super Resolution Sample
+//
+// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+layout( push_constant ) uniform constants {
+ uvec4 Const0;
+ uvec4 Const1;
+ uvec4 Const2;
+ uvec4 Const3;
+};
+
+layout(set=0,binding=0) uniform sampler2D InputTexture;
+layout(set=0,binding=1,rgba16f) uniform image2D OutputTexture;
+
+#define A_GPU 1
+#define A_GLSL 1
+
+#ifndef YUZU_USE_FP16
+ #include "ffx_a.h"
+
+ #if USE_EASU
+ #define FSR_EASU_F 1
+ AF4 FsrEasuRF(AF2 p) { AF4 res = textureGather(InputTexture, p, 0); return res; }
+ AF4 FsrEasuGF(AF2 p) { AF4 res = textureGather(InputTexture, p, 1); return res; }
+ AF4 FsrEasuBF(AF2 p) { AF4 res = textureGather(InputTexture, p, 2); return res; }
+ #endif
+ #if USE_RCAS
+ #define FSR_RCAS_F 1
+ AF4 FsrRcasLoadF(ASU2 p) { return texelFetch(InputTexture, ASU2(p), 0); }
+ void FsrRcasInputF(inout AF1 r, inout AF1 g, inout AF1 b) {}
+ #endif
+#else
+ #define A_HALF
+ #include "ffx_a.h"
+
+ #if USE_EASU
+ #define FSR_EASU_H 1
+ AH4 FsrEasuRH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 0)); return res; }
+ AH4 FsrEasuGH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 1)); return res; }
+ AH4 FsrEasuBH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 2)); return res; }
+ #endif
+ #if USE_RCAS
+ #define FSR_RCAS_H 1
+ AH4 FsrRcasLoadH(ASW2 p) { return AH4(texelFetch(InputTexture, ASU2(p), 0)); }
+ void FsrRcasInputH(inout AH1 r,inout AH1 g,inout AH1 b){}
+ #endif
+#endif
+
+#include "ffx_fsr1.h"
+
+void CurrFilter(AU2 pos) {
+#if USE_BILINEAR
+ AF2 pp = (AF2(pos) * AF2_AU2(Const0.xy) + AF2_AU2(Const0.zw)) * AF2_AU2(Const1.xy) + AF2(0.5, -0.5) * AF2_AU2(Const1.zw);
+ imageStore(OutputTexture, ASU2(pos), textureLod(InputTexture, pp, 0.0));
+#endif
+#if USE_EASU
+ #ifndef YUZU_USE_FP16
+ AF3 c;
+ FsrEasuF(c, pos, Const0, Const1, Const2, Const3);
+ imageStore(OutputTexture, ASU2(pos), AF4(c, 1));
+ #else
+ AH3 c;
+ FsrEasuH(c, pos, Const0, Const1, Const2, Const3);
+ imageStore(OutputTexture, ASU2(pos), AH4(c, 1));
+ #endif
+#endif
+#if USE_RCAS
+ #ifndef YUZU_USE_FP16
+ AF3 c;
+ FsrRcasF(c.r, c.g, c.b, pos, Const0);
+ imageStore(OutputTexture, ASU2(pos), AF4(c, 1));
+ #else
+ AH3 c;
+ FsrRcasH(c.r, c.g, c.b, pos, Const0);
+ imageStore(OutputTexture, ASU2(pos), AH4(c, 1));
+ #endif
+#endif
+}
+
+layout(local_size_x=64) in;
+void main() {
+ // Do remapping of local xy in workgroup for a more PS-like swizzle pattern.
+ AU2 gxy = ARmp8x8(gl_LocalInvocationID.x) + AU2(gl_WorkGroupID.x << 4u, gl_WorkGroupID.y << 4u);
+ CurrFilter(gxy);
+ gxy.x += 8u;
+ CurrFilter(gxy);
+ gxy.y += 8u;
+ CurrFilter(gxy);
+ gxy.x -= 8u;
+ CurrFilter(gxy);
+}
diff --git a/src/video_core/host_shaders/fxaa.frag b/src/video_core/host_shaders/fxaa.frag
new file mode 100644
index 000000000..02f4068d1
--- /dev/null
+++ b/src/video_core/host_shaders/fxaa.frag
@@ -0,0 +1,76 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+// Source code is adapted from
+// https://www.geeks3d.com/20110405/fxaa-fast-approximate-anti-aliasing-demo-glsl-opengl-test-radeon-geforce/3/
+
+#version 460
+
+#ifdef VULKAN
+
+#define BINDING_COLOR_TEXTURE 1
+
+#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
+
+#define BINDING_COLOR_TEXTURE 0
+
+#endif
+
+layout (location = 0) in vec4 posPos;
+
+layout (location = 0) out vec4 frag_color;
+
+layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture;
+
+const float FXAA_SPAN_MAX = 8.0;
+const float FXAA_REDUCE_MUL = 1.0 / 8.0;
+const float FXAA_REDUCE_MIN = 1.0 / 128.0;
+
+#define FxaaTexLod0(t, p) textureLod(t, p, 0.0)
+#define FxaaTexOff(t, p, o) textureLodOffset(t, p, 0.0, o)
+
+vec3 FxaaPixelShader(vec4 posPos, sampler2D tex) {
+
+ vec3 rgbNW = FxaaTexLod0(tex, posPos.zw).xyz;
+ vec3 rgbNE = FxaaTexOff(tex, posPos.zw, ivec2(1,0)).xyz;
+ vec3 rgbSW = FxaaTexOff(tex, posPos.zw, ivec2(0,1)).xyz;
+ vec3 rgbSE = FxaaTexOff(tex, posPos.zw, ivec2(1,1)).xyz;
+ vec3 rgbM = FxaaTexLod0(tex, posPos.xy).xyz;
+/*---------------------------------------------------------*/
+ vec3 luma = vec3(0.299, 0.587, 0.114);
+ float lumaNW = dot(rgbNW, luma);
+ float lumaNE = dot(rgbNE, luma);
+ float lumaSW = dot(rgbSW, luma);
+ float lumaSE = dot(rgbSE, luma);
+ float lumaM = dot(rgbM, luma);
+/*---------------------------------------------------------*/
+ float lumaMin = min(lumaM, min(min(lumaNW, lumaNE), min(lumaSW, lumaSE)));
+ float lumaMax = max(lumaM, max(max(lumaNW, lumaNE), max(lumaSW, lumaSE)));
+/*---------------------------------------------------------*/
+ vec2 dir;
+ dir.x = -((lumaNW + lumaNE) - (lumaSW + lumaSE));
+ dir.y = ((lumaNW + lumaSW) - (lumaNE + lumaSE));
+/*---------------------------------------------------------*/
+ float dirReduce = max(
+ (lumaNW + lumaNE + lumaSW + lumaSE) * (0.25 * FXAA_REDUCE_MUL),
+ FXAA_REDUCE_MIN);
+ float rcpDirMin = 1.0/(min(abs(dir.x), abs(dir.y)) + dirReduce);
+ dir = min(vec2( FXAA_SPAN_MAX, FXAA_SPAN_MAX),
+ max(vec2(-FXAA_SPAN_MAX, -FXAA_SPAN_MAX),
+ dir * rcpDirMin)) / textureSize(tex, 0);
+/*--------------------------------------------------------*/
+ vec3 rgbA = (1.0 / 2.0) * (
+ FxaaTexLod0(tex, posPos.xy + dir * (1.0 / 3.0 - 0.5)).xyz +
+ FxaaTexLod0(tex, posPos.xy + dir * (2.0 / 3.0 - 0.5)).xyz);
+ vec3 rgbB = rgbA * (1.0 / 2.0) + (1.0 / 4.0) * (
+ FxaaTexLod0(tex, posPos.xy + dir * (0.0 / 3.0 - 0.5)).xyz +
+ FxaaTexLod0(tex, posPos.xy + dir * (3.0 / 3.0 - 0.5)).xyz);
+ float lumaB = dot(rgbB, luma);
+ if((lumaB < lumaMin) || (lumaB > lumaMax)) return rgbA;
+ return rgbB;
+}
+
+void main() {
+ frag_color = vec4(FxaaPixelShader(posPos, input_texture), 1.0);
+}
diff --git a/src/video_core/host_shaders/fxaa.vert b/src/video_core/host_shaders/fxaa.vert
new file mode 100644
index 000000000..ac20c04e9
--- /dev/null
+++ b/src/video_core/host_shaders/fxaa.vert
@@ -0,0 +1,38 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#version 460
+
+out gl_PerVertex {
+ vec4 gl_Position;
+};
+
+const vec2 vertices[4] =
+ vec2[4](vec2(-1.0, 1.0), vec2(1.0, 1.0), vec2(-1.0, -1.0), vec2(1.0, -1.0));
+
+layout (location = 0) out vec4 posPos;
+
+#ifdef VULKAN
+
+#define BINDING_COLOR_TEXTURE 0
+#define VERTEX_ID gl_VertexIndex
+
+#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
+
+#define BINDING_COLOR_TEXTURE 0
+#define VERTEX_ID gl_VertexID
+
+#endif
+
+layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture;
+
+const float FXAA_SUBPIX_SHIFT = 0;
+
+void main() {
+ vec2 vertex = vertices[VERTEX_ID];
+ gl_Position = vec4(vertex, 0.0, 1.0);
+ vec2 vert_tex_coord = (vertex + 1.0) / 2.0;
+ posPos.xy = vert_tex_coord;
+ posPos.zw = vert_tex_coord - (0.5 + FXAA_SUBPIX_SHIFT) / textureSize(input_texture, 0);
+}
diff --git a/src/video_core/host_shaders/opengl_present_scaleforce.frag b/src/video_core/host_shaders/opengl_present_scaleforce.frag
new file mode 100644
index 000000000..71ff9e1e3
--- /dev/null
+++ b/src/video_core/host_shaders/opengl_present_scaleforce.frag
@@ -0,0 +1,130 @@
+// MIT License
+//
+// Copyright (c) 2020 BreadFish64
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+// Adapted from https://github.com/BreadFish64/ScaleFish/tree/master/scaleforce
+
+//! #version 460
+
+#extension GL_ARB_separate_shader_objects : enable
+
+#ifdef YUZU_USE_FP16
+
+#extension GL_AMD_gpu_shader_half_float : enable
+#extension GL_NV_gpu_shader5 : enable
+
+#define lfloat float16_t
+#define lvec2 f16vec2
+#define lvec3 f16vec3
+#define lvec4 f16vec4
+
+#else
+
+#define lfloat float
+#define lvec2 vec2
+#define lvec3 vec3
+#define lvec4 vec4
+
+#endif
+
+#ifdef VULKAN
+
+#define BINDING_COLOR_TEXTURE 1
+
+#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
+
+#define BINDING_COLOR_TEXTURE 0
+
+#endif
+
+layout (location = 0) in vec2 tex_coord;
+
+layout (location = 0) out vec4 frag_color;
+
+layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture;
+
+const bool ignore_alpha = true;
+
+lfloat ColorDist1(lvec4 a, lvec4 b) {
+ // https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.2020_conversion
+ const lvec3 K = lvec3(0.2627, 0.6780, 0.0593);
+ const lfloat scaleB = lfloat(0.5) / (lfloat(1.0) - K.b);
+ const lfloat scaleR = lfloat(0.5) / (lfloat(1.0) - K.r);
+ lvec4 diff = a - b;
+ lfloat Y = dot(diff.rgb, K);
+ lfloat Cb = scaleB * (diff.b - Y);
+ lfloat Cr = scaleR * (diff.r - Y);
+ lvec3 YCbCr = lvec3(Y, Cb, Cr);
+ lfloat d = length(YCbCr);
+ if (ignore_alpha) {
+ return d;
+ }
+ return sqrt(a.a * b.a * d * d + diff.a * diff.a);
+}
+
+lvec4 ColorDist(lvec4 ref, lvec4 A, lvec4 B, lvec4 C, lvec4 D) {
+ return lvec4(
+ ColorDist1(ref, A),
+ ColorDist1(ref, B),
+ ColorDist1(ref, C),
+ ColorDist1(ref, D)
+ );
+}
+
+vec4 Scaleforce(sampler2D tex, vec2 tex_coord) {
+ lvec4 bl = lvec4(textureOffset(tex, tex_coord, ivec2(-1, -1)));
+ lvec4 bc = lvec4(textureOffset(tex, tex_coord, ivec2(0, -1)));
+ lvec4 br = lvec4(textureOffset(tex, tex_coord, ivec2(1, -1)));
+ lvec4 cl = lvec4(textureOffset(tex, tex_coord, ivec2(-1, 0)));
+ lvec4 cc = lvec4(texture(tex, tex_coord));
+ lvec4 cr = lvec4(textureOffset(tex, tex_coord, ivec2(1, 0)));
+ lvec4 tl = lvec4(textureOffset(tex, tex_coord, ivec2(-1, 1)));
+ lvec4 tc = lvec4(textureOffset(tex, tex_coord, ivec2(0, 1)));
+ lvec4 tr = lvec4(textureOffset(tex, tex_coord, ivec2(1, 1)));
+
+ lvec4 offset_tl = ColorDist(cc, tl, tc, tr, cr);
+ lvec4 offset_br = ColorDist(cc, br, bc, bl, cl);
+
+ // Calculate how different cc is from the texels around it
+ const lfloat plus_weight = lfloat(1.5);
+ const lfloat cross_weight = lfloat(1.5);
+ lfloat total_dist = dot(offset_tl + offset_br, lvec4(cross_weight, plus_weight, cross_weight, plus_weight));
+
+ if (total_dist == lfloat(0.0)) {
+ return cc;
+ } else {
+ // Add together all the distances with direction taken into account
+ lvec4 tmp = offset_tl - offset_br;
+ lvec2 total_offset = tmp.wy * plus_weight + (tmp.zz + lvec2(-tmp.x, tmp.x)) * cross_weight;
+
+ // When the image has thin points, they tend to split apart.
+ // This is because the texels all around are different and total_offset reaches into clear areas.
+ // This works pretty well to keep the offset in bounds for these cases.
+ lfloat clamp_val = length(total_offset) / total_dist;
+ vec2 final_offset = vec2(clamp(total_offset, -clamp_val, clamp_val)) / textureSize(tex, 0);
+
+ return texture(tex, tex_coord - final_offset);
+ }
+}
+
+void main() {
+ frag_color = Scaleforce(input_texture, tex_coord);
+}
diff --git a/src/video_core/host_shaders/present_bicubic.frag b/src/video_core/host_shaders/present_bicubic.frag
new file mode 100644
index 000000000..902b70c2b
--- /dev/null
+++ b/src/video_core/host_shaders/present_bicubic.frag
@@ -0,0 +1,67 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#version 460 core
+
+#ifdef VULKAN
+
+#define BINDING_COLOR_TEXTURE 1
+
+#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
+
+#define BINDING_COLOR_TEXTURE 0
+
+#endif
+
+
+layout (location = 0) in vec2 frag_tex_coord;
+
+layout (location = 0) out vec4 color;
+
+layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D color_texture;
+
+vec4 cubic(float v) {
+ vec4 n = vec4(1.0, 2.0, 3.0, 4.0) - v;
+ vec4 s = n * n * n;
+ float x = s.x;
+ float y = s.y - 4.0 * s.x;
+ float z = s.z - 4.0 * s.y + 6.0 * s.x;
+ float w = 6.0 - x - y - z;
+ return vec4(x, y, z, w) * (1.0 / 6.0);
+}
+
+vec4 textureBicubic( sampler2D textureSampler, vec2 texCoords ) {
+
+ vec2 texSize = textureSize(textureSampler, 0);
+ vec2 invTexSize = 1.0 / texSize;
+
+ texCoords = texCoords * texSize - 0.5;
+
+ vec2 fxy = fract(texCoords);
+ texCoords -= fxy;
+
+ vec4 xcubic = cubic(fxy.x);
+ vec4 ycubic = cubic(fxy.y);
+
+ vec4 c = texCoords.xxyy + vec2(-0.5, +1.5).xyxy;
+
+ vec4 s = vec4(xcubic.xz + xcubic.yw, ycubic.xz + ycubic.yw);
+ vec4 offset = c + vec4(xcubic.yw, ycubic.yw) / s;
+
+ offset *= invTexSize.xxyy;
+
+ vec4 sample0 = texture(textureSampler, offset.xz);
+ vec4 sample1 = texture(textureSampler, offset.yz);
+ vec4 sample2 = texture(textureSampler, offset.xw);
+ vec4 sample3 = texture(textureSampler, offset.yw);
+
+ float sx = s.x / (s.x + s.y);
+ float sy = s.z / (s.z + s.w);
+
+ return mix(mix(sample3, sample2, sx), mix(sample1, sample0, sx), sy);
+}
+
+void main() {
+ color = vec4(textureBicubic(color_texture, frag_tex_coord).rgb, 1.0f);
+}
diff --git a/src/video_core/host_shaders/present_gaussian.frag b/src/video_core/host_shaders/present_gaussian.frag
new file mode 100644
index 000000000..66fed3238
--- /dev/null
+++ b/src/video_core/host_shaders/present_gaussian.frag
@@ -0,0 +1,70 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+// Code adapted from the following sources:
+// - https://learnopengl.com/Advanced-Lighting/Bloom
+// - https://www.rastergrid.com/blog/2010/09/efficient-gaussian-blur-with-linear-sampling/
+
+#version 460 core
+
+#ifdef VULKAN
+
+#define BINDING_COLOR_TEXTURE 1
+
+#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
+
+#define BINDING_COLOR_TEXTURE 0
+
+#endif
+
+layout(location = 0) in vec2 frag_tex_coord;
+
+layout(location = 0) out vec4 color;
+
+layout(binding = BINDING_COLOR_TEXTURE) uniform sampler2D color_texture;
+
+const float offset[3] = float[](0.0, 1.3846153846, 3.2307692308);
+const float weight[3] = float[](0.2270270270, 0.3162162162, 0.0702702703);
+
+vec4 blurVertical(sampler2D textureSampler, vec2 coord, vec2 norm) {
+ vec4 result = vec4(0.0f);
+ for (int i = 1; i < 3; i++) {
+ result += texture(textureSampler, vec2(coord) + (vec2(0.0, offset[i]) * norm)) * weight[i];
+ result += texture(textureSampler, vec2(coord) - (vec2(0.0, offset[i]) * norm)) * weight[i];
+ }
+ return result;
+}
+
+vec4 blurHorizontal(sampler2D textureSampler, vec2 coord, vec2 norm) {
+ vec4 result = vec4(0.0f);
+ for (int i = 1; i < 3; i++) {
+ result += texture(textureSampler, vec2(coord) + (vec2(offset[i], 0.0) * norm)) * weight[i];
+ result += texture(textureSampler, vec2(coord) - (vec2(offset[i], 0.0) * norm)) * weight[i];
+ }
+ return result;
+}
+
+vec4 blurDiagonal(sampler2D textureSampler, vec2 coord, vec2 norm) {
+ vec4 result = vec4(0.0f);
+ for (int i = 1; i < 3; i++) {
+ result +=
+ texture(textureSampler, vec2(coord) + (vec2(offset[i], offset[i]) * norm)) * weight[i];
+ result +=
+ texture(textureSampler, vec2(coord) - (vec2(offset[i], offset[i]) * norm)) * weight[i];
+ }
+ return result;
+}
+
+void main() {
+ vec3 base = texture(color_texture, vec2(frag_tex_coord)).rgb * weight[0];
+ vec2 tex_offset = 1.0f / textureSize(color_texture, 0);
+
+ // TODO(Blinkhawk): This code can be optimized through shader group instructions.
+ vec3 horizontal = blurHorizontal(color_texture, frag_tex_coord, tex_offset).rgb;
+ vec3 vertical = blurVertical(color_texture, frag_tex_coord, tex_offset).rgb;
+ vec3 diagonalA = blurDiagonal(color_texture, frag_tex_coord, tex_offset).rgb;
+ vec3 diagonalB = blurDiagonal(color_texture, frag_tex_coord, tex_offset * vec2(1.0, -1.0)).rgb;
+ vec3 combination = mix(mix(horizontal, vertical, 0.5f), mix(diagonalA, diagonalB, 0.5f), 0.5f);
+ color = vec4(combination + base, 1.0f);
+}
diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp
new file mode 100644
index 000000000..1c96a7905
--- /dev/null
+++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp
@@ -0,0 +1,11 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#version 460 core
+#extension GL_GOOGLE_include_directive : enable
+
+#define YUZU_USE_FP16
+#define USE_EASU 1
+
+#include "fidelityfx_fsr.comp"
diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp
new file mode 100644
index 000000000..f4daff739
--- /dev/null
+++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp
@@ -0,0 +1,10 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#version 460 core
+#extension GL_GOOGLE_include_directive : enable
+
+#define USE_EASU 1
+
+#include "fidelityfx_fsr.comp"
diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp
new file mode 100644
index 000000000..6b6796dd1
--- /dev/null
+++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp
@@ -0,0 +1,11 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#version 460 core
+#extension GL_GOOGLE_include_directive : enable
+
+#define YUZU_USE_FP16
+#define USE_RCAS 1
+
+#include "fidelityfx_fsr.comp"
diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp
new file mode 100644
index 000000000..f785eebf3
--- /dev/null
+++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp
@@ -0,0 +1,10 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#version 460 core
+#extension GL_GOOGLE_include_directive : enable
+
+#define USE_RCAS 1
+
+#include "fidelityfx_fsr.comp"
diff --git a/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag b/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag
new file mode 100644
index 000000000..924c03060
--- /dev/null
+++ b/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag
@@ -0,0 +1,7 @@
+#version 460
+
+#extension GL_GOOGLE_include_directive : enable
+
+#define YUZU_USE_FP16
+
+#include "opengl_present_scaleforce.frag"
diff --git a/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag b/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag
new file mode 100644
index 000000000..a594b83ca
--- /dev/null
+++ b/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag
@@ -0,0 +1,5 @@
+#version 460
+
+#extension GL_GOOGLE_include_directive : enable
+
+#include "opengl_present_scaleforce.frag"
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 187a28e4d..d4dd10bb6 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -5,6 +5,7 @@
#include <algorithm>
#include <span>
+#include "shader_recompiler/backend/glasm/emit_glasm.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h"
@@ -229,8 +230,10 @@ void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buff
.padding = 0,
};
buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
- glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1,
- reinterpret_cast<const GLuint*>(&ssbo));
+ glProgramLocalParametersI4uivNV(
+ PROGRAM_LUT[stage],
+ Shader::Backend::GLASM::PROGRAM_LOCAL_PARAMETER_STORAGE_BUFFER_BASE + binding_index, 1,
+ reinterpret_cast<const GLuint*>(&ssbo));
}
}
@@ -250,8 +253,10 @@ void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buf
.padding = 0,
};
buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
- glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1,
- reinterpret_cast<const GLuint*>(&ssbo));
+ glProgramLocalParametersI4uivNV(
+ GL_COMPUTE_PROGRAM_NV,
+ Shader::Backend::GLASM::PROGRAM_LOCAL_PARAMETER_STORAGE_BUFFER_BASE + binding_index, 1,
+ reinterpret_cast<const GLuint*>(&ssbo));
}
}
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
index aa1cc592f..5c1f21c65 100644
--- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
@@ -19,15 +19,6 @@ using VideoCommon::ImageId;
constexpr u32 MAX_TEXTURES = 64;
constexpr u32 MAX_IMAGES = 16;
-template <typename Range>
-u32 AccumulateCount(const Range& range) {
- u32 num{};
- for (const auto& desc : range) {
- num += desc.count;
- }
- return num;
-}
-
size_t ComputePipelineKey::Hash() const noexcept {
return static_cast<size_t>(
Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this));
@@ -58,17 +49,17 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac
std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(),
uniform_buffer_sizes.begin());
- num_texture_buffers = AccumulateCount(info.texture_buffer_descriptors);
- num_image_buffers = AccumulateCount(info.image_buffer_descriptors);
+ num_texture_buffers = Shader::NumDescriptors(info.texture_buffer_descriptors);
+ num_image_buffers = Shader::NumDescriptors(info.image_buffer_descriptors);
- const u32 num_textures{num_texture_buffers + AccumulateCount(info.texture_descriptors)};
+ const u32 num_textures{num_texture_buffers + Shader::NumDescriptors(info.texture_descriptors)};
ASSERT(num_textures <= MAX_TEXTURES);
- const u32 num_images{num_image_buffers + AccumulateCount(info.image_descriptors)};
+ const u32 num_images{num_image_buffers + Shader::NumDescriptors(info.image_descriptors)};
ASSERT(num_images <= MAX_IMAGES);
const bool is_glasm{assembly_program.handle != 0};
- const u32 num_storage_buffers{AccumulateCount(info.storage_buffers_descriptors)};
+ const u32 num_storage_buffers{Shader::NumDescriptors(info.storage_buffers_descriptors)};
use_storage_buffers =
!is_glasm || num_storage_buffers < device.GetMaxGLASMStorageBufferBlocks();
writes_global_memory = !use_storage_buffers &&
@@ -88,8 +79,7 @@ void ComputePipeline::Configure() {
}
texture_cache.SynchronizeComputeDescriptors();
- std::array<ImageViewId, MAX_TEXTURES + MAX_IMAGES> image_view_ids;
- boost::container::static_vector<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices;
+ boost::container::static_vector<VideoCommon::ImageViewInOut, MAX_TEXTURES + MAX_IMAGES> views;
std::array<GLuint, MAX_TEXTURES> samplers;
std::array<GLuint, MAX_TEXTURES> textures;
std::array<GLuint, MAX_IMAGES> images;
@@ -119,33 +109,39 @@ void ComputePipeline::Configure() {
}
return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
}};
- const auto add_image{[&](const auto& desc) {
+ const auto add_image{[&](const auto& desc, bool blacklist) {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
- image_view_indices.push_back(handle.first);
+ views.push_back({
+ .index = handle.first,
+ .blacklist = blacklist,
+ .id = {},
+ });
}
}};
for (const auto& desc : info.texture_buffer_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
- image_view_indices.push_back(handle.first);
+ views.push_back({handle.first});
samplers[sampler_binding++] = 0;
}
}
- std::ranges::for_each(info.image_buffer_descriptors, add_image);
+ for (const auto& desc : info.image_buffer_descriptors) {
+ add_image(desc, false);
+ }
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
- image_view_indices.push_back(handle.first);
+ views.push_back({handle.first});
Sampler* const sampler = texture_cache.GetComputeSampler(handle.second);
samplers[sampler_binding++] = sampler->Handle();
}
}
- std::ranges::for_each(info.image_descriptors, add_image);
-
- const std::span indices_span(image_view_indices.data(), image_view_indices.size());
- texture_cache.FillComputeImageViews(indices_span, image_view_ids);
+ for (const auto& desc : info.image_descriptors) {
+ add_image(desc, desc.is_written);
+ }
+ texture_cache.FillComputeImageViews(std::span(views.data(), views.size()));
if (assembly_program.handle != 0) {
program_manager.BindComputeAssemblyProgram(assembly_program.handle);
@@ -161,7 +157,7 @@ void ComputePipeline::Configure() {
if constexpr (is_image) {
is_written = desc.is_written;
}
- ImageView& image_view{texture_cache.GetImageView(image_view_ids[texbuf_index])};
+ ImageView& image_view{texture_cache.GetImageView(views[texbuf_index].id)};
buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(),
image_view.BufferSize(), image_view.format,
is_written, is_image);
@@ -177,23 +173,45 @@ void ComputePipeline::Configure() {
buffer_cache.runtime.SetImagePointers(textures.data(), images.data());
buffer_cache.BindHostComputeBuffers();
- const ImageId* views_it{image_view_ids.data() + num_texture_buffers + num_image_buffers};
+ const VideoCommon::ImageViewInOut* views_it{views.data() + num_texture_buffers +
+ num_image_buffers};
texture_binding += num_texture_buffers;
image_binding += num_image_buffers;
+ u32 texture_scaling_mask{};
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
- ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
- textures[texture_binding++] = image_view.Handle(desc.type);
+ ImageView& image_view{texture_cache.GetImageView((views_it++)->id)};
+ textures[texture_binding] = image_view.Handle(desc.type);
+ if (texture_cache.IsRescaling(image_view)) {
+ texture_scaling_mask |= 1u << texture_binding;
+ }
+ ++texture_binding;
}
}
+ u32 image_scaling_mask{};
for (const auto& desc : info.image_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
- ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
+ ImageView& image_view{texture_cache.GetImageView((views_it++)->id)};
if (desc.is_written) {
texture_cache.MarkModification(image_view.image_id);
}
- images[image_binding++] = image_view.StorageView(desc.type, desc.format);
+ images[image_binding] = image_view.StorageView(desc.type, desc.format);
+ if (texture_cache.IsRescaling(image_view)) {
+ image_scaling_mask |= 1u << image_binding;
+ }
+ ++image_binding;
+ }
+ }
+ if (info.uses_rescaling_uniform) {
+ const f32 float_texture_scaling_mask{Common::BitCast<f32>(texture_scaling_mask)};
+ const f32 float_image_scaling_mask{Common::BitCast<f32>(image_scaling_mask)};
+ if (assembly_program.handle != 0) {
+ glProgramLocalParameter4fARB(GL_COMPUTE_PROGRAM_NV, 0, float_texture_scaling_mask,
+ float_image_scaling_mask, 0.0f, 0.0f);
+ } else {
+ glProgramUniform4f(source_program.handle, 0, float_texture_scaling_mask,
+ float_image_scaling_mask, 0.0f, 0.0f);
}
}
if (texture_binding != 0) {
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
index bccb37a58..f8495896c 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
@@ -15,7 +15,7 @@
#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/shader_notify.h"
-#include "video_core/texture_cache/texture_cache_base.h"
+#include "video_core/texture_cache/texture_cache.h"
#if defined(_MSC_VER) && defined(NDEBUG)
#define LAMBDA_FORCEINLINE [[msvc::forceinline]]
@@ -27,6 +27,7 @@ namespace OpenGL {
namespace {
using Shader::ImageBufferDescriptor;
using Shader::ImageDescriptor;
+using Shader::NumDescriptors;
using Shader::TextureBufferDescriptor;
using Shader::TextureDescriptor;
using Tegra::Texture::TexturePair;
@@ -35,15 +36,6 @@ using VideoCommon::ImageId;
constexpr u32 MAX_TEXTURES = 64;
constexpr u32 MAX_IMAGES = 8;
-template <typename Range>
-u32 AccumulateCount(const Range& range) {
- u32 num{};
- for (const auto& desc : range) {
- num += desc.count;
- }
- return num;
-}
-
GLenum Stage(size_t stage_index) {
switch (stage_index) {
case 0:
@@ -204,23 +196,23 @@ GraphicsPipeline::GraphicsPipeline(
base_uniform_bindings[stage + 1] = base_uniform_bindings[stage];
base_storage_bindings[stage + 1] = base_storage_bindings[stage];
- base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors);
- base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors);
+ base_uniform_bindings[stage + 1] += NumDescriptors(info.constant_buffer_descriptors);
+ base_storage_bindings[stage + 1] += NumDescriptors(info.storage_buffers_descriptors);
}
enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask;
std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin());
- const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)};
+ const u32 num_tex_buffer_bindings{NumDescriptors(info.texture_buffer_descriptors)};
num_texture_buffers[stage] += num_tex_buffer_bindings;
num_textures += num_tex_buffer_bindings;
- const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)};
+ const u32 num_img_buffers_bindings{NumDescriptors(info.image_buffer_descriptors)};
num_image_buffers[stage] += num_img_buffers_bindings;
num_images += num_img_buffers_bindings;
- num_textures += AccumulateCount(info.texture_descriptors);
- num_images += AccumulateCount(info.image_descriptors);
- num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors);
+ num_textures += NumDescriptors(info.texture_descriptors);
+ num_images += NumDescriptors(info.image_descriptors);
+ num_storage_buffers += NumDescriptors(info.storage_buffers_descriptors);
writes_global_memory |= std::ranges::any_of(
info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; });
@@ -288,10 +280,9 @@ GraphicsPipeline::GraphicsPipeline(
template <typename Spec>
void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
- std::array<ImageId, MAX_TEXTURES + MAX_IMAGES> image_view_ids;
- std::array<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices;
+ std::array<VideoCommon::ImageViewInOut, MAX_TEXTURES + MAX_IMAGES> views;
std::array<GLuint, MAX_TEXTURES> samplers;
- size_t image_view_index{};
+ size_t views_index{};
GLsizei sampler_binding{};
texture_cache.SynchronizeGraphicsDescriptors();
@@ -336,30 +327,34 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
}
return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
}};
- const auto add_image{[&](const auto& desc) {
+ const auto add_image{[&](const auto& desc, bool blacklist) LAMBDA_FORCEINLINE {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
- image_view_indices[image_view_index++] = handle.first;
+ views[views_index++] = {
+ .index = handle.first,
+ .blacklist = blacklist,
+ .id = {},
+ };
}
}};
if constexpr (Spec::has_texture_buffers) {
for (const auto& desc : info.texture_buffer_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
- image_view_indices[image_view_index++] = handle.first;
+ views[views_index++] = {handle.first};
samplers[sampler_binding++] = 0;
}
}
}
if constexpr (Spec::has_image_buffers) {
for (const auto& desc : info.image_buffer_descriptors) {
- add_image(desc);
+ add_image(desc, false);
}
}
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
- image_view_indices[image_view_index++] = handle.first;
+ views[views_index++] = {handle.first};
Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)};
samplers[sampler_binding++] = sampler->Handle();
@@ -367,7 +362,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
}
if constexpr (Spec::has_images) {
for (const auto& desc : info.image_descriptors) {
- add_image(desc);
+ add_image(desc, desc.is_written);
}
}
}};
@@ -386,13 +381,12 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
if constexpr (Spec::enabled_stages[4]) {
config_stage(4);
}
- const std::span indices_span(image_view_indices.data(), image_view_index);
- texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
+ texture_cache.FillGraphicsImageViews<Spec::has_images>(std::span(views.data(), views_index));
texture_cache.UpdateRenderTargets(false);
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
- ImageId* texture_buffer_index{image_view_ids.data()};
+ VideoCommon::ImageViewInOut* texture_buffer_it{views.data()};
const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE {
size_t index{};
const auto add_buffer{[&](const auto& desc) {
@@ -402,12 +396,12 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
if constexpr (is_image) {
is_written = desc.is_written;
}
- ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)};
+ ImageView& image_view{texture_cache.GetImageView(texture_buffer_it->id)};
buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(),
image_view.BufferSize(), image_view.format,
is_written, is_image);
++index;
- ++texture_buffer_index;
+ ++texture_buffer_it;
}
}};
const Shader::Info& info{stage_infos[stage]};
@@ -423,13 +417,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
add_buffer(desc);
}
}
- for (const auto& desc : info.texture_descriptors) {
- texture_buffer_index += desc.count;
- }
+ texture_buffer_it += Shader::NumDescriptors(info.texture_descriptors);
if constexpr (Spec::has_images) {
- for (const auto& desc : info.image_descriptors) {
- texture_buffer_index += desc.count;
- }
+ texture_buffer_it += Shader::NumDescriptors(info.image_descriptors);
}
}};
if constexpr (Spec::enabled_stages[0]) {
@@ -453,12 +443,13 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
if (!is_built.load(std::memory_order::relaxed)) {
WaitForBuild();
}
- if (assembly_programs[0].handle != 0) {
+ const bool use_assembly{assembly_programs[0].handle != 0};
+ if (use_assembly) {
program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask);
} else {
program_manager.BindSourcePrograms(source_programs);
}
- const ImageId* views_it{image_view_ids.data()};
+ const VideoCommon::ImageViewInOut* views_it{views.data()};
GLsizei texture_binding = 0;
GLsizei image_binding = 0;
std::array<GLuint, MAX_TEXTURES> textures;
@@ -473,20 +464,49 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
views_it += num_texture_buffers[stage];
views_it += num_image_buffers[stage];
+ u32 texture_scaling_mask{};
+ u32 image_scaling_mask{};
+ u32 stage_texture_binding{};
+ u32 stage_image_binding{};
+
const auto& info{stage_infos[stage]};
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
- ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
- textures[texture_binding++] = image_view.Handle(desc.type);
+ ImageView& image_view{texture_cache.GetImageView((views_it++)->id)};
+ textures[texture_binding] = image_view.Handle(desc.type);
+ if (texture_cache.IsRescaling(image_view)) {
+ texture_scaling_mask |= 1u << stage_texture_binding;
+ }
+ ++texture_binding;
+ ++stage_texture_binding;
}
}
for (const auto& desc : info.image_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
- ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
+ ImageView& image_view{texture_cache.GetImageView((views_it++)->id)};
if (desc.is_written) {
texture_cache.MarkModification(image_view.image_id);
}
- images[image_binding++] = image_view.StorageView(desc.type, desc.format);
+ images[image_binding] = image_view.StorageView(desc.type, desc.format);
+ if (texture_cache.IsRescaling(image_view)) {
+ image_scaling_mask |= 1u << stage_image_binding;
+ }
+ ++image_binding;
+ ++stage_image_binding;
+ }
+ }
+ if (info.uses_rescaling_uniform) {
+ const f32 float_texture_scaling_mask{Common::BitCast<f32>(texture_scaling_mask)};
+ const f32 float_image_scaling_mask{Common::BitCast<f32>(image_scaling_mask)};
+ const bool is_rescaling{texture_cache.IsRescaling()};
+ const f32 config_down_factor{Settings::values.resolution_info.down_factor};
+ const f32 down_factor{is_rescaling ? config_down_factor : 1.0f};
+ if (use_assembly) {
+ glProgramLocalParameter4fARB(AssemblyStage(stage), 0, float_texture_scaling_mask,
+ float_image_scaling_mask, down_factor, 0.0f);
+ } else {
+ glProgramUniform4f(source_programs[stage].handle, 0, float_texture_scaling_mask,
+ float_image_scaling_mask, down_factor, 0.0f);
}
}
}};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index a6d9f7c43..9b516c64f 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -184,6 +184,10 @@ void RasterizerOpenGL::Clear() {
SyncRasterizeEnable();
SyncStencilTestState();
+ std::scoped_lock lock{texture_cache.mutex};
+ texture_cache.UpdateRenderTargets(true);
+ state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
+ SyncViewport();
if (regs.clear_flags.scissor) {
SyncScissorTest();
} else {
@@ -192,10 +196,6 @@ void RasterizerOpenGL::Clear() {
}
UNIMPLEMENTED_IF(regs.clear_flags.viewport);
- std::scoped_lock lock{texture_cache.mutex};
- texture_cache.UpdateRenderTargets(true);
- state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
-
if (use_color) {
glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
}
@@ -214,8 +214,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
query_cache.UpdateCounters();
- SyncState();
-
GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()};
if (!pipeline) {
return;
@@ -223,6 +221,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
pipeline->Configure(is_indexed);
+ SyncState();
+
const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology);
BeginTransformFeedback(pipeline, primitive_mode);
@@ -533,7 +533,8 @@ void RasterizerOpenGL::SyncViewport() {
auto& flags = maxwell3d.dirty.flags;
const auto& regs = maxwell3d.regs;
- const bool dirty_viewport = flags[Dirty::Viewports];
+ const bool rescale_viewports = flags[VideoCommon::Dirty::RescaleViewports];
+ const bool dirty_viewport = flags[Dirty::Viewports] || rescale_viewports;
const bool dirty_clip_control = flags[Dirty::ClipControl];
if (dirty_clip_control || flags[Dirty::FrontFace]) {
@@ -553,8 +554,7 @@ void RasterizerOpenGL::SyncViewport() {
}
glFrontFace(mode);
}
-
- if (dirty_viewport || flags[Dirty::ClipControl]) {
+ if (dirty_viewport || dirty_clip_control) {
flags[Dirty::ClipControl] = false;
bool flip_y = false;
@@ -570,37 +570,58 @@ void RasterizerOpenGL::SyncViewport() {
state_tracker.ClipControl(origin, depth);
state_tracker.SetYNegate(regs.screen_y_control.y_negate != 0);
}
+ const bool is_rescaling{texture_cache.IsRescaling()};
+ const float scale = is_rescaling ? Settings::values.resolution_info.up_factor : 1.0f;
+ const auto conv = [scale](float value) -> GLfloat {
+ float new_value = value * scale;
+ if (scale < 1.0f) {
+ const bool sign = std::signbit(value);
+ new_value = std::round(std::abs(new_value));
+ new_value = sign ? -new_value : new_value;
+ }
+ return static_cast<GLfloat>(new_value);
+ };
if (dirty_viewport) {
flags[Dirty::Viewports] = false;
- const bool force = flags[Dirty::ViewportTransform];
+ const bool force = flags[Dirty::ViewportTransform] || rescale_viewports;
flags[Dirty::ViewportTransform] = false;
+ flags[VideoCommon::Dirty::RescaleViewports] = false;
- for (std::size_t i = 0; i < Maxwell::NumViewports; ++i) {
- if (!force && !flags[Dirty::Viewport0 + i]) {
+ for (size_t index = 0; index < Maxwell::NumViewports; ++index) {
+ if (!force && !flags[Dirty::Viewport0 + index]) {
continue;
}
- flags[Dirty::Viewport0 + i] = false;
+ flags[Dirty::Viewport0 + index] = false;
- const auto& src = regs.viewport_transform[i];
- const Common::Rectangle<f32> rect{src.GetRect()};
- glViewportIndexedf(static_cast<GLuint>(i), rect.left, rect.bottom, rect.GetWidth(),
- rect.GetHeight());
+ const auto& src = regs.viewport_transform[index];
+ GLfloat x = conv(src.translate_x - src.scale_x);
+ GLfloat y = conv(src.translate_y - src.scale_y);
+ GLfloat width = conv(src.scale_x * 2.0f);
+ GLfloat height = conv(src.scale_y * 2.0f);
+
+ if (height < 0) {
+ y += height;
+ height = -height;
+ }
+ glViewportIndexedf(static_cast<GLuint>(index), x, y, width != 0.0f ? width : 1.0f,
+ height != 0.0f ? height : 1.0f);
const GLdouble reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne;
const GLdouble near_depth = src.translate_z - src.scale_z * reduce_z;
const GLdouble far_depth = src.translate_z + src.scale_z;
if (device.HasDepthBufferFloat()) {
- glDepthRangeIndexeddNV(static_cast<GLuint>(i), near_depth, far_depth);
+ glDepthRangeIndexeddNV(static_cast<GLuint>(index), near_depth, far_depth);
} else {
- glDepthRangeIndexed(static_cast<GLuint>(i), near_depth, far_depth);
+ glDepthRangeIndexed(static_cast<GLuint>(index), near_depth, far_depth);
}
if (!GLAD_GL_NV_viewport_swizzle) {
continue;
}
- glViewportSwizzleNV(static_cast<GLuint>(i), MaxwellToGL::ViewportSwizzle(src.swizzle.x),
+ glViewportSwizzleNV(static_cast<GLuint>(index),
+ MaxwellToGL::ViewportSwizzle(src.swizzle.x),
MaxwellToGL::ViewportSwizzle(src.swizzle.y),
MaxwellToGL::ViewportSwizzle(src.swizzle.z),
MaxwellToGL::ViewportSwizzle(src.swizzle.w));
@@ -903,14 +924,34 @@ void RasterizerOpenGL::SyncLogicOpState() {
void RasterizerOpenGL::SyncScissorTest() {
auto& flags = maxwell3d.dirty.flags;
- if (!flags[Dirty::Scissors]) {
+ if (!flags[Dirty::Scissors] && !flags[VideoCommon::Dirty::RescaleScissors]) {
return;
}
flags[Dirty::Scissors] = false;
+ const bool force = flags[VideoCommon::Dirty::RescaleScissors];
+ flags[VideoCommon::Dirty::RescaleScissors] = false;
+
const auto& regs = maxwell3d.regs;
+
+ const auto& resolution = Settings::values.resolution_info;
+ const bool is_rescaling{texture_cache.IsRescaling()};
+ const u32 up_scale = is_rescaling ? resolution.up_scale : 1U;
+ const u32 down_shift = is_rescaling ? resolution.down_shift : 0U;
+ const auto scale_up = [up_scale, down_shift](u32 value) -> u32 {
+ if (value == 0) {
+ return 0U;
+ }
+ const u32 upset = value * up_scale;
+ u32 acumm{};
+ if ((up_scale >> down_shift) == 0) {
+ acumm = upset % 2;
+ }
+ const u32 converted_value = upset >> down_shift;
+ return std::max<u32>(converted_value + acumm, 1U);
+ };
for (std::size_t index = 0; index < Maxwell::NumViewports; ++index) {
- if (!flags[Dirty::Scissor0 + index]) {
+ if (!force && !flags[Dirty::Scissor0 + index]) {
continue;
}
flags[Dirty::Scissor0 + index] = false;
@@ -918,8 +959,8 @@ void RasterizerOpenGL::SyncScissorTest() {
const auto& src = regs.scissor_test[index];
if (src.enable) {
glEnablei(GL_SCISSOR_TEST, static_cast<GLuint>(index));
- glScissorIndexed(static_cast<GLuint>(index), src.min_x, src.min_y,
- src.max_x - src.min_x, src.max_y - src.min_y);
+ glScissorIndexed(static_cast<GLuint>(index), scale_up(src.min_x), scale_up(src.min_y),
+ scale_up(src.max_x - src.min_x), scale_up(src.max_y - src.min_y));
} else {
glDisablei(GL_SCISSOR_TEST, static_cast<GLuint>(index));
}
@@ -935,8 +976,9 @@ void RasterizerOpenGL::SyncPointState() {
oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable);
oglEnable(GL_PROGRAM_POINT_SIZE, maxwell3d.regs.vp_point_size.enable);
-
- glPointSize(std::max(1.0f, maxwell3d.regs.point_size));
+ const bool is_rescaling{texture_cache.IsRescaling()};
+ const float scale = is_rescaling ? Settings::values.resolution_info.up_factor : 1.0f;
+ glPointSize(std::max(1.0f, maxwell3d.regs.point_size * scale));
}
void RasterizerOpenGL::SyncLineState() {
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index 8695c29e3..5e7101d28 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -166,7 +166,12 @@ void OGLFramebuffer::Create() {
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
+ // Bind to READ_FRAMEBUFFER to stop Nvidia's driver from creating an EXT_framebuffer instead of
+ // a core framebuffer. EXT framebuffer attachments have to match in size and can be shared
+ // across contexts. yuzu doesn't share framebuffers across contexts and we need attachments with
+ // mismatching size, this is why core framebuffers are preferred.
glGenFramebuffers(1, &handle);
+ glBindFramebuffer(GL_READ_FRAMEBUFFER, handle);
}
void OGLFramebuffer::Release() {
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 02682bd76..42ef67628 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -426,16 +426,14 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
// Normal path
programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info);
- for (const auto& desc : programs[index].info.storage_buffers_descriptors) {
- total_storage_buffers += desc.count;
- }
+ total_storage_buffers +=
+ Shader::NumDescriptors(programs[index].info.storage_buffers_descriptors);
} else {
// VertexB path when VertexA is present.
auto& program_va{programs[0]};
auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
- for (const auto& desc : program_vb.info.storage_buffers_descriptors) {
- total_storage_buffers += desc.count;
- }
+ total_storage_buffers +=
+ Shader::NumDescriptors(program_vb.info.storage_buffers_descriptors);
programs[index] = MergeDualVertexPrograms(program_va, program_vb, env);
}
}
@@ -510,10 +508,7 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
- u32 num_storage_buffers{};
- for (const auto& desc : program.info.storage_buffers_descriptors) {
- num_storage_buffers += desc.count;
- }
+ const u32 num_storage_buffers{Shader::NumDescriptors(program.info.storage_buffers_descriptors)};
Shader::RuntimeInfo info;
info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks();
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 8c3ca3d82..2f7d98d8b 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -9,8 +9,8 @@
#include <glad/glad.h>
+#include "common/literals.h"
#include "common/settings.h"
-
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
@@ -42,6 +42,7 @@ using VideoCore::Surface::IsPixelFormatSRGB;
using VideoCore::Surface::MaxPixelFormat;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::SurfaceType;
+using namespace Common::Literals;
struct CopyOrigin {
GLint level;
@@ -316,6 +317,52 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
}
}
+OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_format) {
+ const GLenum target = ImageTarget(info);
+ const GLsizei width = info.size.width;
+ const GLsizei height = info.size.height;
+ const GLsizei depth = info.size.depth;
+ const int max_host_mip_levels = std::bit_width(info.size.width);
+ const GLsizei num_levels = std::min(info.resources.levels, max_host_mip_levels);
+ const GLsizei num_layers = info.resources.layers;
+ const GLsizei num_samples = info.num_samples;
+
+ GLuint handle = 0;
+ OGLTexture texture;
+ if (target != GL_TEXTURE_BUFFER) {
+ texture.Create(target);
+ handle = texture.handle;
+ }
+ switch (target) {
+ case GL_TEXTURE_1D_ARRAY:
+ glTextureStorage2D(handle, num_levels, gl_internal_format, width, num_layers);
+ break;
+ case GL_TEXTURE_2D_ARRAY:
+ glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, num_layers);
+ break;
+ case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: {
+ // TODO: Where should 'fixedsamplelocations' come from?
+ const auto [samples_x, samples_y] = SamplesLog2(info.num_samples);
+ glTextureStorage3DMultisample(handle, num_samples, gl_internal_format, width >> samples_x,
+ height >> samples_y, num_layers, GL_FALSE);
+ break;
+ }
+ case GL_TEXTURE_RECTANGLE:
+ glTextureStorage2D(handle, num_levels, gl_internal_format, width, height);
+ break;
+ case GL_TEXTURE_3D:
+ glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth);
+ break;
+ case GL_TEXTURE_BUFFER:
+ UNREACHABLE();
+ break;
+ default:
+ UNREACHABLE_MSG("Invalid target=0x{:x}", target);
+ break;
+ }
+ return texture;
+}
+
[[nodiscard]] bool IsPixelFormatBGR(PixelFormat format) {
switch (format) {
case PixelFormat::B5G6R5_UNORM:
@@ -359,7 +406,8 @@ ImageBufferMap::~ImageBufferMap() {
TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager,
StateTracker& state_tracker_)
- : device{device_}, state_tracker{state_tracker_}, util_shaders(program_manager) {
+ : device{device_}, state_tracker{state_tracker_},
+ util_shaders(program_manager), resolution{Settings::values.resolution_info} {
static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D};
for (size_t i = 0; i < TARGETS.size(); ++i) {
const GLenum target = TARGETS[i];
@@ -426,6 +474,13 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager&
set_view(Shader::TextureType::ColorArray1D, null_image_1d_array.handle);
set_view(Shader::TextureType::ColorArray2D, null_image_view_2d_array.handle);
set_view(Shader::TextureType::ColorArrayCube, null_image_cube_array.handle);
+
+ if (resolution.active) {
+ for (size_t i = 0; i < rescale_draw_fbos.size(); ++i) {
+ rescale_draw_fbos[i].Create();
+ rescale_read_fbos[i].Create();
+ }
+ }
}
TextureCacheRuntime::~TextureCacheRuntime() = default;
@@ -442,6 +497,15 @@ ImageBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
return download_buffers.RequestMap(size, false);
}
+u64 TextureCacheRuntime::GetDeviceLocalMemory() const {
+ if (GLAD_GL_NVX_gpu_memory_info) {
+ GLint cur_avail_mem_kb = 0;
+ glGetIntegerv(GL_GPU_MEMORY_INFO_CURRENT_AVAILABLE_VIDMEM_NVX, &cur_avail_mem_kb);
+ return static_cast<u64>(cur_avail_mem_kb) * 1_KiB;
+ }
+ return 2_GiB; // Return minimum requirements
+}
+
void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image,
std::span<const ImageCopy> copies) {
const GLuint dst_name = dst_image.Handle();
@@ -605,13 +669,13 @@ std::optional<size_t> TextureCacheRuntime::StagingBuffers::FindBuffer(size_t req
return found;
}
-Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_,
+Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_,
VAddr cpu_addr_)
- : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_) {
- if (CanBeAccelerated(runtime, info)) {
+ : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), runtime{&runtime_} {
+ if (CanBeAccelerated(*runtime, info)) {
flags |= ImageFlagBits::AcceleratedUpload;
}
- if (IsConverted(runtime.device, info.format, info.type)) {
+ if (IsConverted(runtime->device, info.format, info.type)) {
flags |= ImageFlagBits::Converted;
gl_internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8;
gl_format = GL_RGBA;
@@ -622,58 +686,25 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
gl_format = tuple.format;
gl_type = tuple.type;
}
- const GLenum target = ImageTarget(info);
- const GLsizei width = info.size.width;
- const GLsizei height = info.size.height;
- const GLsizei depth = info.size.depth;
- const int max_host_mip_levels = std::bit_width(info.size.width);
- const GLsizei num_levels = std::min(info.resources.levels, max_host_mip_levels);
- const GLsizei num_layers = info.resources.layers;
- const GLsizei num_samples = info.num_samples;
-
- GLuint handle = 0;
- if (target != GL_TEXTURE_BUFFER) {
- texture.Create(target);
- handle = texture.handle;
- }
- switch (target) {
- case GL_TEXTURE_1D_ARRAY:
- glTextureStorage2D(handle, num_levels, gl_internal_format, width, num_layers);
- break;
- case GL_TEXTURE_2D_ARRAY:
- glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, num_layers);
- break;
- case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: {
- // TODO: Where should 'fixedsamplelocations' come from?
- const auto [samples_x, samples_y] = SamplesLog2(info.num_samples);
- glTextureStorage3DMultisample(handle, num_samples, gl_internal_format, width >> samples_x,
- height >> samples_y, num_layers, GL_FALSE);
- break;
- }
- case GL_TEXTURE_RECTANGLE:
- glTextureStorage2D(handle, num_levels, gl_internal_format, width, height);
- break;
- case GL_TEXTURE_3D:
- glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth);
- break;
- case GL_TEXTURE_BUFFER:
- UNREACHABLE();
- break;
- default:
- UNREACHABLE_MSG("Invalid target=0x{:x}", target);
- break;
- }
- if (runtime.device.HasDebuggingToolAttached()) {
+ texture = MakeImage(info, gl_internal_format);
+ current_texture = texture.handle;
+ if (runtime->device.HasDebuggingToolAttached()) {
const std::string name = VideoCommon::Name(*this);
- glObjectLabel(target == GL_TEXTURE_BUFFER ? GL_BUFFER : GL_TEXTURE, handle,
- static_cast<GLsizei>(name.size()), name.data());
+ glObjectLabel(ImageTarget(info) == GL_TEXTURE_BUFFER ? GL_BUFFER : GL_TEXTURE,
+ texture.handle, static_cast<GLsizei>(name.size()), name.data());
}
}
+Image::Image(const VideoCommon::NullImageParams& params) : VideoCommon::ImageBase{params} {}
+
Image::~Image() = default;
void Image::UploadMemory(const ImageBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies) {
+ const bool is_rescaled = True(flags & ImageFlagBits::Rescaled);
+ if (is_rescaled) {
+ ScaleDown(true);
+ }
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer);
glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, map.offset, unswizzled_size_bytes);
@@ -693,12 +724,18 @@ void Image::UploadMemory(const ImageBufferMap& map,
}
CopyBufferToImage(copy, map.offset);
}
+ if (is_rescaled) {
+ ScaleUp();
+ }
}
void Image::DownloadMemory(ImageBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies) {
+ const bool is_rescaled = True(flags & ImageFlagBits::Rescaled);
+ if (is_rescaled) {
+ ScaleDown();
+ }
glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API
-
glBindBuffer(GL_PIXEL_PACK_BUFFER, map.buffer);
glPixelStorei(GL_PACK_ALIGNMENT, 1);
@@ -716,6 +753,9 @@ void Image::DownloadMemory(ImageBufferMap& map,
}
CopyImageToBuffer(copy, map.offset);
}
+ if (is_rescaled) {
+ ScaleUp(true);
+ }
}
GLuint Image::StorageHandle() noexcept {
@@ -741,11 +781,11 @@ GLuint Image::StorageHandle() noexcept {
return store_view.handle;
}
store_view.Create();
- glTextureView(store_view.handle, ImageTarget(info), texture.handle, GL_RGBA8, 0,
+ glTextureView(store_view.handle, ImageTarget(info), current_texture, GL_RGBA8, 0,
info.resources.levels, 0, info.resources.layers);
return store_view.handle;
default:
- return texture.handle;
+ return current_texture;
}
}
@@ -849,6 +889,140 @@ void Image::CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t b
}
}
+void Image::Scale(bool up_scale) {
+ const auto format_type = GetFormatType(info.format);
+ const GLenum attachment = [format_type] {
+ switch (format_type) {
+ case SurfaceType::ColorTexture:
+ return GL_COLOR_ATTACHMENT0;
+ case SurfaceType::Depth:
+ return GL_DEPTH_ATTACHMENT;
+ case SurfaceType::DepthStencil:
+ return GL_DEPTH_STENCIL_ATTACHMENT;
+ default:
+ UNREACHABLE();
+ return GL_COLOR_ATTACHMENT0;
+ }
+ }();
+ const GLenum mask = [format_type] {
+ switch (format_type) {
+ case SurfaceType::ColorTexture:
+ return GL_COLOR_BUFFER_BIT;
+ case SurfaceType::Depth:
+ return GL_DEPTH_BUFFER_BIT;
+ case SurfaceType::DepthStencil:
+ return GL_STENCIL_BUFFER_BIT | GL_DEPTH_BUFFER_BIT;
+ default:
+ UNREACHABLE();
+ return GL_COLOR_BUFFER_BIT;
+ }
+ }();
+ const size_t fbo_index = [format_type] {
+ switch (format_type) {
+ case SurfaceType::ColorTexture:
+ return 0;
+ case SurfaceType::Depth:
+ return 1;
+ case SurfaceType::DepthStencil:
+ return 2;
+ default:
+ UNREACHABLE();
+ return 0;
+ }
+ }();
+ const bool is_2d = info.type == ImageType::e2D;
+ const bool is_color{(mask & GL_COLOR_BUFFER_BIT) != 0};
+ // Integer formats must use NEAREST filter
+ const bool linear_color_format{is_color && !IsPixelFormatInteger(info.format)};
+ const GLenum filter = linear_color_format ? GL_LINEAR : GL_NEAREST;
+
+ const auto& resolution = runtime->resolution;
+ const u32 scaled_width = resolution.ScaleUp(info.size.width);
+ const u32 scaled_height = is_2d ? resolution.ScaleUp(info.size.height) : info.size.height;
+ const u32 original_width = info.size.width;
+ const u32 original_height = info.size.height;
+
+ if (!upscaled_backup.handle) {
+ auto dst_info = info;
+ dst_info.size.width = scaled_width;
+ dst_info.size.height = scaled_height;
+ upscaled_backup = MakeImage(dst_info, gl_internal_format);
+ }
+ const u32 src_width = up_scale ? original_width : scaled_width;
+ const u32 src_height = up_scale ? original_height : scaled_height;
+ const u32 dst_width = up_scale ? scaled_width : original_width;
+ const u32 dst_height = up_scale ? scaled_height : original_height;
+ const auto src_handle = up_scale ? texture.handle : upscaled_backup.handle;
+ const auto dst_handle = up_scale ? upscaled_backup.handle : texture.handle;
+
+ // TODO (ameerj): Investigate other GL states that affect blitting.
+ glDisablei(GL_SCISSOR_TEST, 0);
+ glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(dst_width),
+ static_cast<GLfloat>(dst_height));
+
+ const GLuint read_fbo = runtime->rescale_read_fbos[fbo_index].handle;
+ const GLuint draw_fbo = runtime->rescale_draw_fbos[fbo_index].handle;
+ for (s32 layer = 0; layer < info.resources.layers; ++layer) {
+ for (s32 level = 0; level < info.resources.levels; ++level) {
+ const u32 src_level_width = std::max(1u, src_width >> level);
+ const u32 src_level_height = std::max(1u, src_height >> level);
+ const u32 dst_level_width = std::max(1u, dst_width >> level);
+ const u32 dst_level_height = std::max(1u, dst_height >> level);
+
+ glNamedFramebufferTextureLayer(read_fbo, attachment, src_handle, level, layer);
+ glNamedFramebufferTextureLayer(draw_fbo, attachment, dst_handle, level, layer);
+
+ glBlitNamedFramebuffer(read_fbo, draw_fbo, 0, 0, src_level_width, src_level_height, 0,
+ 0, dst_level_width, dst_level_height, mask, filter);
+ }
+ }
+ current_texture = dst_handle;
+ auto& state_tracker = runtime->GetStateTracker();
+ state_tracker.NotifyViewport0();
+ state_tracker.NotifyScissor0();
+}
+
+bool Image::ScaleUp(bool ignore) {
+ if (True(flags & ImageFlagBits::Rescaled)) {
+ return false;
+ }
+ if (gl_format == 0 && gl_type == 0) {
+ // compressed textures
+ return false;
+ }
+ if (info.type == ImageType::Linear) {
+ UNREACHABLE();
+ return false;
+ }
+ flags |= ImageFlagBits::Rescaled;
+ if (!runtime->resolution.active) {
+ return false;
+ }
+ has_scaled = true;
+ if (ignore) {
+ current_texture = upscaled_backup.handle;
+ return true;
+ }
+ Scale(true);
+ return true;
+}
+
+bool Image::ScaleDown(bool ignore) {
+ if (False(flags & ImageFlagBits::Rescaled)) {
+ return false;
+ }
+ flags &= ~ImageFlagBits::Rescaled;
+ if (!runtime->resolution.active) {
+ return false;
+ }
+ if (ignore) {
+ current_texture = texture.handle;
+ return true;
+ }
+ Scale(false);
+ return true;
+}
+
ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
ImageId image_id_, Image& image)
: VideoCommon::ImageViewBase{info, image.info, image_id_}, views{runtime.null_image_views} {
@@ -862,7 +1036,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
flat_range = info.range;
set_object_label = device.HasDebuggingToolAttached();
is_render_target = info.IsRenderTarget();
- original_texture = image.texture.handle;
+ original_texture = image.Handle();
num_samples = image.info.num_samples;
if (!is_render_target) {
swizzle[0] = info.x_source;
@@ -950,9 +1124,11 @@ ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
const VideoCommon::ImageViewInfo& view_info)
: VideoCommon::ImageViewBase{info, view_info} {}
-ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params)
+ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageViewParams& params)
: VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {}
+ImageView::~ImageView() = default;
+
GLuint ImageView::StorageView(Shader::TextureType texture_type, Shader::ImageFormat image_format) {
if (image_format == Shader::ImageFormat::Typeless) {
return Handle(texture_type);
@@ -1037,7 +1213,8 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) {
glSamplerParameterfv(handle, GL_TEXTURE_BORDER_COLOR, config.BorderColor().data());
if (GLAD_GL_ARB_texture_filter_anisotropic || GLAD_GL_EXT_texture_filter_anisotropic) {
- glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, config.MaxAnisotropy());
+ const f32 max_anisotropy = std::clamp(config.MaxAnisotropy(), 1.0f, 16.0f);
+ glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropy);
} else {
LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_anisotropic is required");
}
@@ -1056,13 +1233,8 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) {
Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers,
ImageView* depth_buffer, const VideoCommon::RenderTargets& key) {
- // Bind to READ_FRAMEBUFFER to stop Nvidia's driver from creating an EXT_framebuffer instead of
- // a core framebuffer. EXT framebuffer attachments have to match in size and can be shared
- // across contexts. yuzu doesn't share framebuffers across contexts and we need attachments with
- // mismatching size, this is why core framebuffers are preferred.
- GLuint handle;
- glGenFramebuffers(1, &handle);
- glBindFramebuffer(GL_READ_FRAMEBUFFER, handle);
+ framebuffer.Create();
+ GLuint handle = framebuffer.handle;
GLsizei num_buffers = 0;
std::array<GLenum, NUM_RT> gl_draw_buffers;
@@ -1110,31 +1282,31 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
const std::string name = VideoCommon::Name(key);
glObjectLabel(GL_FRAMEBUFFER, handle, static_cast<GLsizei>(name.size()), name.data());
}
- framebuffer.handle = handle;
}
+Framebuffer::~Framebuffer() = default;
+
void BGRCopyPass::CopyBGR(Image& dst_image, Image& src_image,
std::span<const VideoCommon::ImageCopy> copies) {
static constexpr VideoCommon::Offset3D zero_offset{0, 0, 0};
- const u32 requested_pbo_size =
- std::max(src_image.unswizzled_size_bytes, dst_image.unswizzled_size_bytes);
-
- if (bgr_pbo_size < requested_pbo_size) {
- bgr_pbo.Create();
- bgr_pbo_size = requested_pbo_size;
- glNamedBufferData(bgr_pbo.handle, bgr_pbo_size, nullptr, GL_STREAM_COPY);
- }
+ const u32 img_bpp = BytesPerBlock(src_image.info.format);
for (const ImageCopy& copy : copies) {
ASSERT(copy.src_offset == zero_offset);
ASSERT(copy.dst_offset == zero_offset);
-
+ const u32 num_src_layers = static_cast<u32>(copy.src_subresource.num_layers);
+ const u32 copy_size = copy.extent.width * copy.extent.height * num_src_layers * img_bpp;
+ if (bgr_pbo_size < copy_size) {
+ bgr_pbo.Create();
+ bgr_pbo_size = copy_size;
+ glNamedBufferData(bgr_pbo.handle, bgr_pbo_size, nullptr, GL_STREAM_COPY);
+ }
// Copy from source to PBO
glPixelStorei(GL_PACK_ALIGNMENT, 1);
glPixelStorei(GL_PACK_ROW_LENGTH, copy.extent.width);
glBindBuffer(GL_PIXEL_PACK_BUFFER, bgr_pbo.handle);
glGetTextureSubImage(src_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height,
- copy.src_subresource.num_layers, src_image.GlFormat(),
- src_image.GlType(), static_cast<GLsizei>(bgr_pbo_size), nullptr);
+ num_src_layers, src_image.GlFormat(), src_image.GlType(),
+ static_cast<GLsizei>(bgr_pbo_size), nullptr);
// Copy from PBO to destination in desired GL format
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 1ca2c90be..1bb762568 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -15,6 +15,10 @@
#include "video_core/texture_cache/image_view_base.h"
#include "video_core/texture_cache/texture_cache_base.h"
+namespace Settings {
+struct ResolutionScalingInfo;
+}
+
namespace OpenGL {
class Device;
@@ -78,9 +82,11 @@ public:
ImageBufferMap DownloadStagingBuffer(size_t size);
+ u64 GetDeviceLocalMemory() const;
+
void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
- void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) {
+ void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled) {
UNIMPLEMENTED();
}
@@ -110,6 +116,12 @@ public:
bool HasNativeASTC() const noexcept;
+ void TickFrame() {}
+
+ StateTracker& GetStateTracker() {
+ return state_tracker;
+ }
+
private:
struct StagingBuffers {
explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);
@@ -149,6 +161,10 @@ private:
OGLTextureView null_image_view_cube;
std::array<GLuint, Shader::NUM_TEXTURE_TYPES> null_image_views{};
+
+ std::array<OGLFramebuffer, 3> rescale_draw_fbos;
+ std::array<OGLFramebuffer, 3> rescale_read_fbos;
+ const Settings::ResolutionScalingInfo& resolution;
};
class Image : public VideoCommon::ImageBase {
@@ -157,6 +173,7 @@ class Image : public VideoCommon::ImageBase {
public:
explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
VAddr cpu_addr);
+ explicit Image(const VideoCommon::NullImageParams&);
~Image();
@@ -174,7 +191,7 @@ public:
GLuint StorageHandle() noexcept;
GLuint Handle() const noexcept {
- return texture.handle;
+ return current_texture;
}
GLuint GlFormat() const noexcept {
@@ -185,16 +202,25 @@ public:
return gl_type;
}
+ bool ScaleUp(bool ignore = false);
+
+ bool ScaleDown(bool ignore = false);
+
private:
void CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
+ void Scale(bool up_scale);
+
OGLTexture texture;
+ OGLTexture upscaled_backup;
OGLTextureView store_view;
GLenum gl_internal_format = GL_NONE;
GLenum gl_format = GL_NONE;
GLenum gl_type = GL_NONE;
+ TextureCacheRuntime* runtime{};
+ GLuint current_texture{};
};
class ImageView : public VideoCommon::ImageViewBase {
@@ -206,7 +232,15 @@ public:
const VideoCommon::ImageViewInfo&, GPUVAddr);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
const VideoCommon::ImageViewInfo& view_info);
- explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&);
+ explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams&);
+
+ ~ImageView();
+
+ ImageView(const ImageView&) = delete;
+ ImageView& operator=(const ImageView&) = delete;
+
+ ImageView(ImageView&&) = default;
+ ImageView& operator=(ImageView&&) = default;
[[nodiscard]] GLuint StorageView(Shader::TextureType texture_type,
Shader::ImageFormat image_format);
@@ -276,6 +310,14 @@ public:
explicit Framebuffer(TextureCacheRuntime&, std::span<ImageView*, NUM_RT> color_buffers,
ImageView* depth_buffer, const VideoCommon::RenderTargets& key);
+ ~Framebuffer();
+
+ Framebuffer(const Framebuffer&) = delete;
+ Framebuffer& operator=(const Framebuffer&) = delete;
+
+ Framebuffer(Framebuffer&&) = default;
+ Framebuffer& operator=(Framebuffer&&) = default;
+
[[nodiscard]] GLuint Handle() const noexcept {
return framebuffer.handle;
}
@@ -293,7 +335,7 @@ struct TextureCacheParams {
static constexpr bool ENABLE_VALIDATION = true;
static constexpr bool FRAMEBUFFER_BLITS = true;
static constexpr bool HAS_EMULATED_COPIES = true;
- static constexpr bool HAS_DEVICE_MEMORY_INFO = false;
+ static constexpr bool HAS_DEVICE_MEMORY_INFO = true;
using Runtime = OpenGL::TextureCacheRuntime;
using Image = OpenGL::Image;
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 7d7cba69c..28daacd82 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -21,8 +21,13 @@
#include "core/memory.h"
#include "core/perf_stats.h"
#include "core/telemetry_session.h"
+#include "video_core/host_shaders/fxaa_frag.h"
+#include "video_core/host_shaders/fxaa_vert.h"
#include "video_core/host_shaders/opengl_present_frag.h"
+#include "video_core/host_shaders/opengl_present_scaleforce_frag.h"
#include "video_core/host_shaders/opengl_present_vert.h"
+#include "video_core/host_shaders/present_bicubic_frag.h"
+#include "video_core/host_shaders/present_gaussian_frag.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
@@ -208,7 +213,9 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
framebuffer_crop_rect = framebuffer.crop_rect;
const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset};
- if (rasterizer.AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) {
+ screen_info.was_accelerated =
+ rasterizer.AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride);
+ if (screen_info.was_accelerated) {
return;
}
@@ -251,12 +258,25 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color
void RendererOpenGL::InitOpenGLObjects() {
// Create shader programs
+ fxaa_vertex = CreateProgram(HostShaders::FXAA_VERT, GL_VERTEX_SHADER);
+ fxaa_fragment = CreateProgram(HostShaders::FXAA_FRAG, GL_FRAGMENT_SHADER);
present_vertex = CreateProgram(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER);
- present_fragment = CreateProgram(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER);
+ present_bilinear_fragment = CreateProgram(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER);
+ present_bicubic_fragment = CreateProgram(HostShaders::PRESENT_BICUBIC_FRAG, GL_FRAGMENT_SHADER);
+ present_gaussian_fragment =
+ CreateProgram(HostShaders::PRESENT_GAUSSIAN_FRAG, GL_FRAGMENT_SHADER);
+ present_scaleforce_fragment =
+ CreateProgram(fmt::format("#version 460\n{}", HostShaders::OPENGL_PRESENT_SCALEFORCE_FRAG),
+ GL_FRAGMENT_SHADER);
// Generate presentation sampler
present_sampler.Create();
glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+ glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+
+ present_sampler_nn.Create();
+ glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+ glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
// Generate VBO handle for drawing
vertex_buffer.Create();
@@ -274,6 +294,8 @@ void RendererOpenGL::InitOpenGLObjects() {
// Clear screen to black
LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
+
+ fxaa_framebuffer.Create();
}
void RendererOpenGL::AddTelemetryFields() {
@@ -325,18 +347,130 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
texture.resource.Release();
texture.resource.Create(GL_TEXTURE_2D);
glTextureStorage2D(texture.resource.handle, 1, internal_format, texture.width, texture.height);
+ fxaa_texture.Release();
+ fxaa_texture.Create(GL_TEXTURE_2D);
+ glTextureStorage2D(fxaa_texture.handle, 1, GL_RGBA16F,
+ Settings::values.resolution_info.ScaleUp(screen_info.texture.width),
+ Settings::values.resolution_info.ScaleUp(screen_info.texture.height));
+ glNamedFramebufferTexture(fxaa_framebuffer.handle, GL_COLOR_ATTACHMENT0, fxaa_texture.handle,
+ 0);
}
void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
+ // TODO: Signal state tracker about these changes
+ state_tracker.NotifyScreenDrawVertexArray();
+ state_tracker.NotifyPolygonModes();
+ state_tracker.NotifyViewport0();
+ state_tracker.NotifyScissor0();
+ state_tracker.NotifyColorMask(0);
+ state_tracker.NotifyBlend0();
+ state_tracker.NotifyFramebuffer();
+ state_tracker.NotifyFrontFace();
+ state_tracker.NotifyCullTest();
+ state_tracker.NotifyDepthTest();
+ state_tracker.NotifyStencilTest();
+ state_tracker.NotifyPolygonOffset();
+ state_tracker.NotifyRasterizeEnable();
+ state_tracker.NotifyFramebufferSRGB();
+ state_tracker.NotifyLogicOp();
+ state_tracker.NotifyClipControl();
+ state_tracker.NotifyAlphaTest();
+
+ state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
+
// Update background color before drawing
glClearColor(Settings::values.bg_red.GetValue() / 255.0f,
Settings::values.bg_green.GetValue() / 255.0f,
Settings::values.bg_blue.GetValue() / 255.0f, 1.0f);
+ glEnable(GL_CULL_FACE);
+ glDisable(GL_COLOR_LOGIC_OP);
+ glDisable(GL_DEPTH_TEST);
+ glDisable(GL_STENCIL_TEST);
+ glDisable(GL_POLYGON_OFFSET_FILL);
+ glDisable(GL_RASTERIZER_DISCARD);
+ glDisable(GL_ALPHA_TEST);
+ glDisablei(GL_BLEND, 0);
+ glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
+ glCullFace(GL_BACK);
+ glFrontFace(GL_CW);
+ glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
+
+ glBindTextureUnit(0, screen_info.display_texture);
+
+ if (Settings::values.anti_aliasing.GetValue() == Settings::AntiAliasing::Fxaa) {
+ program_manager.BindPresentPrograms(fxaa_vertex.handle, fxaa_fragment.handle);
+
+ glEnablei(GL_SCISSOR_TEST, 0);
+ auto viewport_width = screen_info.texture.width;
+ auto scissor_width = framebuffer_crop_rect.GetWidth();
+ if (scissor_width <= 0) {
+ scissor_width = viewport_width;
+ }
+ auto viewport_height = screen_info.texture.height;
+ auto scissor_height = framebuffer_crop_rect.GetHeight();
+ if (scissor_height <= 0) {
+ scissor_height = viewport_height;
+ }
+ if (screen_info.was_accelerated) {
+ viewport_width = Settings::values.resolution_info.ScaleUp(viewport_width);
+ scissor_width = Settings::values.resolution_info.ScaleUp(scissor_width);
+ viewport_height = Settings::values.resolution_info.ScaleUp(viewport_height);
+ scissor_height = Settings::values.resolution_info.ScaleUp(scissor_height);
+ }
+ glScissorIndexed(0, 0, 0, scissor_width, scissor_height);
+ glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(viewport_width),
+ static_cast<GLfloat>(viewport_height));
+ glDepthRangeIndexed(0, 0.0, 0.0);
+
+ glBindSampler(0, present_sampler.handle);
+ GLint old_read_fb;
+ GLint old_draw_fb;
+ glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &old_read_fb);
+ glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb);
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fxaa_framebuffer.handle);
+
+ glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+
+ glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb);
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb);
+
+ glBindTextureUnit(0, fxaa_texture.handle);
+ }
+
// Set projection matrix
const std::array ortho_matrix =
MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height));
- program_manager.BindPresentPrograms(present_vertex.handle, present_fragment.handle);
+
+ GLuint fragment_handle;
+ const auto filter = Settings::values.scaling_filter.GetValue();
+ switch (filter) {
+ case Settings::ScalingFilter::NearestNeighbor:
+ fragment_handle = present_bilinear_fragment.handle;
+ break;
+ case Settings::ScalingFilter::Bilinear:
+ fragment_handle = present_bilinear_fragment.handle;
+ break;
+ case Settings::ScalingFilter::Bicubic:
+ fragment_handle = present_bicubic_fragment.handle;
+ break;
+ case Settings::ScalingFilter::Gaussian:
+ fragment_handle = present_gaussian_fragment.handle;
+ break;
+ case Settings::ScalingFilter::ScaleForce:
+ fragment_handle = present_scaleforce_fragment.handle;
+ break;
+ case Settings::ScalingFilter::Fsr:
+ LOG_WARNING(
+ Render_OpenGL,
+ "FidelityFX FSR Super Sampling is not supported in OpenGL, changing to ScaleForce");
+ fragment_handle = present_scaleforce_fragment.handle;
+ break;
+ default:
+ fragment_handle = present_bilinear_fragment.handle;
+ break;
+ }
+ program_manager.BindPresentPrograms(present_vertex.handle, fragment_handle);
glProgramUniformMatrix3x2fv(present_vertex.handle, ModelViewMatrixLocation, 1, GL_FALSE,
ortho_matrix.data());
@@ -370,6 +504,11 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) /
static_cast<f32>(screen_info.texture.height);
}
+ if (Settings::values.anti_aliasing.GetValue() == Settings::AntiAliasing::Fxaa &&
+ !screen_info.was_accelerated) {
+ scale_u /= Settings::values.resolution_info.up_factor;
+ scale_v /= Settings::values.resolution_info.up_factor;
+ }
const auto& screen = layout.screen;
const std::array vertices = {
@@ -380,47 +519,14 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
};
glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), std::data(vertices));
- // TODO: Signal state tracker about these changes
- state_tracker.NotifyScreenDrawVertexArray();
- state_tracker.NotifyPolygonModes();
- state_tracker.NotifyViewport0();
- state_tracker.NotifyScissor0();
- state_tracker.NotifyColorMask(0);
- state_tracker.NotifyBlend0();
- state_tracker.NotifyFramebuffer();
- state_tracker.NotifyFrontFace();
- state_tracker.NotifyCullTest();
- state_tracker.NotifyDepthTest();
- state_tracker.NotifyStencilTest();
- state_tracker.NotifyPolygonOffset();
- state_tracker.NotifyRasterizeEnable();
- state_tracker.NotifyFramebufferSRGB();
- state_tracker.NotifyLogicOp();
- state_tracker.NotifyClipControl();
- state_tracker.NotifyAlphaTest();
-
- state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
- glEnable(GL_CULL_FACE);
if (screen_info.display_srgb) {
glEnable(GL_FRAMEBUFFER_SRGB);
} else {
glDisable(GL_FRAMEBUFFER_SRGB);
}
- glDisable(GL_COLOR_LOGIC_OP);
- glDisable(GL_DEPTH_TEST);
- glDisable(GL_STENCIL_TEST);
- glDisable(GL_POLYGON_OFFSET_FILL);
- glDisable(GL_RASTERIZER_DISCARD);
- glDisable(GL_ALPHA_TEST);
- glDisablei(GL_BLEND, 0);
glDisablei(GL_SCISSOR_TEST, 0);
- glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
- glCullFace(GL_BACK);
- glFrontFace(GL_CW);
- glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width),
static_cast<GLfloat>(layout.height));
- glDepthRangeIndexed(0, 0.0, 0.0);
glEnableVertexAttribArray(PositionLocation);
glEnableVertexAttribArray(TexCoordLocation);
@@ -440,8 +546,11 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex));
}
- glBindTextureUnit(0, screen_info.display_texture);
- glBindSampler(0, present_sampler.handle);
+ if (Settings::values.scaling_filter.GetValue() != Settings::ScalingFilter::NearestNeighbor) {
+ glBindSampler(0, present_sampler.handle);
+ } else {
+ glBindSampler(0, present_sampler_nn.handle);
+ }
glClear(GL_COLOR_BUFFER_BIT);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index d455f572f..cda333cad 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -50,6 +50,7 @@ struct TextureInfo {
/// Structure used for storing information about the display target for the Switch screen
struct ScreenInfo {
GLuint display_texture{};
+ bool was_accelerated = false;
bool display_srgb{};
const Common::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f};
TextureInfo texture;
@@ -109,9 +110,15 @@ private:
// OpenGL object IDs
OGLSampler present_sampler;
+ OGLSampler present_sampler_nn;
OGLBuffer vertex_buffer;
+ OGLProgram fxaa_vertex;
+ OGLProgram fxaa_fragment;
OGLProgram present_vertex;
- OGLProgram present_fragment;
+ OGLProgram present_bilinear_fragment;
+ OGLProgram present_bicubic_fragment;
+ OGLProgram present_gaussian_fragment;
+ OGLProgram present_scaleforce_fragment;
OGLFramebuffer screenshot_framebuffer;
// GPU address of the vertex buffer
@@ -119,6 +126,8 @@ private:
/// Display information for Switch screen
ScreenInfo screen_info;
+ OGLTexture fxaa_texture;
+ OGLFramebuffer fxaa_framebuffer;
/// OpenGL framebuffer data
std::vector<u8> gl_framebuffer_data;
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp
index 6c1b2f063..b3884a4f5 100644
--- a/src/video_core/renderer_vulkan/blit_image.cpp
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
@@ -363,7 +363,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_,
BlitImageHelper::~BlitImageHelper() = default;
-void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
+void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_view,
const Region2D& dst_region, const Region2D& src_region,
Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation) {
@@ -373,9 +373,8 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageV
.operation = operation,
};
const VkPipelineLayout layout = *one_texture_pipeline_layout;
- const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D);
const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler;
- const VkPipeline pipeline = FindOrEmplacePipeline(key);
+ const VkPipeline pipeline = FindOrEmplaceColorPipeline(key);
scheduler.RequestRenderpass(dst_framebuffer);
scheduler.Record([this, dst_region, src_region, pipeline, layout, sampler,
src_view](vk::CommandBuffer cmdbuf) {
@@ -398,10 +397,13 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer,
Tegra::Engines::Fermi2D::Operation operation) {
ASSERT(filter == Tegra::Engines::Fermi2D::Filter::Point);
ASSERT(operation == Tegra::Engines::Fermi2D::Operation::SrcCopy);
-
+ const BlitImagePipelineKey key{
+ .renderpass = dst_framebuffer->RenderPass(),
+ .operation = operation,
+ };
const VkPipelineLayout layout = *two_textures_pipeline_layout;
const VkSampler sampler = *nearest_sampler;
- const VkPipeline pipeline = BlitDepthStencilPipeline(dst_framebuffer->RenderPass());
+ const VkPipeline pipeline = FindOrEmplaceDepthStencilPipeline(key);
scheduler.RequestRenderpass(dst_framebuffer);
scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view,
src_stencil_view, this](vk::CommandBuffer cmdbuf) {
@@ -419,40 +421,45 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer,
}
void BlitImageHelper::ConvertD32ToR32(const Framebuffer* dst_framebuffer,
- const ImageView& src_image_view) {
+ const ImageView& src_image_view, u32 up_scale,
+ u32 down_shift) {
ConvertDepthToColorPipeline(convert_d32_to_r32_pipeline, dst_framebuffer->RenderPass());
- Convert(*convert_d32_to_r32_pipeline, dst_framebuffer, src_image_view);
+ Convert(*convert_d32_to_r32_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift);
}
void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer,
- const ImageView& src_image_view) {
+ const ImageView& src_image_view, u32 up_scale,
+ u32 down_shift) {
ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass());
- Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view);
+ Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift);
}
void BlitImageHelper::ConvertD16ToR16(const Framebuffer* dst_framebuffer,
- const ImageView& src_image_view) {
+ const ImageView& src_image_view, u32 up_scale,
+ u32 down_shift) {
ConvertDepthToColorPipeline(convert_d16_to_r16_pipeline, dst_framebuffer->RenderPass());
- Convert(*convert_d16_to_r16_pipeline, dst_framebuffer, src_image_view);
+ Convert(*convert_d16_to_r16_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift);
}
void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer,
- const ImageView& src_image_view) {
+ const ImageView& src_image_view, u32 up_scale,
+ u32 down_shift) {
ConvertColorToDepthPipeline(convert_r16_to_d16_pipeline, dst_framebuffer->RenderPass());
- Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view);
+ Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift);
}
void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
- const ImageView& src_image_view) {
+ const ImageView& src_image_view, u32 up_scale, u32 down_shift) {
const VkPipelineLayout layout = *one_texture_pipeline_layout;
const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D);
const VkSampler sampler = *nearest_sampler;
const VkExtent2D extent{
- .width = src_image_view.size.width,
- .height = src_image_view.size.height,
+ .width = std::max((src_image_view.size.width * up_scale) >> down_shift, 1U),
+ .height = std::max((src_image_view.size.height * up_scale) >> down_shift, 1U),
};
scheduler.RequestRenderpass(dst_framebuffer);
- scheduler.Record([pipeline, layout, sampler, src_view, extent, this](vk::CommandBuffer cmdbuf) {
+ scheduler.Record([pipeline, layout, sampler, src_view, extent, up_scale, down_shift,
+ this](vk::CommandBuffer cmdbuf) {
const VkOffset2D offset{
.x = 0,
.y = 0,
@@ -488,7 +495,7 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb
scheduler.InvalidateState();
}
-VkPipeline BlitImageHelper::FindOrEmplacePipeline(const BlitImagePipelineKey& key) {
+VkPipeline BlitImageHelper::FindOrEmplaceColorPipeline(const BlitImagePipelineKey& key) {
const auto it = std::ranges::find(blit_color_keys, key);
if (it != blit_color_keys.end()) {
return *blit_color_pipelines[std::distance(blit_color_keys.begin(), it)];
@@ -542,12 +549,14 @@ VkPipeline BlitImageHelper::FindOrEmplacePipeline(const BlitImagePipelineKey& ke
return *blit_color_pipelines.back();
}
-VkPipeline BlitImageHelper::BlitDepthStencilPipeline(VkRenderPass renderpass) {
- if (blit_depth_stencil_pipeline) {
- return *blit_depth_stencil_pipeline;
+VkPipeline BlitImageHelper::FindOrEmplaceDepthStencilPipeline(const BlitImagePipelineKey& key) {
+ const auto it = std::ranges::find(blit_depth_stencil_keys, key);
+ if (it != blit_depth_stencil_keys.end()) {
+ return *blit_depth_stencil_pipelines[std::distance(blit_depth_stencil_keys.begin(), it)];
}
+ blit_depth_stencil_keys.push_back(key);
const std::array stages = MakeStages(*full_screen_vert, *blit_depth_stencil_frag);
- blit_depth_stencil_pipeline = device.GetLogical().CreateGraphicsPipeline({
+ blit_depth_stencil_pipelines.push_back(device.GetLogical().CreateGraphicsPipeline({
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@@ -560,15 +569,15 @@ VkPipeline BlitImageHelper::BlitDepthStencilPipeline(VkRenderPass renderpass) {
.pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
.pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
.pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
- .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO,
+ .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO,
.pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
.layout = *two_textures_pipeline_layout,
- .renderPass = renderpass,
+ .renderPass = key.renderpass,
.subpass = 0,
.basePipelineHandle = VK_NULL_HANDLE,
.basePipelineIndex = 0,
- });
- return *blit_depth_stencil_pipeline;
+ }));
+ return *blit_depth_stencil_pipelines.back();
}
void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) {
diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h
index 33ee095c1..d77f76678 100644
--- a/src/video_core/renderer_vulkan/blit_image.h
+++ b/src/video_core/renderer_vulkan/blit_image.h
@@ -34,7 +34,7 @@ public:
StateTracker& state_tracker, DescriptorPool& descriptor_pool);
~BlitImageHelper();
- void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
+ void BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_image_view,
const Region2D& dst_region, const Region2D& src_region,
Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation);
@@ -44,21 +44,25 @@ public:
const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation);
- void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
+ void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
+ u32 up_scale, u32 down_shift);
- void ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
+ void ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
+ u32 up_scale, u32 down_shift);
- void ConvertD16ToR16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
+ void ConvertD16ToR16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
+ u32 up_scale, u32 down_shift);
- void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
+ void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
+ u32 up_scale, u32 down_shift);
private:
void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
- const ImageView& src_image_view);
+ const ImageView& src_image_view, u32 up_scale, u32 down_shift);
- [[nodiscard]] VkPipeline FindOrEmplacePipeline(const BlitImagePipelineKey& key);
+ [[nodiscard]] VkPipeline FindOrEmplaceColorPipeline(const BlitImagePipelineKey& key);
- [[nodiscard]] VkPipeline BlitDepthStencilPipeline(VkRenderPass renderpass);
+ [[nodiscard]] VkPipeline FindOrEmplaceDepthStencilPipeline(const BlitImagePipelineKey& key);
void ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass);
@@ -84,7 +88,8 @@ private:
std::vector<BlitImagePipelineKey> blit_color_keys;
std::vector<vk::Pipeline> blit_color_pipelines;
- vk::Pipeline blit_depth_stencil_pipeline;
+ std::vector<BlitImagePipelineKey> blit_depth_stencil_keys;
+ std::vector<vk::Pipeline> blit_depth_stencil_pipelines;
vk::Pipeline convert_d32_to_r32_pipeline;
vk::Pipeline convert_r32_to_d32_pipeline;
vk::Pipeline convert_d16_to_r16_pipeline;
diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h
index 4847db6b6..11c160570 100644
--- a/src/video_core/renderer_vulkan/pipeline_helper.h
+++ b/src/video_core/renderer_vulkan/pipeline_helper.h
@@ -10,6 +10,7 @@
#include "common/assert.h"
#include "common/common_types.h"
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
#include "shader_recompiler/shader_info.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
@@ -20,6 +21,8 @@
namespace Vulkan {
+using Shader::Backend::SPIRV::NUM_TEXTURE_AND_IMAGE_SCALING_WORDS;
+
class DescriptorLayoutBuilder {
public:
DescriptorLayoutBuilder(const Device& device_) : device{&device_} {}
@@ -68,18 +71,28 @@ public:
}
vk::PipelineLayout CreatePipelineLayout(VkDescriptorSetLayout descriptor_set_layout) const {
+ using Shader::Backend::SPIRV::RescalingLayout;
+ const u32 size_offset = is_compute ? sizeof(RescalingLayout::down_factor) : 0u;
+ const VkPushConstantRange range{
+ .stageFlags = static_cast<VkShaderStageFlags>(
+ is_compute ? VK_SHADER_STAGE_COMPUTE_BIT : VK_SHADER_STAGE_ALL_GRAPHICS),
+ .offset = 0,
+ .size = static_cast<u32>(sizeof(RescalingLayout)) - size_offset,
+ };
return device->GetLogical().CreatePipelineLayout({
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.setLayoutCount = descriptor_set_layout ? 1U : 0U,
.pSetLayouts = bindings.empty() ? nullptr : &descriptor_set_layout,
- .pushConstantRangeCount = 0,
- .pPushConstantRanges = nullptr,
+ .pushConstantRangeCount = 1,
+ .pPushConstantRanges = &range,
});
}
void Add(const Shader::Info& info, VkShaderStageFlags stage) {
+ is_compute |= (stage & VK_SHADER_STAGE_COMPUTE_BIT) != 0;
+
Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage, info.constant_buffer_descriptors);
Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage, info.storage_buffers_descriptors);
Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage, info.texture_buffer_descriptors);
@@ -115,6 +128,7 @@ private:
}
const Device* device{};
+ bool is_compute{};
boost::container::small_vector<VkDescriptorSetLayoutBinding, 32> bindings;
boost::container::small_vector<VkDescriptorUpdateTemplateEntryKHR, 32> entries;
u32 binding{};
@@ -122,31 +136,68 @@ private:
size_t offset{};
};
-inline void PushImageDescriptors(const Shader::Info& info, const VkSampler*& samplers,
- const ImageId*& image_view_ids, TextureCache& texture_cache,
- VKUpdateDescriptorQueue& update_descriptor_queue) {
- for (const auto& desc : info.texture_buffer_descriptors) {
- image_view_ids += desc.count;
+class RescalingPushConstant {
+public:
+ explicit RescalingPushConstant() noexcept {}
+
+ void PushTexture(bool is_rescaled) noexcept {
+ *texture_ptr |= is_rescaled ? texture_bit : 0u;
+ texture_bit <<= 1u;
+ if (texture_bit == 0u) {
+ texture_bit = 1u;
+ ++texture_ptr;
+ }
}
- for (const auto& desc : info.image_buffer_descriptors) {
- image_view_ids += desc.count;
+
+ void PushImage(bool is_rescaled) noexcept {
+ *image_ptr |= is_rescaled ? image_bit : 0u;
+ image_bit <<= 1u;
+ if (image_bit == 0u) {
+ image_bit = 1u;
+ ++image_ptr;
+ }
}
+
+ const std::array<u32, NUM_TEXTURE_AND_IMAGE_SCALING_WORDS>& Data() const noexcept {
+ return words;
+ }
+
+private:
+ std::array<u32, NUM_TEXTURE_AND_IMAGE_SCALING_WORDS> words{};
+ u32* texture_ptr{words.data()};
+ u32* image_ptr{words.data() + Shader::Backend::SPIRV::NUM_TEXTURE_SCALING_WORDS};
+ u32 texture_bit{1u};
+ u32 image_bit{1u};
+};
+
+inline void PushImageDescriptors(TextureCache& texture_cache,
+ VKUpdateDescriptorQueue& update_descriptor_queue,
+ const Shader::Info& info, RescalingPushConstant& rescaling,
+ const VkSampler*& samplers,
+ const VideoCommon::ImageViewInOut*& views) {
+ const u32 num_texture_buffers = Shader::NumDescriptors(info.texture_buffer_descriptors);
+ const u32 num_image_buffers = Shader::NumDescriptors(info.image_buffer_descriptors);
+ views += num_texture_buffers;
+ views += num_image_buffers;
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
+ const VideoCommon::ImageViewId image_view_id{(views++)->id};
const VkSampler sampler{*(samplers++)};
- ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))};
+ ImageView& image_view{texture_cache.GetImageView(image_view_id)};
const VkImageView vk_image_view{image_view.Handle(desc.type)};
update_descriptor_queue.AddSampledImage(vk_image_view, sampler);
+ rescaling.PushTexture(texture_cache.IsRescaling(image_view));
}
}
for (const auto& desc : info.image_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
- ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))};
+ ImageView& image_view{texture_cache.GetImageView((views++)->id)};
if (desc.is_written) {
texture_cache.MarkModification(image_view.image_id);
}
const VkImageView vk_image_view{image_view.StorageView(desc.type, desc.format)};
update_descriptor_queue.AddImage(vk_image_view);
+ rescaling.PushImage(texture_cache.IsRescaling(image_view));
}
}
}
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index 888bc7392..1e447e621 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -12,14 +12,22 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "common/math_util.h"
+#include "common/settings.h"
#include "core/core.h"
#include "core/frontend/emu_window.h"
#include "core/memory.h"
#include "video_core/gpu.h"
+#include "video_core/host_shaders/fxaa_frag_spv.h"
+#include "video_core/host_shaders/fxaa_vert_spv.h"
+#include "video_core/host_shaders/present_bicubic_frag_spv.h"
+#include "video_core/host_shaders/present_gaussian_frag_spv.h"
#include "video_core/host_shaders/vulkan_present_frag_spv.h"
+#include "video_core/host_shaders/vulkan_present_scaleforce_fp16_frag_spv.h"
+#include "video_core/host_shaders/vulkan_present_scaleforce_fp32_frag_spv.h"
#include "video_core/host_shaders/vulkan_present_vert_spv.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/renderer_vulkan/vk_blit_screen.h"
+#include "video_core/renderer_vulkan/vk_fsr.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
@@ -144,8 +152,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
scheduler.Wait(resource_ticks[image_index]);
resource_ticks[image_index] = scheduler.CurrentTick();
- UpdateDescriptorSet(image_index,
- use_accelerated ? screen_info.image_view : *raw_image_views[image_index]);
+ VkImageView source_image_view =
+ use_accelerated ? screen_info.image_view : *raw_image_views[image_index];
BufferData data;
SetUniformData(data, layout);
@@ -222,9 +230,134 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
read_barrier);
cmdbuf.CopyBufferToImage(*buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
- VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier);
+ VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+ VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+ 0, write_barrier);
});
}
+
+ const auto anti_alias_pass = Settings::values.anti_aliasing.GetValue();
+ if (use_accelerated && anti_alias_pass != Settings::AntiAliasing::None) {
+ UpdateAADescriptorSet(image_index, source_image_view, false);
+ const u32 up_scale = Settings::values.resolution_info.up_scale;
+ const u32 down_shift = Settings::values.resolution_info.down_shift;
+ VkExtent2D size{
+ .width = (up_scale * framebuffer.width) >> down_shift,
+ .height = (up_scale * framebuffer.height) >> down_shift,
+ };
+ scheduler.Record([this, image_index, size, anti_alias_pass](vk::CommandBuffer cmdbuf) {
+ const VkImageMemoryBarrier base_barrier{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = {},
+ .subresourceRange =
+ {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = 0,
+ .levelCount = 1,
+ .baseArrayLayer = 0,
+ .layerCount = 1,
+ },
+ };
+
+ {
+ VkImageMemoryBarrier fsr_write_barrier = base_barrier;
+ fsr_write_barrier.image = *aa_image;
+ fsr_write_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+ VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, fsr_write_barrier);
+ }
+
+ const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f;
+ const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f;
+ const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f;
+ const VkClearValue clear_color{
+ .color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}},
+ };
+ const VkRenderPassBeginInfo renderpass_bi{
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .pNext = nullptr,
+ .renderPass = *aa_renderpass,
+ .framebuffer = *aa_framebuffer,
+ .renderArea =
+ {
+ .offset = {0, 0},
+ .extent = size,
+ },
+ .clearValueCount = 1,
+ .pClearValues = &clear_color,
+ };
+ const VkViewport viewport{
+ .x = 0.0f,
+ .y = 0.0f,
+ .width = static_cast<float>(size.width),
+ .height = static_cast<float>(size.height),
+ .minDepth = 0.0f,
+ .maxDepth = 1.0f,
+ };
+ const VkRect2D scissor{
+ .offset = {0, 0},
+ .extent = size,
+ };
+ cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
+ switch (anti_alias_pass) {
+ case Settings::AntiAliasing::Fxaa:
+ cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline);
+ break;
+ default:
+ cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline);
+ break;
+ }
+ cmdbuf.SetViewport(0, viewport);
+ cmdbuf.SetScissor(0, scissor);
+
+ cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices));
+ cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline_layout, 0,
+ aa_descriptor_sets[image_index], {});
+ cmdbuf.Draw(4, 1, 0, 0);
+ cmdbuf.EndRenderPass();
+
+ {
+ VkImageMemoryBarrier blit_read_barrier = base_barrier;
+ blit_read_barrier.image = *aa_image;
+ blit_read_barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+ blit_read_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
+
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
+ VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, blit_read_barrier);
+ }
+ });
+ source_image_view = *aa_image_view;
+ }
+
+ if (fsr) {
+ auto crop_rect = framebuffer.crop_rect;
+ if (crop_rect.GetWidth() == 0) {
+ crop_rect.right = framebuffer.width;
+ }
+ if (crop_rect.GetHeight() == 0) {
+ crop_rect.bottom = framebuffer.height;
+ }
+ crop_rect = crop_rect.Scale(Settings::values.resolution_info.up_factor);
+ VkExtent2D fsr_input_size{
+ .width = Settings::values.resolution_info.ScaleUp(framebuffer.width),
+ .height = Settings::values.resolution_info.ScaleUp(framebuffer.height),
+ };
+ VkImageView fsr_image_view =
+ fsr->Draw(scheduler, image_index, source_image_view, fsr_input_size, crop_rect);
+ UpdateDescriptorSet(image_index, fsr_image_view, true);
+ } else {
+ const bool is_nn =
+ Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::NearestNeighbor;
+ UpdateDescriptorSet(image_index, source_image_view, is_nn);
+ }
+
scheduler.Record(
[this, host_framebuffer, image_index, size = render_area](vk::CommandBuffer cmdbuf) {
const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f;
@@ -258,8 +391,28 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
.offset = {0, 0},
.extent = size,
};
+ const auto filter = Settings::values.scaling_filter.GetValue();
cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
- cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
+ switch (filter) {
+ case Settings::ScalingFilter::NearestNeighbor:
+ cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bilinear_pipeline);
+ break;
+ case Settings::ScalingFilter::Bilinear:
+ cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bilinear_pipeline);
+ break;
+ case Settings::ScalingFilter::Bicubic:
+ cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bicubic_pipeline);
+ break;
+ case Settings::ScalingFilter::Gaussian:
+ cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *gaussian_pipeline);
+ break;
+ case Settings::ScalingFilter::ScaleForce:
+ cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *scaleforce_pipeline);
+ break;
+ default:
+ cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bilinear_pipeline);
+ break;
+ }
cmdbuf.SetViewport(0, viewport);
cmdbuf.SetScissor(0, scissor);
@@ -281,11 +434,16 @@ VkSemaphore VKBlitScreen::DrawToSwapchain(const Tegra::FramebufferConfig& frameb
}
vk::Framebuffer VKBlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent) {
+ return CreateFramebuffer(image_view, extent, renderpass);
+}
+
+vk::Framebuffer VKBlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent,
+ vk::RenderPass& rd) {
return device.GetLogical().CreateFramebuffer(VkFramebufferCreateInfo{
.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
- .renderPass = *renderpass,
+ .renderPass = *rd,
.attachmentCount = 1,
.pAttachments = &image_view,
.width = extent.width,
@@ -308,9 +466,21 @@ void VKBlitScreen::CreateDynamicResources() {
CreateRenderPass();
CreateFramebuffers();
CreateGraphicsPipeline();
+ fsr.reset();
+ if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) {
+ CreateFSR();
+ }
}
void VKBlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer) {
+ if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) {
+ if (!fsr) {
+ CreateFSR();
+ }
+ } else {
+ fsr.reset();
+ }
+
if (framebuffer.width == raw_width && framebuffer.height == raw_height && !raw_images.empty()) {
return;
}
@@ -324,7 +494,16 @@ void VKBlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer)
void VKBlitScreen::CreateShaders() {
vertex_shader = BuildShader(device, VULKAN_PRESENT_VERT_SPV);
- fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV);
+ fxaa_vertex_shader = BuildShader(device, FXAA_VERT_SPV);
+ fxaa_fragment_shader = BuildShader(device, FXAA_FRAG_SPV);
+ bilinear_fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV);
+ bicubic_fragment_shader = BuildShader(device, PRESENT_BICUBIC_FRAG_SPV);
+ gaussian_fragment_shader = BuildShader(device, PRESENT_GAUSSIAN_FRAG_SPV);
+ if (device.IsFloat16Supported()) {
+ scaleforce_fragment_shader = BuildShader(device, VULKAN_PRESENT_SCALEFORCE_FP16_FRAG_SPV);
+ } else {
+ scaleforce_fragment_shader = BuildShader(device, VULKAN_PRESENT_SCALEFORCE_FP32_FRAG_SPV);
+ }
}
void VKBlitScreen::CreateSemaphores() {
@@ -344,6 +523,13 @@ void VKBlitScreen::CreateDescriptorPool() {
},
}};
+ const std::array<VkDescriptorPoolSize, 1> pool_sizes_aa{{
+ {
+ .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .descriptorCount = static_cast<u32>(image_count * 2),
+ },
+ }};
+
const VkDescriptorPoolCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
.pNext = nullptr,
@@ -353,19 +539,33 @@ void VKBlitScreen::CreateDescriptorPool() {
.pPoolSizes = pool_sizes.data(),
};
descriptor_pool = device.GetLogical().CreateDescriptorPool(ci);
+
+ const VkDescriptorPoolCreateInfo ci_aa{
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
+ .maxSets = static_cast<u32>(image_count),
+ .poolSizeCount = static_cast<u32>(pool_sizes_aa.size()),
+ .pPoolSizes = pool_sizes_aa.data(),
+ };
+ aa_descriptor_pool = device.GetLogical().CreateDescriptorPool(ci_aa);
}
void VKBlitScreen::CreateRenderPass() {
+ renderpass = CreateRenderPassImpl(swapchain.GetImageViewFormat());
+}
+
+vk::RenderPass VKBlitScreen::CreateRenderPassImpl(VkFormat format, bool is_present) {
const VkAttachmentDescription color_attachment{
.flags = 0,
- .format = swapchain.GetImageViewFormat(),
+ .format = format,
.samples = VK_SAMPLE_COUNT_1_BIT,
.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR,
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
- .finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
+ .finalLayout = is_present ? VK_IMAGE_LAYOUT_PRESENT_SRC_KHR : VK_IMAGE_LAYOUT_GENERAL,
};
const VkAttachmentReference color_attachment_ref{
@@ -408,7 +608,7 @@ void VKBlitScreen::CreateRenderPass() {
.pDependencies = &dependency,
};
- renderpass = device.GetLogical().CreateRenderPass(renderpass_ci);
+ return device.GetLogical().CreateRenderPass(renderpass_ci);
}
void VKBlitScreen::CreateDescriptorSetLayout() {
@@ -429,6 +629,23 @@ void VKBlitScreen::CreateDescriptorSetLayout() {
},
}};
+ const std::array<VkDescriptorSetLayoutBinding, 2> layout_bindings_aa{{
+ {
+ .binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_VERTEX_BIT,
+ .pImmutableSamplers = nullptr,
+ },
+ {
+ .binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .pImmutableSamplers = nullptr,
+ },
+ }};
+
const VkDescriptorSetLayoutCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.pNext = nullptr,
@@ -437,11 +654,21 @@ void VKBlitScreen::CreateDescriptorSetLayout() {
.pBindings = layout_bindings.data(),
};
+ const VkDescriptorSetLayoutCreateInfo ci_aa{
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .bindingCount = static_cast<u32>(layout_bindings_aa.size()),
+ .pBindings = layout_bindings_aa.data(),
+ };
+
descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci);
+ aa_descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci_aa);
}
void VKBlitScreen::CreateDescriptorSets() {
const std::vector layouts(image_count, *descriptor_set_layout);
+ const std::vector layouts_aa(image_count, *aa_descriptor_set_layout);
const VkDescriptorSetAllocateInfo ai{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
@@ -451,7 +678,16 @@ void VKBlitScreen::CreateDescriptorSets() {
.pSetLayouts = layouts.data(),
};
+ const VkDescriptorSetAllocateInfo ai_aa{
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+ .pNext = nullptr,
+ .descriptorPool = *aa_descriptor_pool,
+ .descriptorSetCount = static_cast<u32>(image_count),
+ .pSetLayouts = layouts_aa.data(),
+ };
+
descriptor_sets = descriptor_pool.Allocate(ai);
+ aa_descriptor_sets = aa_descriptor_pool.Allocate(ai_aa);
}
void VKBlitScreen::CreatePipelineLayout() {
@@ -464,11 +700,63 @@ void VKBlitScreen::CreatePipelineLayout() {
.pushConstantRangeCount = 0,
.pPushConstantRanges = nullptr,
};
+ const VkPipelineLayoutCreateInfo ci_aa{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .setLayoutCount = 1,
+ .pSetLayouts = aa_descriptor_set_layout.address(),
+ .pushConstantRangeCount = 0,
+ .pPushConstantRanges = nullptr,
+ };
pipeline_layout = device.GetLogical().CreatePipelineLayout(ci);
+ aa_pipeline_layout = device.GetLogical().CreatePipelineLayout(ci_aa);
}
void VKBlitScreen::CreateGraphicsPipeline() {
- const std::array<VkPipelineShaderStageCreateInfo, 2> shader_stages{{
+ const std::array<VkPipelineShaderStageCreateInfo, 2> bilinear_shader_stages{{
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .module = *vertex_shader,
+ .pName = "main",
+ .pSpecializationInfo = nullptr,
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .module = *bilinear_fragment_shader,
+ .pName = "main",
+ .pSpecializationInfo = nullptr,
+ },
+ }};
+
+ const std::array<VkPipelineShaderStageCreateInfo, 2> bicubic_shader_stages{{
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .module = *vertex_shader,
+ .pName = "main",
+ .pSpecializationInfo = nullptr,
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .module = *bicubic_fragment_shader,
+ .pName = "main",
+ .pSpecializationInfo = nullptr,
+ },
+ }};
+
+ const std::array<VkPipelineShaderStageCreateInfo, 2> gaussian_shader_stages{{
{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.pNext = nullptr,
@@ -483,7 +771,28 @@ void VKBlitScreen::CreateGraphicsPipeline() {
.pNext = nullptr,
.flags = 0,
.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
- .module = *fragment_shader,
+ .module = *gaussian_fragment_shader,
+ .pName = "main",
+ .pSpecializationInfo = nullptr,
+ },
+ }};
+
+ const std::array<VkPipelineShaderStageCreateInfo, 2> scaleforce_shader_stages{{
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .module = *vertex_shader,
+ .pName = "main",
+ .pSpecializationInfo = nullptr,
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .module = *scaleforce_fragment_shader,
.pName = "main",
.pSpecializationInfo = nullptr,
},
@@ -583,12 +892,12 @@ void VKBlitScreen::CreateGraphicsPipeline() {
.pDynamicStates = dynamic_states.data(),
};
- const VkGraphicsPipelineCreateInfo pipeline_ci{
+ const VkGraphicsPipelineCreateInfo bilinear_pipeline_ci{
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
- .stageCount = static_cast<u32>(shader_stages.size()),
- .pStages = shader_stages.data(),
+ .stageCount = static_cast<u32>(bilinear_shader_stages.size()),
+ .pStages = bilinear_shader_stages.data(),
.pVertexInputState = &vertex_input_ci,
.pInputAssemblyState = &input_assembly_ci,
.pTessellationState = nullptr,
@@ -605,7 +914,76 @@ void VKBlitScreen::CreateGraphicsPipeline() {
.basePipelineIndex = 0,
};
- pipeline = device.GetLogical().CreateGraphicsPipeline(pipeline_ci);
+ const VkGraphicsPipelineCreateInfo bicubic_pipeline_ci{
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stageCount = static_cast<u32>(bicubic_shader_stages.size()),
+ .pStages = bicubic_shader_stages.data(),
+ .pVertexInputState = &vertex_input_ci,
+ .pInputAssemblyState = &input_assembly_ci,
+ .pTessellationState = nullptr,
+ .pViewportState = &viewport_state_ci,
+ .pRasterizationState = &rasterization_ci,
+ .pMultisampleState = &multisampling_ci,
+ .pDepthStencilState = nullptr,
+ .pColorBlendState = &color_blend_ci,
+ .pDynamicState = &dynamic_state_ci,
+ .layout = *pipeline_layout,
+ .renderPass = *renderpass,
+ .subpass = 0,
+ .basePipelineHandle = 0,
+ .basePipelineIndex = 0,
+ };
+
+ const VkGraphicsPipelineCreateInfo gaussian_pipeline_ci{
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stageCount = static_cast<u32>(gaussian_shader_stages.size()),
+ .pStages = gaussian_shader_stages.data(),
+ .pVertexInputState = &vertex_input_ci,
+ .pInputAssemblyState = &input_assembly_ci,
+ .pTessellationState = nullptr,
+ .pViewportState = &viewport_state_ci,
+ .pRasterizationState = &rasterization_ci,
+ .pMultisampleState = &multisampling_ci,
+ .pDepthStencilState = nullptr,
+ .pColorBlendState = &color_blend_ci,
+ .pDynamicState = &dynamic_state_ci,
+ .layout = *pipeline_layout,
+ .renderPass = *renderpass,
+ .subpass = 0,
+ .basePipelineHandle = 0,
+ .basePipelineIndex = 0,
+ };
+
+ const VkGraphicsPipelineCreateInfo scaleforce_pipeline_ci{
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stageCount = static_cast<u32>(scaleforce_shader_stages.size()),
+ .pStages = scaleforce_shader_stages.data(),
+ .pVertexInputState = &vertex_input_ci,
+ .pInputAssemblyState = &input_assembly_ci,
+ .pTessellationState = nullptr,
+ .pViewportState = &viewport_state_ci,
+ .pRasterizationState = &rasterization_ci,
+ .pMultisampleState = &multisampling_ci,
+ .pDepthStencilState = nullptr,
+ .pColorBlendState = &color_blend_ci,
+ .pDynamicState = &dynamic_state_ci,
+ .layout = *pipeline_layout,
+ .renderPass = *renderpass,
+ .subpass = 0,
+ .basePipelineHandle = 0,
+ .basePipelineIndex = 0,
+ };
+
+ bilinear_pipeline = device.GetLogical().CreateGraphicsPipeline(bilinear_pipeline_ci);
+ bicubic_pipeline = device.GetLogical().CreateGraphicsPipeline(bicubic_pipeline_ci);
+ gaussian_pipeline = device.GetLogical().CreateGraphicsPipeline(gaussian_pipeline_ci);
+ scaleforce_pipeline = device.GetLogical().CreateGraphicsPipeline(scaleforce_pipeline_ci);
}
void VKBlitScreen::CreateSampler() {
@@ -614,8 +992,29 @@ void VKBlitScreen::CreateSampler() {
.pNext = nullptr,
.flags = 0,
.magFilter = VK_FILTER_LINEAR,
+ .minFilter = VK_FILTER_LINEAR,
+ .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
+ .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
+ .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
+ .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
+ .mipLodBias = 0.0f,
+ .anisotropyEnable = VK_FALSE,
+ .maxAnisotropy = 0.0f,
+ .compareEnable = VK_FALSE,
+ .compareOp = VK_COMPARE_OP_NEVER,
+ .minLod = 0.0f,
+ .maxLod = 0.0f,
+ .borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK,
+ .unnormalizedCoordinates = VK_FALSE,
+ };
+
+ const VkSamplerCreateInfo ci_nn{
+ .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .magFilter = VK_FILTER_NEAREST,
.minFilter = VK_FILTER_NEAREST,
- .mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR,
+ .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
@@ -631,6 +1030,7 @@ void VKBlitScreen::CreateSampler() {
};
sampler = device.GetLogical().CreateSampler(ci);
+ nn_sampler = device.GetLogical().CreateSampler(ci_nn);
}
void VKBlitScreen::CreateFramebuffers() {
@@ -639,7 +1039,7 @@ void VKBlitScreen::CreateFramebuffers() {
for (std::size_t i = 0; i < image_count; ++i) {
const VkImageView image_view{swapchain.GetImageViewIndex(i)};
- framebuffers[i] = CreateFramebuffer(image_view, size);
+ framebuffers[i] = CreateFramebuffer(image_view, size, renderpass);
}
}
@@ -649,6 +1049,11 @@ void VKBlitScreen::ReleaseRawImages() {
}
raw_images.clear();
raw_buffer_commits.clear();
+
+ aa_image_view.reset();
+ aa_image.reset();
+ aa_commit = MemoryCommit{};
+
buffer.reset();
buffer_commit = MemoryCommit{};
}
@@ -675,8 +1080,11 @@ void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer)
raw_image_views.resize(image_count);
raw_buffer_commits.resize(image_count);
- for (size_t i = 0; i < image_count; ++i) {
- raw_images[i] = device.GetLogical().CreateImage(VkImageCreateInfo{
+ const auto create_image = [&](bool used_on_framebuffer = false, u32 up_scale = 1,
+ u32 down_shift = 0) {
+ u32 extra_usages = used_on_framebuffer ? VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT
+ : VK_IMAGE_USAGE_TRANSFER_DST_BIT;
+ return device.GetLogical().CreateImage(VkImageCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@@ -684,26 +1092,30 @@ void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer)
.format = GetFormat(framebuffer),
.extent =
{
- .width = framebuffer.width,
- .height = framebuffer.height,
+ .width = (up_scale * framebuffer.width) >> down_shift,
+ .height = (up_scale * framebuffer.height) >> down_shift,
.depth = 1,
},
.mipLevels = 1,
.arrayLayers = 1,
.samples = VK_SAMPLE_COUNT_1_BIT,
- .tiling = VK_IMAGE_TILING_LINEAR,
- .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
+ .tiling = used_on_framebuffer ? VK_IMAGE_TILING_OPTIMAL : VK_IMAGE_TILING_LINEAR,
+ .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | extra_usages,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
});
- raw_buffer_commits[i] = memory_allocator.Commit(raw_images[i], MemoryUsage::DeviceLocal);
- raw_image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{
+ };
+ const auto create_commit = [&](vk::Image& image) {
+ return memory_allocator.Commit(image, MemoryUsage::DeviceLocal);
+ };
+ const auto create_image_view = [&](vk::Image& image) {
+ return device.GetLogical().CreateImageView(VkImageViewCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
- .image = *raw_images[i],
+ .image = *image,
.viewType = VK_IMAGE_VIEW_TYPE_2D,
.format = GetFormat(framebuffer),
.components =
@@ -722,10 +1134,211 @@ void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer)
.layerCount = 1,
},
});
+ };
+
+ for (size_t i = 0; i < image_count; ++i) {
+ raw_images[i] = create_image();
+ raw_buffer_commits[i] = create_commit(raw_images[i]);
+ raw_image_views[i] = create_image_view(raw_images[i]);
}
+
+ // AA Resources
+ const u32 up_scale = Settings::values.resolution_info.up_scale;
+ const u32 down_shift = Settings::values.resolution_info.down_shift;
+ aa_image = create_image(true, up_scale, down_shift);
+ aa_commit = create_commit(aa_image);
+ aa_image_view = create_image_view(aa_image);
+ VkExtent2D size{
+ .width = (up_scale * framebuffer.width) >> down_shift,
+ .height = (up_scale * framebuffer.height) >> down_shift,
+ };
+ if (aa_renderpass) {
+ aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass);
+ return;
+ }
+ aa_renderpass = CreateRenderPassImpl(GetFormat(framebuffer), false);
+ aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass);
+
+ const std::array<VkPipelineShaderStageCreateInfo, 2> fxaa_shader_stages{{
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .module = *fxaa_vertex_shader,
+ .pName = "main",
+ .pSpecializationInfo = nullptr,
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .module = *fxaa_fragment_shader,
+ .pName = "main",
+ .pSpecializationInfo = nullptr,
+ },
+ }};
+
+ const auto vertex_binding_description = ScreenRectVertex::GetDescription();
+ const auto vertex_attrs_description = ScreenRectVertex::GetAttributes();
+
+ const VkPipelineVertexInputStateCreateInfo vertex_input_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .vertexBindingDescriptionCount = 1,
+ .pVertexBindingDescriptions = &vertex_binding_description,
+ .vertexAttributeDescriptionCount = u32{vertex_attrs_description.size()},
+ .pVertexAttributeDescriptions = vertex_attrs_description.data(),
+ };
+
+ const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+ .primitiveRestartEnable = VK_FALSE,
+ };
+
+ const VkPipelineViewportStateCreateInfo viewport_state_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .viewportCount = 1,
+ .pViewports = nullptr,
+ .scissorCount = 1,
+ .pScissors = nullptr,
+ };
+
+ const VkPipelineRasterizationStateCreateInfo rasterization_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .depthClampEnable = VK_FALSE,
+ .rasterizerDiscardEnable = VK_FALSE,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_NONE,
+ .frontFace = VK_FRONT_FACE_CLOCKWISE,
+ .depthBiasEnable = VK_FALSE,
+ .depthBiasConstantFactor = 0.0f,
+ .depthBiasClamp = 0.0f,
+ .depthBiasSlopeFactor = 0.0f,
+ .lineWidth = 1.0f,
+ };
+
+ const VkPipelineMultisampleStateCreateInfo multisampling_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
+ .sampleShadingEnable = VK_FALSE,
+ .minSampleShading = 0.0f,
+ .pSampleMask = nullptr,
+ .alphaToCoverageEnable = VK_FALSE,
+ .alphaToOneEnable = VK_FALSE,
+ };
+
+ const VkPipelineColorBlendAttachmentState color_blend_attachment{
+ .blendEnable = VK_FALSE,
+ .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO,
+ .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO,
+ .colorBlendOp = VK_BLEND_OP_ADD,
+ .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
+ .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
+ .alphaBlendOp = VK_BLEND_OP_ADD,
+ .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
+ VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
+ };
+
+ const VkPipelineColorBlendStateCreateInfo color_blend_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .logicOpEnable = VK_FALSE,
+ .logicOp = VK_LOGIC_OP_COPY,
+ .attachmentCount = 1,
+ .pAttachments = &color_blend_attachment,
+ .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
+ };
+
+ static constexpr std::array dynamic_states{
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ };
+ const VkPipelineDynamicStateCreateInfo dynamic_state_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .dynamicStateCount = static_cast<u32>(dynamic_states.size()),
+ .pDynamicStates = dynamic_states.data(),
+ };
+
+ const VkGraphicsPipelineCreateInfo fxaa_pipeline_ci{
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stageCount = static_cast<u32>(fxaa_shader_stages.size()),
+ .pStages = fxaa_shader_stages.data(),
+ .pVertexInputState = &vertex_input_ci,
+ .pInputAssemblyState = &input_assembly_ci,
+ .pTessellationState = nullptr,
+ .pViewportState = &viewport_state_ci,
+ .pRasterizationState = &rasterization_ci,
+ .pMultisampleState = &multisampling_ci,
+ .pDepthStencilState = nullptr,
+ .pColorBlendState = &color_blend_ci,
+ .pDynamicState = &dynamic_state_ci,
+ .layout = *aa_pipeline_layout,
+ .renderPass = *aa_renderpass,
+ .subpass = 0,
+ .basePipelineHandle = 0,
+ .basePipelineIndex = 0,
+ };
+
+ // AA
+ aa_pipeline = device.GetLogical().CreateGraphicsPipeline(fxaa_pipeline_ci);
}
-void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const {
+void VKBlitScreen::UpdateAADescriptorSet(std::size_t image_index, VkImageView image_view,
+ bool nn) const {
+ const VkDescriptorImageInfo image_info{
+ .sampler = nn ? *nn_sampler : *sampler,
+ .imageView = image_view,
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ };
+
+ const VkWriteDescriptorSet sampler_write{
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .pNext = nullptr,
+ .dstSet = aa_descriptor_sets[image_index],
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .pImageInfo = &image_info,
+ .pBufferInfo = nullptr,
+ .pTexelBufferView = nullptr,
+ };
+
+ const VkWriteDescriptorSet sampler_write_2{
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .pNext = nullptr,
+ .dstSet = aa_descriptor_sets[image_index],
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .pImageInfo = &image_info,
+ .pBufferInfo = nullptr,
+ .pTexelBufferView = nullptr,
+ };
+
+ device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, sampler_write_2}, {});
+}
+
+void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view,
+ bool nn) const {
const VkDescriptorBufferInfo buffer_info{
.buffer = *buffer,
.offset = offsetof(BufferData, uniform),
@@ -746,7 +1359,7 @@ void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView imag
};
const VkDescriptorImageInfo image_info{
- .sampler = *sampler,
+ .sampler = nn ? *nn_sampler : *sampler,
.imageView = image_view,
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
};
@@ -798,17 +1411,19 @@ void VKBlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfi
UNIMPLEMENTED_IF(framebuffer_crop_rect.top != 0);
UNIMPLEMENTED_IF(framebuffer_crop_rect.left != 0);
- // Scale the output by the crop width/height. This is commonly used with 1280x720 rendering
- // (e.g. handheld mode) on a 1920x1080 framebuffer.
f32 scale_u = 1.0f;
f32 scale_v = 1.0f;
- if (framebuffer_crop_rect.GetWidth() > 0) {
- scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) /
- static_cast<f32>(screen_info.width);
- }
- if (framebuffer_crop_rect.GetHeight() > 0) {
- scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) /
- static_cast<f32>(screen_info.height);
+ // Scale the output by the crop width/height. This is commonly used with 1280x720 rendering
+ // (e.g. handheld mode) on a 1920x1080 framebuffer.
+ if (!fsr) {
+ if (framebuffer_crop_rect.GetWidth() > 0) {
+ scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) /
+ static_cast<f32>(screen_info.width);
+ }
+ if (framebuffer_crop_rect.GetHeight() > 0) {
+ scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) /
+ static_cast<f32>(screen_info.height);
+ }
}
const auto& screen = layout.screen;
@@ -822,6 +1437,15 @@ void VKBlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfi
data.vertices[3] = ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, right * scale_v);
}
+void VKBlitScreen::CreateFSR() {
+ const auto& layout = render_window.GetFramebufferLayout();
+ const VkExtent2D fsr_size{
+ .width = layout.screen.GetWidth(),
+ .height = layout.screen.GetHeight(),
+ };
+ fsr = std::make_unique<FSR>(device, memory_allocator, image_count, fsr_size);
+}
+
u64 VKBlitScreen::CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const {
return sizeof(BufferData) + GetSizeInBytes(framebuffer) * image_count;
}
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h
index 430bcfbca..bbca71af3 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.h
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.h
@@ -34,6 +34,7 @@ namespace Vulkan {
struct ScreenInfo;
class Device;
+class FSR;
class RasterizerVulkan;
class VKScheduler;
class VKSwapchain;
@@ -66,6 +67,9 @@ public:
[[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view,
VkExtent2D extent);
+ [[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view,
+ VkExtent2D extent, vk::RenderPass& rd);
+
private:
struct BufferData;
@@ -74,6 +78,7 @@ private:
void CreateSemaphores();
void CreateDescriptorPool();
void CreateRenderPass();
+ vk::RenderPass CreateRenderPassImpl(VkFormat, bool is_present = true);
void CreateDescriptorSetLayout();
void CreateDescriptorSets();
void CreatePipelineLayout();
@@ -88,11 +93,14 @@ private:
void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer);
void CreateRawImages(const Tegra::FramebufferConfig& framebuffer);
- void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const;
+ void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view, bool nn) const;
+ void UpdateAADescriptorSet(std::size_t image_index, VkImageView image_view, bool nn) const;
void SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const;
void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer,
const Layout::FramebufferLayout layout) const;
+ void CreateFSR();
+
u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const;
u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer,
std::size_t image_index) const;
@@ -107,14 +115,24 @@ private:
const VKScreenInfo& screen_info;
vk::ShaderModule vertex_shader;
- vk::ShaderModule fragment_shader;
+ vk::ShaderModule fxaa_vertex_shader;
+ vk::ShaderModule fxaa_fragment_shader;
+ vk::ShaderModule bilinear_fragment_shader;
+ vk::ShaderModule bicubic_fragment_shader;
+ vk::ShaderModule gaussian_fragment_shader;
+ vk::ShaderModule scaleforce_fragment_shader;
vk::DescriptorPool descriptor_pool;
vk::DescriptorSetLayout descriptor_set_layout;
vk::PipelineLayout pipeline_layout;
- vk::Pipeline pipeline;
+ vk::Pipeline nearest_neightbor_pipeline;
+ vk::Pipeline bilinear_pipeline;
+ vk::Pipeline bicubic_pipeline;
+ vk::Pipeline gaussian_pipeline;
+ vk::Pipeline scaleforce_pipeline;
vk::RenderPass renderpass;
std::vector<vk::Framebuffer> framebuffers;
vk::DescriptorSets descriptor_sets;
+ vk::Sampler nn_sampler;
vk::Sampler sampler;
vk::Buffer buffer;
@@ -126,8 +144,22 @@ private:
std::vector<vk::Image> raw_images;
std::vector<vk::ImageView> raw_image_views;
std::vector<MemoryCommit> raw_buffer_commits;
+
+ vk::DescriptorPool aa_descriptor_pool;
+ vk::DescriptorSetLayout aa_descriptor_set_layout;
+ vk::PipelineLayout aa_pipeline_layout;
+ vk::Pipeline aa_pipeline;
+ vk::RenderPass aa_renderpass;
+ vk::Framebuffer aa_framebuffer;
+ vk::DescriptorSets aa_descriptor_sets;
+ vk::Image aa_image;
+ vk::ImageView aa_image_view;
+ MemoryCommit aa_commit;
+
u32 raw_width = 0;
u32 raw_height = 0;
+
+ std::unique_ptr<FSR> fsr;
};
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 8ac58bc2f..5ffd93499 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -146,7 +146,7 @@ void BufferCacheRuntime::Finish() {
}
void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
- std::span<const VideoCommon::BufferCopy> copies) {
+ std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
static constexpr VkMemoryBarrier READ_BARRIER{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
@@ -163,10 +163,42 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size());
std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy);
scheduler.RequestOutsideRenderPassOperationContext();
- scheduler.Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) {
+ scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) {
+ if (barrier) {
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+ VK_PIPELINE_STAGE_TRANSFER_BIT, 0, READ_BARRIER);
+ }
+ cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies);
+ if (barrier) {
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
+ VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, WRITE_BARRIER);
+ }
+ });
+}
+
+void BufferCacheRuntime::PreCopyBarrier() {
+ static constexpr VkMemoryBarrier READ_BARRIER{
+ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
+ };
+ scheduler.RequestOutsideRenderPassOperationContext();
+ scheduler.Record([](vk::CommandBuffer cmdbuf) {
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
0, READ_BARRIER);
- cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies);
+ });
+}
+
+void BufferCacheRuntime::PostCopyBarrier() {
+ static constexpr VkMemoryBarrier WRITE_BARRIER{
+ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
+ };
+ scheduler.RequestOutsideRenderPassOperationContext();
+ scheduler.Record([](vk::CommandBuffer cmdbuf) {
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
0, WRITE_BARRIER);
});
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index c27402ff0..1ee0d8420 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -69,8 +69,12 @@ public:
[[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
+ void PreCopyBarrier();
+
void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer,
- std::span<const VideoCommon::BufferCopy> copies);
+ std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
+
+ void PostCopyBarrier();
void ClearBuffer(VkBuffer dest_buffer, u32 offset, size_t size, u32 value);
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index 44faf626a..de36bcdb7 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -22,6 +22,7 @@
namespace Vulkan {
using Shader::ImageBufferDescriptor;
+using Shader::Backend::SPIRV::RESCALING_LAYOUT_WORDS_OFFSET;
using Tegra::Texture::TexturePair;
ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool,
@@ -108,8 +109,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
texture_cache.SynchronizeComputeDescriptors();
static constexpr size_t max_elements = 64;
- std::array<ImageId, max_elements> image_view_ids;
- boost::container::static_vector<u32, max_elements> image_view_indices;
+ boost::container::static_vector<VideoCommon::ImageViewInOut, max_elements> views;
boost::container::static_vector<VkSampler, max_elements> samplers;
const auto& qmd{kepler_compute.launch_description};
@@ -134,30 +134,37 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
}
return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
}};
- const auto add_image{[&](const auto& desc) {
+ const auto add_image{[&](const auto& desc, bool blacklist) {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
- image_view_indices.push_back(handle.first);
+ views.push_back({
+ .index = handle.first,
+ .blacklist = blacklist,
+ .id = {},
+ });
}
}};
- std::ranges::for_each(info.texture_buffer_descriptors, add_image);
- std::ranges::for_each(info.image_buffer_descriptors, add_image);
+ for (const auto& desc : info.texture_buffer_descriptors) {
+ add_image(desc, false);
+ }
+ for (const auto& desc : info.image_buffer_descriptors) {
+ add_image(desc, false);
+ }
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
- image_view_indices.push_back(handle.first);
+ views.push_back({handle.first});
Sampler* const sampler = texture_cache.GetComputeSampler(handle.second);
samplers.push_back(sampler->Handle());
}
}
- std::ranges::for_each(info.image_descriptors, add_image);
-
- const std::span indices_span(image_view_indices.data(), image_view_indices.size());
- texture_cache.FillComputeImageViews(indices_span, image_view_ids);
+ for (const auto& desc : info.image_descriptors) {
+ add_image(desc, desc.is_written);
+ }
+ texture_cache.FillComputeImageViews(std::span(views.data(), views.size()));
buffer_cache.UnbindComputeTextureBuffers();
- ImageId* texture_buffer_ids{image_view_ids.data()};
size_t index{};
const auto add_buffer{[&](const auto& desc) {
constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
@@ -166,11 +173,10 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
if constexpr (is_image) {
is_written = desc.is_written;
}
- ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids);
+ ImageView& image_view = texture_cache.GetImageView(views[index].id);
buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(),
image_view.BufferSize(), image_view.format,
is_written, is_image);
- ++texture_buffer_ids;
++index;
}
}};
@@ -180,9 +186,11 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
buffer_cache.UpdateComputeBuffers();
buffer_cache.BindHostComputeBuffers();
+ RescalingPushConstant rescaling;
const VkSampler* samplers_it{samplers.data()};
- const ImageId* views_it{image_view_ids.data()};
- PushImageDescriptors(info, samplers_it, views_it, texture_cache, update_descriptor_queue);
+ const VideoCommon::ImageViewInOut* views_it{views.data()};
+ PushImageDescriptors(texture_cache, update_descriptor_queue, info, rescaling, samplers_it,
+ views_it);
if (!is_built.load(std::memory_order::relaxed)) {
// Wait for the pipeline to be built
@@ -192,11 +200,18 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
});
}
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
- scheduler.Record([this, descriptor_data](vk::CommandBuffer cmdbuf) {
+ const bool is_rescaling = !info.texture_descriptors.empty() || !info.image_descriptors.empty();
+ scheduler.Record([this, descriptor_data, is_rescaling,
+ rescaling_data = rescaling.Data()](vk::CommandBuffer cmdbuf) {
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
if (!descriptor_set_layout) {
return;
}
+ if (is_rescaling) {
+ cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT,
+ RESCALING_LAYOUT_WORDS_OFFSET, sizeof(rescaling_data),
+ rescaling_data.data());
+ }
const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
const vk::Device& dev{device.GetLogical()};
dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data);
diff --git a/src/video_core/renderer_vulkan/vk_fsr.cpp b/src/video_core/renderer_vulkan/vk_fsr.cpp
new file mode 100644
index 000000000..73629d229
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_fsr.cpp
@@ -0,0 +1,553 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cmath>
+#include "common/bit_cast.h"
+#include "common/common_types.h"
+#include "common/div_ceil.h"
+
+#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16_comp_spv.h"
+#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32_comp_spv.h"
+#include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16_comp_spv.h"
+#include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32_comp_spv.h"
+#include "video_core/renderer_vulkan/vk_fsr.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_shader_util.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+
+namespace Vulkan {
+namespace {
+// Reimplementations of the constant generating functions in ffx_fsr1.h
+// GCC generated a lot of warnings when using the official header.
+u32 AU1_AH1_AF1(f32 f) {
+ static constexpr u32 base[512]{
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040,
+ 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00, 0x2000,
+ 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00,
+ 0x5000, 0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800,
+ 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+ 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+ 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+ 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+ 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+ 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+ 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+ 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+ 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+ 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+ 0x7bff, 0x7bff, 0x7bff, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+ 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+ 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+ 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+ 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+ 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+ 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+ 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+ 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+ 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, 0x8004, 0x8008,
+ 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400,
+ 0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00, 0xb000, 0xb400, 0xb800, 0xbc00, 0xc000,
+ 0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00,
+ 0xf000, 0xf400, 0xf800, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+ 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+ 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+ 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+ 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+ 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+ 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+ 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+ 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+ 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+ 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+ };
+ static constexpr s8 shift[512]{
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, 0x16,
+ 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
+ 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
+ 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17,
+ 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
+ 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
+ 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18,
+ };
+ const u32 u = Common::BitCast<u32>(f);
+ const u32 i = u >> 23;
+ return base[i] + ((u & 0x7fffff) >> shift[i]);
+}
+
+u32 AU1_AH2_AF2(f32 a[2]) {
+ return AU1_AH1_AF1(a[0]) + (AU1_AH1_AF1(a[1]) << 16);
+}
+
+void FsrEasuCon(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4], f32 inputViewportInPixelsX,
+ f32 inputViewportInPixelsY, f32 inputSizeInPixelsX, f32 inputSizeInPixelsY,
+ f32 outputSizeInPixelsX, f32 outputSizeInPixelsY) {
+ con0[0] = Common::BitCast<u32>(inputViewportInPixelsX / outputSizeInPixelsX);
+ con0[1] = Common::BitCast<u32>(inputViewportInPixelsY / outputSizeInPixelsY);
+ con0[2] = Common::BitCast<u32>(0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f);
+ con0[3] = Common::BitCast<u32>(0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f);
+ con1[0] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX);
+ con1[1] = Common::BitCast<u32>(1.0f / inputSizeInPixelsY);
+ con1[2] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX);
+ con1[3] = Common::BitCast<u32>(-1.0f / inputSizeInPixelsY);
+ con2[0] = Common::BitCast<u32>(-1.0f / inputSizeInPixelsX);
+ con2[1] = Common::BitCast<u32>(2.0f / inputSizeInPixelsY);
+ con2[2] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX);
+ con2[3] = Common::BitCast<u32>(2.0f / inputSizeInPixelsY);
+ con3[0] = Common::BitCast<u32>(0.0f / inputSizeInPixelsX);
+ con3[1] = Common::BitCast<u32>(4.0f / inputSizeInPixelsY);
+ con3[2] = con3[3] = 0;
+}
+
+void FsrEasuConOffset(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4],
+ f32 inputViewportInPixelsX, f32 inputViewportInPixelsY,
+ f32 inputSizeInPixelsX, f32 inputSizeInPixelsY, f32 outputSizeInPixelsX,
+ f32 outputSizeInPixelsY, f32 inputOffsetInPixelsX, f32 inputOffsetInPixelsY) {
+ FsrEasuCon(con0, con1, con2, con3, inputViewportInPixelsX, inputViewportInPixelsY,
+ inputSizeInPixelsX, inputSizeInPixelsY, outputSizeInPixelsX, outputSizeInPixelsY);
+ con0[2] = Common::BitCast<u32>(0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f +
+ inputOffsetInPixelsX);
+ con0[3] = Common::BitCast<u32>(0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f +
+ inputOffsetInPixelsY);
+}
+
+void FsrRcasCon(u32* con, f32 sharpness) {
+ sharpness = std::exp2f(-sharpness);
+ f32 hSharp[2]{sharpness, sharpness};
+ con[0] = Common::BitCast<u32>(sharpness);
+ con[1] = AU1_AH2_AF2(hSharp);
+ con[2] = 0;
+ con[3] = 0;
+}
+} // Anonymous namespace
+
+FSR::FSR(const Device& device_, MemoryAllocator& memory_allocator_, size_t image_count_,
+ VkExtent2D output_size_)
+ : device{device_}, memory_allocator{memory_allocator_}, image_count{image_count_},
+ output_size{output_size_} {
+
+ CreateImages();
+ CreateSampler();
+ CreateShaders();
+ CreateDescriptorPool();
+ CreateDescriptorSetLayout();
+ CreateDescriptorSets();
+ CreatePipelineLayout();
+ CreatePipeline();
+}
+
+VkImageView FSR::Draw(VKScheduler& scheduler, size_t image_index, VkImageView image_view,
+ VkExtent2D input_image_extent, const Common::Rectangle<int>& crop_rect) {
+
+ UpdateDescriptorSet(image_index, image_view);
+
+ scheduler.Record([this, image_index, input_image_extent, crop_rect](vk::CommandBuffer cmdbuf) {
+ const VkImageMemoryBarrier base_barrier{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = {},
+ .subresourceRange =
+ {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = 0,
+ .levelCount = 1,
+ .baseArrayLayer = 0,
+ .layerCount = 1,
+ },
+ };
+
+ cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *easu_pipeline);
+
+ std::array<u32, 4 * 4> push_constants;
+ FsrEasuConOffset(
+ push_constants.data() + 0, push_constants.data() + 4, push_constants.data() + 8,
+ push_constants.data() + 12,
+
+ static_cast<f32>(crop_rect.GetWidth()), static_cast<f32>(crop_rect.GetHeight()),
+ static_cast<f32>(input_image_extent.width), static_cast<f32>(input_image_extent.height),
+ static_cast<f32>(output_size.width), static_cast<f32>(output_size.height),
+ static_cast<f32>(crop_rect.left), static_cast<f32>(crop_rect.top));
+ cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants);
+
+ {
+ VkImageMemoryBarrier fsr_write_barrier = base_barrier;
+ fsr_write_barrier.image = *images[image_index],
+ fsr_write_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+ VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, fsr_write_barrier);
+ }
+
+ cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0,
+ descriptor_sets[image_index * 2], {});
+ cmdbuf.Dispatch(Common::DivCeil(output_size.width, 16u),
+ Common::DivCeil(output_size.height, 16u), 1);
+
+ cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *rcas_pipeline);
+
+ FsrRcasCon(push_constants.data(), 0.25f);
+ cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants);
+
+ {
+ std::array<VkImageMemoryBarrier, 2> barriers;
+ auto& fsr_read_barrier = barriers[0];
+ auto& blit_write_barrier = barriers[1];
+
+ fsr_read_barrier = base_barrier;
+ fsr_read_barrier.image = *images[image_index];
+ fsr_read_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
+ fsr_read_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
+
+ blit_write_barrier = base_barrier;
+ blit_write_barrier.image = *images[image_count + image_index];
+ blit_write_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+ blit_write_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
+
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+ VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, {}, {}, barriers);
+ }
+
+ cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0,
+ descriptor_sets[image_index * 2 + 1], {});
+ cmdbuf.Dispatch(Common::DivCeil(output_size.width, 16u),
+ Common::DivCeil(output_size.height, 16u), 1);
+
+ {
+ std::array<VkImageMemoryBarrier, 1> barriers;
+ auto& blit_read_barrier = barriers[0];
+
+ blit_read_barrier = base_barrier;
+ blit_read_barrier.image = *images[image_count + image_index];
+ blit_read_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
+ blit_read_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
+
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+ VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, {}, {}, barriers);
+ }
+ });
+
+ return *image_views[image_count + image_index];
+}
+
+void FSR::CreateDescriptorPool() {
+ const std::array<VkDescriptorPoolSize, 2> pool_sizes{{
+ {
+ .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .descriptorCount = static_cast<u32>(image_count * 2),
+ },
+ {
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .descriptorCount = static_cast<u32>(image_count * 2),
+ },
+ }};
+
+ const VkDescriptorPoolCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
+ .maxSets = static_cast<u32>(image_count * 2),
+ .poolSizeCount = static_cast<u32>(pool_sizes.size()),
+ .pPoolSizes = pool_sizes.data(),
+ };
+ descriptor_pool = device.GetLogical().CreateDescriptorPool(ci);
+}
+
+void FSR::CreateDescriptorSetLayout() {
+ const std::array<VkDescriptorSetLayoutBinding, 2> layout_bindings{{
+ {
+ .binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = sampler.address(),
+ },
+ {
+ .binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = sampler.address(),
+ },
+ }};
+
+ const VkDescriptorSetLayoutCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .bindingCount = static_cast<u32>(layout_bindings.size()),
+ .pBindings = layout_bindings.data(),
+ };
+
+ descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci);
+}
+
+void FSR::CreateDescriptorSets() {
+ const u32 sets = static_cast<u32>(image_count * 2);
+ const std::vector layouts(sets, *descriptor_set_layout);
+
+ const VkDescriptorSetAllocateInfo ai{
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+ .pNext = nullptr,
+ .descriptorPool = *descriptor_pool,
+ .descriptorSetCount = sets,
+ .pSetLayouts = layouts.data(),
+ };
+
+ descriptor_sets = descriptor_pool.Allocate(ai);
+}
+
+void FSR::CreateImages() {
+ images.resize(image_count * 2);
+ image_views.resize(image_count * 2);
+ buffer_commits.resize(image_count * 2);
+
+ for (size_t i = 0; i < image_count * 2; ++i) {
+ images[i] = device.GetLogical().CreateImage(VkImageCreateInfo{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .imageType = VK_IMAGE_TYPE_2D,
+ .format = VK_FORMAT_R16G16B16A16_SFLOAT,
+ .extent =
+ {
+ .width = output_size.width,
+ .height = output_size.height,
+ .depth = 1,
+ },
+ .mipLevels = 1,
+ .arrayLayers = 1,
+ .samples = VK_SAMPLE_COUNT_1_BIT,
+ .tiling = VK_IMAGE_TILING_OPTIMAL,
+ .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_STORAGE_BIT |
+ VK_IMAGE_USAGE_SAMPLED_BIT,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .queueFamilyIndexCount = 0,
+ .pQueueFamilyIndices = nullptr,
+ .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+ });
+ buffer_commits[i] = memory_allocator.Commit(images[i], MemoryUsage::DeviceLocal);
+ image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .image = *images[i],
+ .viewType = VK_IMAGE_VIEW_TYPE_2D,
+ .format = VK_FORMAT_R16G16B16A16_SFLOAT,
+ .components =
+ {
+ .r = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .g = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .b = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .a = VK_COMPONENT_SWIZZLE_IDENTITY,
+ },
+ .subresourceRange =
+ {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = 0,
+ .levelCount = 1,
+ .baseArrayLayer = 0,
+ .layerCount = 1,
+ },
+ });
+ }
+}
+
+void FSR::CreatePipelineLayout() {
+ VkPushConstantRange push_const{
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .offset = 0,
+ .size = sizeof(std::array<u32, 4 * 4>),
+ };
+ VkPipelineLayoutCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .setLayoutCount = 1,
+ .pSetLayouts = descriptor_set_layout.address(),
+ .pushConstantRangeCount = 1,
+ .pPushConstantRanges = &push_const,
+ };
+
+ pipeline_layout = device.GetLogical().CreatePipelineLayout(ci);
+}
+
+void FSR::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const {
+ const auto fsr_image_view = *image_views[image_index];
+ const auto blit_image_view = *image_views[image_count + image_index];
+
+ const VkDescriptorImageInfo image_info{
+ .sampler = VK_NULL_HANDLE,
+ .imageView = image_view,
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ };
+ const VkDescriptorImageInfo fsr_image_info{
+ .sampler = VK_NULL_HANDLE,
+ .imageView = fsr_image_view,
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ };
+ const VkDescriptorImageInfo blit_image_info{
+ .sampler = VK_NULL_HANDLE,
+ .imageView = blit_image_view,
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ };
+
+ VkWriteDescriptorSet sampler_write{
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .pNext = nullptr,
+ .dstSet = descriptor_sets[image_index * 2],
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .pImageInfo = &image_info,
+ .pBufferInfo = nullptr,
+ .pTexelBufferView = nullptr,
+ };
+
+ VkWriteDescriptorSet output_write{
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .pNext = nullptr,
+ .dstSet = descriptor_sets[image_index * 2],
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .pImageInfo = &fsr_image_info,
+ .pBufferInfo = nullptr,
+ .pTexelBufferView = nullptr,
+ };
+
+ device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, output_write}, {});
+
+ sampler_write.dstSet = descriptor_sets[image_index * 2 + 1];
+ sampler_write.pImageInfo = &fsr_image_info;
+ output_write.dstSet = descriptor_sets[image_index * 2 + 1];
+ output_write.pImageInfo = &blit_image_info;
+
+ device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, output_write}, {});
+}
+
+void FSR::CreateSampler() {
+ const VkSamplerCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .magFilter = VK_FILTER_LINEAR,
+ .minFilter = VK_FILTER_LINEAR,
+ .mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR,
+ .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+ .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+ .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+ .mipLodBias = 0.0f,
+ .anisotropyEnable = VK_FALSE,
+ .maxAnisotropy = 0.0f,
+ .compareEnable = VK_FALSE,
+ .compareOp = VK_COMPARE_OP_NEVER,
+ .minLod = 0.0f,
+ .maxLod = 0.0f,
+ .borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK,
+ .unnormalizedCoordinates = VK_FALSE,
+ };
+
+ sampler = device.GetLogical().CreateSampler(ci);
+}
+
+void FSR::CreateShaders() {
+ if (device.IsFloat16Supported()) {
+ easu_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_EASU_FP16_COMP_SPV);
+ rcas_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_RCAS_FP16_COMP_SPV);
+ } else {
+ easu_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_EASU_FP32_COMP_SPV);
+ rcas_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_RCAS_FP32_COMP_SPV);
+ }
+}
+
+void FSR::CreatePipeline() {
+ VkPipelineShaderStageCreateInfo shader_stage_easu{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = *easu_shader,
+ .pName = "main",
+ .pSpecializationInfo = nullptr,
+ };
+
+ VkPipelineShaderStageCreateInfo shader_stage_rcas{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = *rcas_shader,
+ .pName = "main",
+ .pSpecializationInfo = nullptr,
+ };
+
+ VkComputePipelineCreateInfo pipeline_ci_easu{
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stage = shader_stage_easu,
+ .layout = *pipeline_layout,
+ .basePipelineHandle = VK_NULL_HANDLE,
+ .basePipelineIndex = 0,
+ };
+
+ VkComputePipelineCreateInfo pipeline_ci_rcas{
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stage = shader_stage_rcas,
+ .layout = *pipeline_layout,
+ .basePipelineHandle = VK_NULL_HANDLE,
+ .basePipelineIndex = 0,
+ };
+
+ easu_pipeline = device.GetLogical().CreateComputePipeline(pipeline_ci_easu);
+ rcas_pipeline = device.GetLogical().CreateComputePipeline(pipeline_ci_rcas);
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_fsr.h b/src/video_core/renderer_vulkan/vk_fsr.h
new file mode 100644
index 000000000..6bbec3d36
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_fsr.h
@@ -0,0 +1,54 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/math_util.h"
+#include "video_core/vulkan_common/vulkan_memory_allocator.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+namespace Vulkan {
+
+class Device;
+class VKScheduler;
+
+class FSR {
+public:
+ explicit FSR(const Device& device, MemoryAllocator& memory_allocator, size_t image_count,
+ VkExtent2D output_size);
+ VkImageView Draw(VKScheduler& scheduler, size_t image_index, VkImageView image_view,
+ VkExtent2D input_image_extent, const Common::Rectangle<int>& crop_rect);
+
+private:
+ void CreateDescriptorPool();
+ void CreateDescriptorSetLayout();
+ void CreateDescriptorSets();
+ void CreateImages();
+ void CreateSampler();
+ void CreateShaders();
+ void CreatePipeline();
+ void CreatePipelineLayout();
+
+ void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const;
+
+ const Device& device;
+ MemoryAllocator& memory_allocator;
+ size_t image_count;
+ VkExtent2D output_size;
+
+ vk::DescriptorPool descriptor_pool;
+ vk::DescriptorSetLayout descriptor_set_layout;
+ vk::DescriptorSets descriptor_sets;
+ vk::PipelineLayout pipeline_layout;
+ vk::ShaderModule easu_shader;
+ vk::ShaderModule rcas_shader;
+ vk::Pipeline easu_pipeline;
+ vk::Pipeline rcas_pipeline;
+ vk::Sampler sampler;
+ std::vector<vk::Image> images;
+ std::vector<vk::ImageView> image_views;
+ std::vector<MemoryCommit> buffer_commits;
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 8634c3316..616a7b457 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -32,6 +32,8 @@ namespace {
using boost::container::small_vector;
using boost::container::static_vector;
using Shader::ImageBufferDescriptor;
+using Shader::Backend::SPIRV::RESCALING_LAYOUT_DOWN_FACTOR_OFFSET;
+using Shader::Backend::SPIRV::RESCALING_LAYOUT_WORDS_OFFSET;
using Tegra::Texture::TexturePair;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::PixelFormatFromDepthFormat;
@@ -235,6 +237,7 @@ GraphicsPipeline::GraphicsPipeline(
stage_infos[stage] = *info;
enabled_uniform_buffer_masks[stage] = info->constant_buffer_mask;
std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin());
+ num_textures += Shader::NumDescriptors(info->texture_descriptors);
}
auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool, pipeline_statistics] {
DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)};
@@ -277,11 +280,10 @@ void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) {
template <typename Spec>
void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
- std::array<ImageId, MAX_IMAGE_ELEMENTS> image_view_ids;
- std::array<u32, MAX_IMAGE_ELEMENTS> image_view_indices;
+ std::array<VideoCommon::ImageViewInOut, MAX_IMAGE_ELEMENTS> views;
std::array<VkSampler, MAX_IMAGE_ELEMENTS> samplers;
size_t sampler_index{};
- size_t image_index{};
+ size_t view_index{};
texture_cache.SynchronizeGraphicsDescriptors();
@@ -322,26 +324,30 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
}
return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
}};
- const auto add_image{[&](const auto& desc) {
+ const auto add_image{[&](const auto& desc, bool blacklist) LAMBDA_FORCEINLINE {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
- image_view_indices[image_index++] = handle.first;
+ views[view_index++] = {
+ .index = handle.first,
+ .blacklist = blacklist,
+ .id = {},
+ };
}
}};
if constexpr (Spec::has_texture_buffers) {
for (const auto& desc : info.texture_buffer_descriptors) {
- add_image(desc);
+ add_image(desc, false);
}
}
if constexpr (Spec::has_image_buffers) {
for (const auto& desc : info.image_buffer_descriptors) {
- add_image(desc);
+ add_image(desc, false);
}
}
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
- image_view_indices[image_index++] = handle.first;
+ views[view_index++] = {handle.first};
Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)};
samplers[sampler_index++] = sampler->Handle();
@@ -349,7 +355,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
}
if constexpr (Spec::has_images) {
for (const auto& desc : info.image_descriptors) {
- add_image(desc);
+ add_image(desc, desc.is_written);
}
}
}};
@@ -368,10 +374,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
if constexpr (Spec::enabled_stages[4]) {
config_stage(4);
}
- const std::span indices_span(image_view_indices.data(), image_index);
- texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
+ texture_cache.FillGraphicsImageViews<Spec::has_images>(std::span(views.data(), view_index));
- ImageId* texture_buffer_index{image_view_ids.data()};
+ VideoCommon::ImageViewInOut* texture_buffer_it{views.data()};
const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE {
size_t index{};
const auto add_buffer{[&](const auto& desc) {
@@ -381,12 +386,12 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
if constexpr (is_image) {
is_written = desc.is_written;
}
- ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)};
+ ImageView& image_view{texture_cache.GetImageView(texture_buffer_it->id)};
buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(),
image_view.BufferSize(), image_view.format,
is_written, is_image);
++index;
- ++texture_buffer_index;
+ ++texture_buffer_it;
}
}};
buffer_cache.UnbindGraphicsTextureBuffers(stage);
@@ -402,13 +407,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
add_buffer(desc);
}
}
- for (const auto& desc : info.texture_descriptors) {
- texture_buffer_index += desc.count;
- }
+ texture_buffer_it += Shader::NumDescriptors(info.texture_descriptors);
if constexpr (Spec::has_images) {
- for (const auto& desc : info.image_descriptors) {
- texture_buffer_index += desc.count;
- }
+ texture_buffer_it += Shader::NumDescriptors(info.image_descriptors);
}
}};
if constexpr (Spec::enabled_stages[0]) {
@@ -432,12 +433,13 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
update_descriptor_queue.Acquire();
+ RescalingPushConstant rescaling;
const VkSampler* samplers_it{samplers.data()};
- const ImageId* views_it{image_view_ids.data()};
+ const VideoCommon::ImageViewInOut* views_it{views.data()};
const auto prepare_stage{[&](size_t stage) LAMBDA_FORCEINLINE {
buffer_cache.BindHostStageBuffers(stage);
- PushImageDescriptors(stage_infos[stage], samplers_it, views_it, texture_cache,
- update_descriptor_queue);
+ PushImageDescriptors(texture_cache, update_descriptor_queue, stage_infos[stage], rescaling,
+ samplers_it, views_it);
}};
if constexpr (Spec::enabled_stages[0]) {
prepare_stage(0);
@@ -454,10 +456,10 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
if constexpr (Spec::enabled_stages[4]) {
prepare_stage(4);
}
- ConfigureDraw();
+ ConfigureDraw(rescaling);
}
-void GraphicsPipeline::ConfigureDraw() {
+void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling) {
texture_cache.UpdateRenderTargets(false);
scheduler.RequestRenderpass(texture_cache.GetFramebuffer());
@@ -468,12 +470,25 @@ void GraphicsPipeline::ConfigureDraw() {
build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
});
}
+ const bool is_rescaling{texture_cache.IsRescaling()};
+ const bool update_rescaling{scheduler.UpdateRescaling(is_rescaling)};
const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)};
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
- scheduler.Record([this, descriptor_data, bind_pipeline](vk::CommandBuffer cmdbuf) {
+ scheduler.Record([this, descriptor_data, bind_pipeline, rescaling_data = rescaling.Data(),
+ is_rescaling, update_rescaling](vk::CommandBuffer cmdbuf) {
if (bind_pipeline) {
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
}
+ cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS,
+ RESCALING_LAYOUT_WORDS_OFFSET, sizeof(rescaling_data),
+ rescaling_data.data());
+ if (update_rescaling) {
+ const f32 config_down_factor{Settings::values.resolution_info.down_factor};
+ const f32 scale_down_factor{is_rescaling ? config_down_factor : 1.0f};
+ cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS,
+ RESCALING_LAYOUT_DOWN_FACTOR_OFFSET, sizeof(scale_down_factor),
+ &scale_down_factor);
+ }
if (!descriptor_set_layout) {
return;
}
@@ -826,18 +841,10 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
void GraphicsPipeline::Validate() {
size_t num_images{};
for (const auto& info : stage_infos) {
- for (const auto& desc : info.texture_buffer_descriptors) {
- num_images += desc.count;
- }
- for (const auto& desc : info.image_buffer_descriptors) {
- num_images += desc.count;
- }
- for (const auto& desc : info.texture_descriptors) {
- num_images += desc.count;
- }
- for (const auto& desc : info.image_descriptors) {
- num_images += desc.count;
- }
+ num_images += Shader::NumDescriptors(info.texture_buffer_descriptors);
+ num_images += Shader::NumDescriptors(info.image_buffer_descriptors);
+ num_images += Shader::NumDescriptors(info.texture_descriptors);
+ num_images += Shader::NumDescriptors(info.image_descriptors);
}
ASSERT(num_images <= MAX_IMAGE_ELEMENTS);
}
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index 1c780e944..a0c1d8f07 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -62,6 +62,7 @@ namespace Vulkan {
class Device;
class PipelineStatistics;
class RenderPassCache;
+class RescalingPushConstant;
class VKScheduler;
class VKUpdateDescriptorQueue;
@@ -113,7 +114,7 @@ private:
template <typename Spec>
void ConfigureImpl(bool is_indexed);
- void ConfigureDraw();
+ void ConfigureDraw(const RescalingPushConstant& rescaling);
void MakePipeline(VkRenderPass render_pass);
@@ -138,6 +139,7 @@ private:
std::array<Shader::Info, NUM_STAGES> stage_infos;
std::array<u32, 5> enabled_uniform_buffer_masks{};
VideoCommon::UniformBufferSizes uniform_buffer_sizes{};
+ u32 num_textures{};
vk::DescriptorSetLayout descriptor_set_layout;
DescriptorAllocator descriptor_allocator;
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h
index 0886b7da8..9be9c9bed 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.h
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h
@@ -70,7 +70,9 @@ public:
return;
}
// If none of the above is hit, fallback to a regular wait
- semaphore.Wait(tick);
+ while (!semaphore.Wait(tick)) {
+ }
+ Refresh();
}
private:
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 30b47a7a0..fd334a146 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -58,18 +58,28 @@ struct DrawParams {
bool is_indexed;
};
-VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index) {
+VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index, float scale) {
const auto& src = regs.viewport_transform[index];
- const float width = src.scale_x * 2.0f;
- float y = src.translate_y - src.scale_y;
- float height = src.scale_y * 2.0f;
+ const auto conv = [scale](float value) {
+ float new_value = value * scale;
+ if (scale < 1.0f) {
+ const bool sign = std::signbit(value);
+ new_value = std::round(std::abs(new_value));
+ new_value = sign ? -new_value : new_value;
+ }
+ return new_value;
+ };
+ const float x = conv(src.translate_x - src.scale_x);
+ const float width = conv(src.scale_x * 2.0f);
+ float y = conv(src.translate_y - src.scale_y);
+ float height = conv(src.scale_y * 2.0f);
if (regs.screen_y_control.y_negate) {
y += height;
height = -height;
}
const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f;
VkViewport viewport{
- .x = src.translate_x - src.scale_x,
+ .x = x,
.y = y,
.width = width != 0.0f ? width : 1.0f,
.height = height != 0.0f ? height : 1.0f,
@@ -83,14 +93,27 @@ VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t in
return viewport;
}
-VkRect2D GetScissorState(const Maxwell& regs, size_t index) {
+VkRect2D GetScissorState(const Maxwell& regs, size_t index, u32 up_scale = 1, u32 down_shift = 0) {
const auto& src = regs.scissor_test[index];
VkRect2D scissor;
+ const auto scale_up = [&](s32 value) -> s32 {
+ if (value == 0) {
+ return 0U;
+ }
+ const s32 upset = value * up_scale;
+ s32 acumm = 0;
+ if ((up_scale >> down_shift) == 0) {
+ acumm = upset % 2;
+ }
+ const s32 converted_value = (value * up_scale) >> down_shift;
+ return value < 0 ? std::min<s32>(converted_value - acumm, -1)
+ : std::max<s32>(converted_value + acumm, 1);
+ };
if (src.enable) {
- scissor.offset.x = static_cast<s32>(src.min_x);
- scissor.offset.y = static_cast<s32>(src.min_y);
- scissor.extent.width = src.max_x - src.min_x;
- scissor.extent.height = src.max_y - src.min_y;
+ scissor.offset.x = scale_up(static_cast<s32>(src.min_x));
+ scissor.offset.y = scale_up(static_cast<s32>(src.min_y));
+ scissor.extent.width = scale_up(src.max_x - src.min_x);
+ scissor.extent.height = scale_up(src.max_y - src.min_y);
} else {
scissor.offset.x = 0;
scissor.offset.y = 0;
@@ -199,7 +222,7 @@ void RasterizerVulkan::Clear() {
query_cache.UpdateCounters();
- const auto& regs = maxwell3d.regs;
+ auto& regs = maxwell3d.regs;
const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
regs.clear_buffers.A;
const bool use_depth = regs.clear_buffers.Z;
@@ -214,8 +237,16 @@ void RasterizerVulkan::Clear() {
const VkExtent2D render_area = framebuffer->RenderArea();
scheduler.RequestRenderpass(framebuffer);
+ u32 up_scale = 1;
+ u32 down_shift = 0;
+ if (texture_cache.IsRescaling()) {
+ up_scale = Settings::values.resolution_info.up_scale;
+ down_shift = Settings::values.resolution_info.down_shift;
+ }
+ UpdateViewportsState(regs);
+
VkClearRect clear_rect{
- .rect = GetScissorState(regs, 0),
+ .rect = GetScissorState(regs, 0, up_scale, down_shift),
.baseArrayLayer = regs.clear_buffers.layer,
.layerCount = 1,
};
@@ -230,7 +261,38 @@ void RasterizerVulkan::Clear() {
const u32 color_attachment = regs.clear_buffers.RT;
if (use_color && framebuffer->HasAspectColorBit(color_attachment)) {
VkClearValue clear_value;
- std::memcpy(clear_value.color.float32, regs.clear_color, sizeof(regs.clear_color));
+ bool is_integer = false;
+ bool is_signed = false;
+ size_t int_size = 8;
+ for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++i) {
+ const auto& this_rt = regs.rt[i];
+ if (this_rt.Address() == 0) {
+ continue;
+ }
+ if (this_rt.format == Tegra::RenderTargetFormat::NONE) {
+ continue;
+ }
+ const auto format =
+ VideoCore::Surface::PixelFormatFromRenderTargetFormat(this_rt.format);
+ is_integer = IsPixelFormatInteger(format);
+ is_signed = IsPixelFormatSignedInteger(format);
+ int_size = PixelComponentSizeBitsInteger(format);
+ break;
+ }
+ if (!is_integer) {
+ std::memcpy(clear_value.color.float32, regs.clear_color, sizeof(regs.clear_color));
+ } else if (!is_signed) {
+ for (size_t i = 0; i < 4; i++) {
+ clear_value.color.uint32[i] = static_cast<u32>(
+ static_cast<f32>(static_cast<u64>(int_size) << 1U) * regs.clear_color[i]);
+ }
+ } else {
+ for (size_t i = 0; i < 4; i++) {
+ clear_value.color.int32[i] =
+ static_cast<s32>(static_cast<f32>(static_cast<s64>(int_size - 1) << 1) *
+ (regs.clear_color[i] - 0.5f));
+ }
+ }
scheduler.Record([color_attachment, clear_value, clear_rect](vk::CommandBuffer cmdbuf) {
const VkClearAttachment attachment{
@@ -595,15 +657,17 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg
if (!state_tracker.TouchViewports()) {
return;
}
+ const bool is_rescaling{texture_cache.IsRescaling()};
+ const float scale = is_rescaling ? Settings::values.resolution_info.up_factor : 1.0f;
const std::array viewports{
- GetViewportState(device, regs, 0), GetViewportState(device, regs, 1),
- GetViewportState(device, regs, 2), GetViewportState(device, regs, 3),
- GetViewportState(device, regs, 4), GetViewportState(device, regs, 5),
- GetViewportState(device, regs, 6), GetViewportState(device, regs, 7),
- GetViewportState(device, regs, 8), GetViewportState(device, regs, 9),
- GetViewportState(device, regs, 10), GetViewportState(device, regs, 11),
- GetViewportState(device, regs, 12), GetViewportState(device, regs, 13),
- GetViewportState(device, regs, 14), GetViewportState(device, regs, 15),
+ GetViewportState(device, regs, 0, scale), GetViewportState(device, regs, 1, scale),
+ GetViewportState(device, regs, 2, scale), GetViewportState(device, regs, 3, scale),
+ GetViewportState(device, regs, 4, scale), GetViewportState(device, regs, 5, scale),
+ GetViewportState(device, regs, 6, scale), GetViewportState(device, regs, 7, scale),
+ GetViewportState(device, regs, 8, scale), GetViewportState(device, regs, 9, scale),
+ GetViewportState(device, regs, 10, scale), GetViewportState(device, regs, 11, scale),
+ GetViewportState(device, regs, 12, scale), GetViewportState(device, regs, 13, scale),
+ GetViewportState(device, regs, 14, scale), GetViewportState(device, regs, 15, scale),
};
scheduler.Record([viewports](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewports); });
}
@@ -612,13 +676,29 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs
if (!state_tracker.TouchScissors()) {
return;
}
+ u32 up_scale = 1;
+ u32 down_shift = 0;
+ if (texture_cache.IsRescaling()) {
+ up_scale = Settings::values.resolution_info.up_scale;
+ down_shift = Settings::values.resolution_info.down_shift;
+ }
const std::array scissors{
- GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2),
- GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5),
- GetScissorState(regs, 6), GetScissorState(regs, 7), GetScissorState(regs, 8),
- GetScissorState(regs, 9), GetScissorState(regs, 10), GetScissorState(regs, 11),
- GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14),
- GetScissorState(regs, 15),
+ GetScissorState(regs, 0, up_scale, down_shift),
+ GetScissorState(regs, 1, up_scale, down_shift),
+ GetScissorState(regs, 2, up_scale, down_shift),
+ GetScissorState(regs, 3, up_scale, down_shift),
+ GetScissorState(regs, 4, up_scale, down_shift),
+ GetScissorState(regs, 5, up_scale, down_shift),
+ GetScissorState(regs, 6, up_scale, down_shift),
+ GetScissorState(regs, 7, up_scale, down_shift),
+ GetScissorState(regs, 8, up_scale, down_shift),
+ GetScissorState(regs, 9, up_scale, down_shift),
+ GetScissorState(regs, 10, up_scale, down_shift),
+ GetScissorState(regs, 11, up_scale, down_shift),
+ GetScissorState(regs, 12, up_scale, down_shift),
+ GetScissorState(regs, 13, up_scale, down_shift),
+ GetScissorState(regs, 14, up_scale, down_shift),
+ GetScissorState(regs, 15, up_scale, down_shift),
};
scheduler.Record([scissors](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissors); });
}
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 0c11c814f..3bfdf41ba 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -128,6 +128,15 @@ bool VKScheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) {
return true;
}
+bool VKScheduler::UpdateRescaling(bool is_rescaling) {
+ if (state.rescaling_defined && is_rescaling == state.is_rescaling) {
+ return false;
+ }
+ state.rescaling_defined = true;
+ state.is_rescaling = is_rescaling;
+ return true;
+}
+
void VKScheduler::WorkerThread(std::stop_token stop_token) {
Common::SetCurrentThreadName("yuzu:VulkanWorker");
do {
@@ -227,6 +236,7 @@ void VKScheduler::AllocateNewContext() {
void VKScheduler::InvalidateState() {
state.graphics_pipeline = nullptr;
+ state.rescaling_defined = false;
state_tracker.InvalidateCommandBufferState();
}
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 85fc1712f..1b06c9296 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -56,6 +56,9 @@ public:
/// Update the pipeline to the current execution context.
bool UpdateGraphicsPipeline(GraphicsPipeline* pipeline);
+ /// Update the rescaling state. Returns true if the state has to be updated.
+ bool UpdateRescaling(bool is_rescaling);
+
/// Invalidates current command buffer state except for render passes
void InvalidateState();
@@ -185,6 +188,8 @@ private:
VkFramebuffer framebuffer = nullptr;
VkExtent2D render_area = {0, 0};
GraphicsPipeline* graphics_pipeline = nullptr;
+ bool is_rescaling = false;
+ bool rescaling_defined = false;
};
void WorkerThread(std::stop_token stop_token);
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h
index 2f2d6b31f..40a149832 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.h
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.h
@@ -71,11 +71,15 @@ public:
}
bool TouchViewports() {
- return Exchange(Dirty::Viewports, false);
+ const bool dirty_viewports = Exchange(Dirty::Viewports, false);
+ const bool rescale_viewports = Exchange(VideoCommon::Dirty::RescaleViewports, false);
+ return dirty_viewports || rescale_viewports;
}
bool TouchScissors() {
- return Exchange(Dirty::Scissors, false);
+ const bool dirty_scissors = Exchange(Dirty::Scissors, false);
+ const bool rescale_scissors = Exchange(VideoCommon::Dirty::RescaleScissors, false);
+ return dirty_scissors || rescale_scissors;
}
bool TouchDepthBias() {
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 06c5fb867..407fd2a15 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -32,10 +32,12 @@ using Tegra::Engines::Fermi2D;
using Tegra::Texture::SwizzleSource;
using Tegra::Texture::TextureMipmapFilter;
using VideoCommon::BufferImageCopy;
+using VideoCommon::ImageFlagBits;
using VideoCommon::ImageInfo;
using VideoCommon::ImageType;
using VideoCommon::SubresourceRange;
using VideoCore::Surface::IsPixelFormatASTC;
+using VideoCore::Surface::IsPixelFormatInteger;
namespace {
constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
@@ -588,8 +590,158 @@ struct RangedBarrierRange {
UNREACHABLE_MSG("Invalid image format={}", format);
return VK_FORMAT_R32_UINT;
}
+
+void BlitScale(VKScheduler& scheduler, VkImage src_image, VkImage dst_image, const ImageInfo& info,
+ VkImageAspectFlags aspect_mask, const Settings::ResolutionScalingInfo& resolution,
+ bool up_scaling = true) {
+ const bool is_2d = info.type == ImageType::e2D;
+ const auto resources = info.resources;
+ const VkExtent2D extent{
+ .width = info.size.width,
+ .height = info.size.height,
+ };
+ // Depth and integer formats must use NEAREST filter for blits.
+ const bool is_color{aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT};
+ const bool is_bilinear{is_color && !IsPixelFormatInteger(info.format)};
+ const VkFilter vk_filter = is_bilinear ? VK_FILTER_LINEAR : VK_FILTER_NEAREST;
+
+ scheduler.RequestOutsideRenderPassOperationContext();
+ scheduler.Record([dst_image, src_image, extent, resources, aspect_mask, resolution, is_2d,
+ vk_filter, up_scaling](vk::CommandBuffer cmdbuf) {
+ const VkOffset2D src_size{
+ .x = static_cast<s32>(up_scaling ? extent.width : resolution.ScaleUp(extent.width)),
+ .y = static_cast<s32>(is_2d && up_scaling ? extent.height
+ : resolution.ScaleUp(extent.height)),
+ };
+ const VkOffset2D dst_size{
+ .x = static_cast<s32>(up_scaling ? resolution.ScaleUp(extent.width) : extent.width),
+ .y = static_cast<s32>(is_2d && up_scaling ? resolution.ScaleUp(extent.height)
+ : extent.height),
+ };
+ boost::container::small_vector<VkImageBlit, 4> regions;
+ regions.reserve(resources.levels);
+ for (s32 level = 0; level < resources.levels; level++) {
+ regions.push_back({
+ .srcSubresource{
+ .aspectMask = aspect_mask,
+ .mipLevel = static_cast<u32>(level),
+ .baseArrayLayer = 0,
+ .layerCount = static_cast<u32>(resources.layers),
+ },
+ .srcOffsets{
+ {
+ .x = 0,
+ .y = 0,
+ .z = 0,
+ },
+ {
+ .x = std::max(1, src_size.x >> level),
+ .y = std::max(1, src_size.y >> level),
+ .z = 1,
+ },
+ },
+ .dstSubresource{
+ .aspectMask = aspect_mask,
+ .mipLevel = static_cast<u32>(level),
+ .baseArrayLayer = 0,
+ .layerCount = static_cast<u32>(resources.layers),
+ },
+ .dstOffsets{
+ {
+ .x = 0,
+ .y = 0,
+ .z = 0,
+ },
+ {
+ .x = std::max(1, dst_size.x >> level),
+ .y = std::max(1, dst_size.y >> level),
+ .z = 1,
+ },
+ },
+ });
+ }
+ const VkImageSubresourceRange subresource_range{
+ .aspectMask = aspect_mask,
+ .baseMipLevel = 0,
+ .levelCount = VK_REMAINING_MIP_LEVELS,
+ .baseArrayLayer = 0,
+ .layerCount = VK_REMAINING_ARRAY_LAYERS,
+ };
+ const std::array read_barriers{
+ VkImageMemoryBarrier{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
+ .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = src_image,
+ .subresourceRange = subresource_range,
+ },
+ VkImageMemoryBarrier{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT |
+ VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
+ VK_ACCESS_TRANSFER_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+ .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED, // Discard contents
+ .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = dst_image,
+ .subresourceRange = subresource_range,
+ },
+ };
+ const std::array write_barriers{
+ VkImageMemoryBarrier{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = 0,
+ .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT | VK_ACCESS_MEMORY_READ_BIT,
+ .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+ .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = src_image,
+ .subresourceRange = subresource_range,
+ },
+ VkImageMemoryBarrier{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT | VK_ACCESS_MEMORY_READ_BIT,
+ .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+ .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = dst_image,
+ .subresourceRange = subresource_range,
+ },
+ };
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+ 0, nullptr, nullptr, read_barriers);
+ cmdbuf.BlitImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image,
+ VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, regions, vk_filter);
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+ 0, nullptr, nullptr, write_barriers);
+ });
+}
} // Anonymous namespace
+TextureCacheRuntime::TextureCacheRuntime(const Device& device_, VKScheduler& scheduler_,
+ MemoryAllocator& memory_allocator_,
+ StagingBufferPool& staging_buffer_pool_,
+ BlitImageHelper& blit_image_helper_,
+ ASTCDecoderPass& astc_decoder_pass_,
+ RenderPassCache& render_pass_cache_)
+ : device{device_}, scheduler{scheduler_}, memory_allocator{memory_allocator_},
+ staging_buffer_pool{staging_buffer_pool_}, blit_image_helper{blit_image_helper_},
+ astc_decoder_pass{astc_decoder_pass_}, render_pass_cache{render_pass_cache_},
+ resolution{Settings::values.resolution_info} {}
+
void TextureCacheRuntime::Finish() {
scheduler.Finish();
}
@@ -614,8 +766,8 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst
return;
}
if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT && !is_src_msaa && !is_dst_msaa) {
- blit_image_helper.BlitColor(dst_framebuffer, src, dst_region, src_region, filter,
- operation);
+ blit_image_helper.BlitColor(dst_framebuffer, src.Handle(Shader::TextureType::Color2D),
+ dst_region, src_region, filter, operation);
return;
}
if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
@@ -719,26 +871,29 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst
});
}
-void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) {
+void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view,
+ bool rescaled) {
+ const u32 up_scale = rescaled ? resolution.up_scale : 1;
+ const u32 down_shift = rescaled ? resolution.down_shift : 0;
switch (dst_view.format) {
case PixelFormat::R16_UNORM:
if (src_view.format == PixelFormat::D16_UNORM) {
- return blit_image_helper.ConvertD16ToR16(dst, src_view);
+ return blit_image_helper.ConvertD16ToR16(dst, src_view, up_scale, down_shift);
}
break;
case PixelFormat::R32_FLOAT:
if (src_view.format == PixelFormat::D32_FLOAT) {
- return blit_image_helper.ConvertD32ToR32(dst, src_view);
+ return blit_image_helper.ConvertD32ToR32(dst, src_view, up_scale, down_shift);
}
break;
case PixelFormat::D16_UNORM:
if (src_view.format == PixelFormat::R16_UNORM) {
- return blit_image_helper.ConvertR16ToD16(dst, src_view);
+ return blit_image_helper.ConvertR16ToD16(dst, src_view, up_scale, down_shift);
}
break;
case PixelFormat::D32_FLOAT:
if (src_view.format == PixelFormat::R32_FLOAT) {
- return blit_image_helper.ConvertR32ToD32(dst, src_view);
+ return blit_image_helper.ConvertR32ToD32(dst, src_view, up_scale, down_shift);
}
break;
default:
@@ -840,36 +995,39 @@ u64 TextureCacheRuntime::GetDeviceLocalMemory() const {
return device.GetDeviceLocalMemory();
}
-Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_,
+void TextureCacheRuntime::TickFrame() {}
+
+Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_,
VAddr cpu_addr_)
- : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler},
- image(MakeImage(runtime.device, info)),
- commit(runtime.memory_allocator.Commit(image, MemoryUsage::DeviceLocal)),
+ : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime_.scheduler},
+ runtime{&runtime_}, original_image(MakeImage(runtime_.device, info)),
+ commit(runtime_.memory_allocator.Commit(original_image, MemoryUsage::DeviceLocal)),
aspect_mask(ImageAspectMask(info.format)) {
- if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) {
+ if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) {
if (Settings::values.accelerate_astc.GetValue()) {
flags |= VideoCommon::ImageFlagBits::AcceleratedUpload;
} else {
flags |= VideoCommon::ImageFlagBits::Converted;
}
}
- if (runtime.device.HasDebuggingToolAttached()) {
- image.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
+ if (runtime->device.HasDebuggingToolAttached()) {
+ original_image.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
}
static constexpr VkImageViewUsageCreateInfo storage_image_view_usage_create_info{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
.pNext = nullptr,
.usage = VK_IMAGE_USAGE_STORAGE_BIT,
};
- if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) {
- const auto& device = runtime.device.GetLogical();
+ current_image = *original_image;
+ if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) {
+ const auto& device = runtime->device.GetLogical();
storage_image_views.reserve(info.resources.levels);
for (s32 level = 0; level < info.resources.levels; ++level) {
storage_image_views.push_back(device.CreateImageView(VkImageViewCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = &storage_image_view_usage_create_info,
.flags = 0,
- .image = *image,
+ .image = *original_image,
.viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY,
.format = VK_FORMAT_A8B8G8R8_UNORM_PACK32,
.components{
@@ -890,26 +1048,39 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_
}
}
+Image::Image(const VideoCommon::NullImageParams& params) : VideoCommon::ImageBase{params} {}
+
Image::~Image() = default;
void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
// TODO: Move this to another API
+ const bool is_rescaled = True(flags & ImageFlagBits::Rescaled);
+ if (is_rescaled) {
+ ScaleDown(true);
+ }
scheduler->RequestOutsideRenderPassOperationContext();
std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask);
const VkBuffer src_buffer = map.buffer;
- const VkImage vk_image = *image;
+ const VkImage vk_image = *original_image;
const VkImageAspectFlags vk_aspect_mask = aspect_mask;
const bool is_initialized = std::exchange(initialized, true);
scheduler->Record([src_buffer, vk_image, vk_aspect_mask, is_initialized,
vk_copies](vk::CommandBuffer cmdbuf) {
CopyBufferToImage(cmdbuf, src_buffer, vk_image, vk_aspect_mask, is_initialized, vk_copies);
});
+ if (is_rescaled) {
+ ScaleUp();
+ }
}
void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
+ const bool is_rescaled = True(flags & ImageFlagBits::Rescaled);
+ if (is_rescaled) {
+ ScaleDown();
+ }
std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask);
scheduler->RequestOutsideRenderPassOperationContext();
- scheduler->Record([buffer = map.buffer, image = *image, aspect_mask = aspect_mask,
+ scheduler->Record([buffer = map.buffer, image = *original_image, aspect_mask = aspect_mask,
vk_copies](vk::CommandBuffer cmdbuf) {
const VkImageMemoryBarrier read_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
@@ -959,6 +1130,146 @@ void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferIm
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
0, memory_write_barrier, nullptr, image_write_barrier);
});
+ if (is_rescaled) {
+ ScaleUp(true);
+ }
+}
+
+bool Image::ScaleUp(bool ignore) {
+ if (True(flags & ImageFlagBits::Rescaled)) {
+ return false;
+ }
+ ASSERT(info.type != ImageType::Linear);
+ flags |= ImageFlagBits::Rescaled;
+ const auto& resolution = runtime->resolution;
+ if (!resolution.active) {
+ return false;
+ }
+ has_scaled = true;
+ const auto& device = runtime->device;
+ if (!scaled_image) {
+ const bool is_2d = info.type == ImageType::e2D;
+ const u32 scaled_width = resolution.ScaleUp(info.size.width);
+ const u32 scaled_height = is_2d ? resolution.ScaleUp(info.size.height) : info.size.height;
+ auto scaled_info = info;
+ scaled_info.size.width = scaled_width;
+ scaled_info.size.height = scaled_height;
+ scaled_image = MakeImage(device, scaled_info);
+ auto& allocator = runtime->memory_allocator;
+ scaled_commit = MemoryCommit(allocator.Commit(scaled_image, MemoryUsage::DeviceLocal));
+ ignore = false;
+ }
+ current_image = *scaled_image;
+ if (ignore) {
+ return true;
+ }
+
+ if (aspect_mask == 0) {
+ aspect_mask = ImageAspectMask(info.format);
+ }
+ static constexpr auto OPTIMAL_FORMAT = FormatType::Optimal;
+ const PixelFormat format = StorageFormat(info.format);
+ const auto vk_format = MaxwellToVK::SurfaceFormat(device, OPTIMAL_FORMAT, false, format).format;
+ const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
+ if (device.IsFormatSupported(vk_format, blit_usage, OPTIMAL_FORMAT)) {
+ BlitScale(*scheduler, *original_image, *scaled_image, info, aspect_mask, resolution);
+ } else {
+ return BlitScaleHelper(true);
+ }
+ return true;
+}
+
+bool Image::ScaleDown(bool ignore) {
+ if (False(flags & ImageFlagBits::Rescaled)) {
+ return false;
+ }
+ ASSERT(info.type != ImageType::Linear);
+ flags &= ~ImageFlagBits::Rescaled;
+ const auto& resolution = runtime->resolution;
+ if (!resolution.active) {
+ return false;
+ }
+ current_image = *original_image;
+ if (ignore) {
+ return true;
+ }
+ if (aspect_mask == 0) {
+ aspect_mask = ImageAspectMask(info.format);
+ }
+ static constexpr auto OPTIMAL_FORMAT = FormatType::Optimal;
+ const PixelFormat format = StorageFormat(info.format);
+ const auto& device = runtime->device;
+ const auto vk_format = MaxwellToVK::SurfaceFormat(device, OPTIMAL_FORMAT, false, format).format;
+ const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
+ if (device.IsFormatSupported(vk_format, blit_usage, OPTIMAL_FORMAT)) {
+ BlitScale(*scheduler, *scaled_image, *original_image, info, aspect_mask, resolution, false);
+ } else {
+ return BlitScaleHelper(false);
+ }
+ return true;
+}
+
+bool Image::BlitScaleHelper(bool scale_up) {
+ using namespace VideoCommon;
+ static constexpr auto BLIT_OPERATION = Tegra::Engines::Fermi2D::Operation::SrcCopy;
+ const bool is_color{aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT};
+ const bool is_bilinear{is_color && !IsPixelFormatInteger(info.format)};
+ const auto operation = is_bilinear ? Tegra::Engines::Fermi2D::Filter::Bilinear
+ : Tegra::Engines::Fermi2D::Filter::Point;
+
+ const bool is_2d = info.type == ImageType::e2D;
+ const auto& resolution = runtime->resolution;
+ const u32 scaled_width = resolution.ScaleUp(info.size.width);
+ const u32 scaled_height = is_2d ? resolution.ScaleUp(info.size.height) : info.size.height;
+ std::unique_ptr<ImageView>& blit_view = scale_up ? scale_view : normal_view;
+ std::unique_ptr<Framebuffer>& blit_framebuffer =
+ scale_up ? scale_framebuffer : normal_framebuffer;
+ if (!blit_view) {
+ const auto view_info = ImageViewInfo(ImageViewType::e2D, info.format);
+ blit_view = std::make_unique<ImageView>(*runtime, view_info, NULL_IMAGE_ID, *this);
+ }
+
+ const u32 src_width = scale_up ? info.size.width : scaled_width;
+ const u32 src_height = scale_up ? info.size.height : scaled_height;
+ const u32 dst_width = scale_up ? scaled_width : info.size.width;
+ const u32 dst_height = scale_up ? scaled_height : info.size.height;
+ const Region2D src_region{
+ .start = {0, 0},
+ .end = {static_cast<s32>(src_width), static_cast<s32>(src_height)},
+ };
+ const Region2D dst_region{
+ .start = {0, 0},
+ .end = {static_cast<s32>(dst_width), static_cast<s32>(dst_height)},
+ };
+ const VkExtent2D extent{
+ .width = std::max(scaled_width, info.size.width),
+ .height = std::max(scaled_height, info.size.width),
+ };
+
+ auto* view_ptr = blit_view.get();
+ if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) {
+ if (!blit_framebuffer) {
+ blit_framebuffer = std::make_unique<Framebuffer>(*runtime, view_ptr, nullptr, extent);
+ }
+ const auto color_view = blit_view->Handle(Shader::TextureType::Color2D);
+
+ runtime->blit_image_helper.BlitColor(blit_framebuffer.get(), color_view, dst_region,
+ src_region, operation, BLIT_OPERATION);
+ } else if (!runtime->device.IsBlitDepthStencilSupported() &&
+ aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
+ if (!blit_framebuffer) {
+ blit_framebuffer = std::make_unique<Framebuffer>(*runtime, nullptr, view_ptr, extent);
+ }
+ runtime->blit_image_helper.BlitDepthStencil(blit_framebuffer.get(), blit_view->DepthView(),
+ blit_view->StencilView(), dst_region,
+ src_region, operation, BLIT_OPERATION);
+ } else {
+ // TODO: Use helper blits where applicable
+ flags &= ~ImageFlagBits::Rescaled;
+ LOG_ERROR(Render_Vulkan, "Device does not support scaling format {}", info.format);
+ return false;
+ }
+ return true;
}
ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
@@ -1052,9 +1363,11 @@ ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
: VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_},
buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {}
-ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams& params)
+ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams& params)
: VideoCommon::ImageViewBase{params} {}
+ImageView::~ImageView() = default;
+
VkImageView ImageView::DepthView() {
if (depth_view) {
return *depth_view;
@@ -1137,7 +1450,8 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t
LOG_WARNING(Render_Vulkan, "VK_EXT_sampler_filter_minmax is required");
}
// Some games have samplers with garbage. Sanitize them here.
- const float max_anisotropy = std::clamp(tsc.MaxAnisotropy(), 1.0f, 16.0f);
+ const f32 max_anisotropy = std::clamp(tsc.MaxAnisotropy(), 1.0f, 16.0f);
+
sampler = device.GetLogical().CreateSampler(VkSamplerCreateInfo{
.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
.pNext = pnext,
@@ -1162,7 +1476,29 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t
}
Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers,
- ImageView* depth_buffer, const VideoCommon::RenderTargets& key) {
+ ImageView* depth_buffer, const VideoCommon::RenderTargets& key)
+ : render_area{VkExtent2D{
+ .width = key.size.width,
+ .height = key.size.height,
+ }} {
+ CreateFramebuffer(runtime, color_buffers, depth_buffer);
+ if (runtime.device.HasDebuggingToolAttached()) {
+ framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str());
+ }
+}
+
+Framebuffer::Framebuffer(TextureCacheRuntime& runtime, ImageView* color_buffer,
+ ImageView* depth_buffer, VkExtent2D extent)
+ : render_area{extent} {
+ std::array<ImageView*, NUM_RT> color_buffers{color_buffer};
+ CreateFramebuffer(runtime, color_buffers, depth_buffer);
+}
+
+Framebuffer::~Framebuffer() = default;
+
+void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime,
+ std::span<ImageView*, NUM_RT> color_buffers,
+ ImageView* depth_buffer) {
std::vector<VkImageView> attachments;
RenderPassKey renderpass_key{};
s32 num_layers = 1;
@@ -1200,10 +1536,6 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
renderpass = runtime.render_pass_cache.Get(renderpass_key);
- render_area = VkExtent2D{
- .width = key.size.width,
- .height = key.size.height,
- };
num_color_buffers = static_cast<u32>(num_colors);
framebuffer = runtime.device.GetLogical().CreateFramebuffer({
.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
@@ -1212,13 +1544,10 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
.renderPass = renderpass,
.attachmentCount = static_cast<u32>(attachments.size()),
.pAttachments = attachments.data(),
- .width = key.size.width,
- .height = key.size.height,
+ .width = render_area.width,
+ .height = render_area.height,
.layers = static_cast<u32>(std::max(num_layers, 1)),
});
- if (runtime.device.HasDebuggingToolAttached()) {
- framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str());
- }
}
void TextureCacheRuntime::AccelerateImageUpload(
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index b09c468e4..ff28b4e96 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -13,6 +13,10 @@
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
+namespace Settings {
+struct ResolutionScalingInfo;
+}
+
namespace Vulkan {
using VideoCommon::ImageId;
@@ -31,14 +35,14 @@ class RenderPassCache;
class StagingBufferPool;
class VKScheduler;
-struct TextureCacheRuntime {
- const Device& device;
- VKScheduler& scheduler;
- MemoryAllocator& memory_allocator;
- StagingBufferPool& staging_buffer_pool;
- BlitImageHelper& blit_image_helper;
- ASTCDecoderPass& astc_decoder_pass;
- RenderPassCache& render_pass_cache;
+class TextureCacheRuntime {
+public:
+ explicit TextureCacheRuntime(const Device& device_, VKScheduler& scheduler_,
+ MemoryAllocator& memory_allocator_,
+ StagingBufferPool& staging_buffer_pool_,
+ BlitImageHelper& blit_image_helper_,
+ ASTCDecoderPass& astc_decoder_pass_,
+ RenderPassCache& render_pass_cache_);
void Finish();
@@ -46,6 +50,10 @@ struct TextureCacheRuntime {
StagingBufferRef DownloadStagingBuffer(size_t size);
+ void TickFrame();
+
+ u64 GetDeviceLocalMemory() const;
+
void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
const Region2D& dst_region, const Region2D& src_region,
Tegra::Engines::Fermi2D::Filter filter,
@@ -53,7 +61,7 @@ struct TextureCacheRuntime {
void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
- void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view);
+ void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled);
bool CanAccelerateImageUpload(Image&) const noexcept {
return false;
@@ -74,13 +82,21 @@ struct TextureCacheRuntime {
return true;
}
- u64 GetDeviceLocalMemory() const;
+ const Device& device;
+ VKScheduler& scheduler;
+ MemoryAllocator& memory_allocator;
+ StagingBufferPool& staging_buffer_pool;
+ BlitImageHelper& blit_image_helper;
+ ASTCDecoderPass& astc_decoder_pass;
+ RenderPassCache& render_pass_cache;
+ const Settings::ResolutionScalingInfo& resolution;
};
class Image : public VideoCommon::ImageBase {
public:
explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
VAddr cpu_addr);
+ explicit Image(const VideoCommon::NullImageParams&);
~Image();
@@ -97,7 +113,7 @@ public:
std::span<const VideoCommon::BufferImageCopy> copies);
[[nodiscard]] VkImage Handle() const noexcept {
- return *image;
+ return current_image;
}
[[nodiscard]] VkImageAspectFlags AspectMask() const noexcept {
@@ -113,14 +129,30 @@ public:
return std::exchange(initialized, true);
}
+ bool ScaleUp(bool ignore = false);
+
+ bool ScaleDown(bool ignore = false);
+
private:
- VKScheduler* scheduler;
- vk::Image image;
+ bool BlitScaleHelper(bool scale_up);
+
+ VKScheduler* scheduler{};
+ TextureCacheRuntime* runtime{};
+
+ vk::Image original_image;
MemoryCommit commit;
- vk::ImageView image_view;
std::vector<vk::ImageView> storage_image_views;
VkImageAspectFlags aspect_mask = 0;
bool initialized = false;
+ vk::Image scaled_image{};
+ MemoryCommit scaled_commit{};
+ VkImage current_image{};
+
+ std::unique_ptr<Framebuffer> scale_framebuffer;
+ std::unique_ptr<ImageView> scale_view;
+
+ std::unique_ptr<Framebuffer> normal_framebuffer;
+ std::unique_ptr<ImageView> normal_view;
};
class ImageView : public VideoCommon::ImageViewBase {
@@ -128,7 +160,15 @@ public:
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&,
const VideoCommon::ImageViewInfo&, GPUVAddr);
- explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&);
+ explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams&);
+
+ ~ImageView();
+
+ ImageView(const ImageView&) = delete;
+ ImageView& operator=(const ImageView&) = delete;
+
+ ImageView(ImageView&&) = default;
+ ImageView& operator=(ImageView&&) = default;
[[nodiscard]] VkImageView DepthView();
@@ -197,9 +237,23 @@ private:
class Framebuffer {
public:
- explicit Framebuffer(TextureCacheRuntime&, std::span<ImageView*, NUM_RT> color_buffers,
+ explicit Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers,
ImageView* depth_buffer, const VideoCommon::RenderTargets& key);
+ explicit Framebuffer(TextureCacheRuntime& runtime, ImageView* color_buffer,
+ ImageView* depth_buffer, VkExtent2D extent);
+
+ ~Framebuffer();
+
+ Framebuffer(const Framebuffer&) = delete;
+ Framebuffer& operator=(const Framebuffer&) = delete;
+
+ Framebuffer(Framebuffer&&) = default;
+ Framebuffer& operator=(Framebuffer&&) = default;
+
+ void CreateFramebuffer(TextureCacheRuntime& runtime,
+ std::span<ImageView*, NUM_RT> color_buffers, ImageView* depth_buffer);
+
[[nodiscard]] VkFramebuffer Handle() const noexcept {
return *framebuffer;
}
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index eb1746265..58d262446 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -279,6 +279,80 @@ bool IsPixelFormatSRGB(PixelFormat format) {
}
}
+bool IsPixelFormatInteger(PixelFormat format) {
+ switch (format) {
+ case PixelFormat::A8B8G8R8_SINT:
+ case PixelFormat::A8B8G8R8_UINT:
+ case PixelFormat::A2B10G10R10_UINT:
+ case PixelFormat::R8_SINT:
+ case PixelFormat::R8_UINT:
+ case PixelFormat::R16G16B16A16_SINT:
+ case PixelFormat::R16G16B16A16_UINT:
+ case PixelFormat::R32G32B32A32_UINT:
+ case PixelFormat::R32G32B32A32_SINT:
+ case PixelFormat::R32G32_SINT:
+ case PixelFormat::R16_UINT:
+ case PixelFormat::R16_SINT:
+ case PixelFormat::R16G16_UINT:
+ case PixelFormat::R16G16_SINT:
+ case PixelFormat::R8G8_SINT:
+ case PixelFormat::R8G8_UINT:
+ case PixelFormat::R32G32_UINT:
+ case PixelFormat::R32_UINT:
+ case PixelFormat::R32_SINT:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool IsPixelFormatSignedInteger(PixelFormat format) {
+ switch (format) {
+ case PixelFormat::A8B8G8R8_SINT:
+ case PixelFormat::R8_SINT:
+ case PixelFormat::R16G16B16A16_SINT:
+ case PixelFormat::R32G32B32A32_SINT:
+ case PixelFormat::R32G32_SINT:
+ case PixelFormat::R16_SINT:
+ case PixelFormat::R16G16_SINT:
+ case PixelFormat::R8G8_SINT:
+ case PixelFormat::R32_SINT:
+ return true;
+ default:
+ return false;
+ }
+}
+
+size_t PixelComponentSizeBitsInteger(PixelFormat format) {
+ switch (format) {
+ case PixelFormat::A8B8G8R8_SINT:
+ case PixelFormat::A8B8G8R8_UINT:
+ case PixelFormat::R8_SINT:
+ case PixelFormat::R8_UINT:
+ case PixelFormat::R8G8_SINT:
+ case PixelFormat::R8G8_UINT:
+ return 8;
+ case PixelFormat::A2B10G10R10_UINT:
+ return 10;
+ case PixelFormat::R16G16B16A16_SINT:
+ case PixelFormat::R16G16B16A16_UINT:
+ case PixelFormat::R16_UINT:
+ case PixelFormat::R16_SINT:
+ case PixelFormat::R16G16_UINT:
+ case PixelFormat::R16G16_SINT:
+ return 16;
+ case PixelFormat::R32G32B32A32_UINT:
+ case PixelFormat::R32G32B32A32_SINT:
+ case PixelFormat::R32G32_SINT:
+ case PixelFormat::R32G32_UINT:
+ case PixelFormat::R32_UINT:
+ case PixelFormat::R32_SINT:
+ return 32;
+ default:
+ return 0;
+ }
+}
+
std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) {
return {DefaultBlockWidth(format), DefaultBlockHeight(format)};
}
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index 1503db81f..2ce7c7d33 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -460,6 +460,12 @@ bool IsPixelFormatASTC(PixelFormat format);
bool IsPixelFormatSRGB(PixelFormat format);
+bool IsPixelFormatInteger(PixelFormat format);
+
+bool IsPixelFormatSignedInteger(PixelFormat format);
+
+size_t PixelComponentSizeBitsInteger(PixelFormat format);
+
std::pair<u32, u32> GetASTCBlockSize(PixelFormat format);
u64 EstimatedDecompressedSize(u64 base_size, PixelFormat format);
diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp
index 6052d148a..3db2fdf34 100644
--- a/src/video_core/texture_cache/image_base.cpp
+++ b/src/video_core/texture_cache/image_base.cpp
@@ -60,15 +60,17 @@ namespace {
ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_)
: info{info_}, guest_size_bytes{CalculateGuestSizeInBytes(info)},
unswizzled_size_bytes{CalculateUnswizzledSizeBytes(info)},
- converted_size_bytes{CalculateConvertedSizeBytes(info)}, gpu_addr{gpu_addr_},
- cpu_addr{cpu_addr_}, cpu_addr_end{cpu_addr + guest_size_bytes},
- mip_level_offsets{CalculateMipLevelOffsets(info)} {
+ converted_size_bytes{CalculateConvertedSizeBytes(info)}, scale_rating{}, scale_tick{},
+ has_scaled{}, gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_},
+ cpu_addr_end{cpu_addr + guest_size_bytes}, mip_level_offsets{CalculateMipLevelOffsets(info)} {
if (info.type == ImageType::e3D) {
slice_offsets = CalculateSliceOffsets(info);
slice_subresources = CalculateSliceSubresources(info);
}
}
+ImageBase::ImageBase(const NullImageParams&) {}
+
ImageMapView::ImageMapView(GPUVAddr gpu_addr_, VAddr cpu_addr_, size_t size_, ImageId image_id_)
: gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_}, size{size_}, image_id{image_id_} {}
@@ -254,6 +256,8 @@ void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_i
}
lhs.aliased_images.push_back(std::move(lhs_alias));
rhs.aliased_images.push_back(std::move(rhs_alias));
+ lhs.flags &= ~ImageFlagBits::IsRescalable;
+ rhs.flags &= ~ImageFlagBits::IsRescalable;
}
} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
index 0c17a791b..89c111c00 100644
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@@ -33,6 +33,11 @@ enum class ImageFlagBits : u32 {
///< garbage collection priority
Alias = 1 << 11, ///< This image has aliases and has priority on garbage
///< collection
+
+ // Rescaler
+ Rescaled = 1 << 12,
+ CheckingRescalable = 1 << 13,
+ IsRescalable = 1 << 14,
};
DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
@@ -43,8 +48,11 @@ struct AliasedImage {
ImageId id;
};
+struct NullImageParams {};
+
struct ImageBase {
explicit ImageBase(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr);
+ explicit ImageBase(const NullImageParams&);
[[nodiscard]] std::optional<SubresourceBase> TryFindBase(GPUVAddr other_addr) const noexcept;
@@ -68,11 +76,18 @@ struct ImageBase {
void CheckBadOverlapState();
void CheckAliasState();
+ bool HasScaled() const {
+ return has_scaled;
+ }
+
ImageInfo info;
u32 guest_size_bytes = 0;
u32 unswizzled_size_bytes = 0;
u32 converted_size_bytes = 0;
+ u32 scale_rating = 0;
+ u64 scale_tick = 0;
+ bool has_scaled = false;
ImageFlagBits flags = ImageFlagBits::CpuModified;
GPUVAddr gpu_addr = 0;
diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp
index 64fd7010a..afb94082b 100644
--- a/src/video_core/texture_cache/image_info.cpp
+++ b/src/video_core/texture_cache/image_info.cpp
@@ -16,6 +16,7 @@ namespace VideoCommon {
using Tegra::Texture::TextureType;
using Tegra::Texture::TICEntry;
using VideoCore::Surface::PixelFormat;
+using VideoCore::Surface::SurfaceType;
ImageInfo::ImageInfo(const TICEntry& config) noexcept {
format = PixelFormatFromTextureInfo(config.format, config.r_type, config.g_type, config.b_type,
@@ -31,6 +32,7 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept {
.depth = config.block_depth,
};
}
+ rescaleable = false;
tile_width_spacing = config.tile_width_spacing;
if (config.texture_type != TextureType::Texture2D &&
config.texture_type != TextureType::Texture2DNoMipmap) {
@@ -41,6 +43,7 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept {
ASSERT(config.BaseLayer() == 0);
type = ImageType::e1D;
size.width = config.Width();
+ resources.layers = 1;
break;
case TextureType::Texture1DArray:
UNIMPLEMENTED_IF(config.BaseLayer() != 0);
@@ -52,12 +55,14 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept {
case TextureType::Texture2DNoMipmap:
ASSERT(config.Depth() == 1);
type = config.IsPitchLinear() ? ImageType::Linear : ImageType::e2D;
+ rescaleable = !config.IsPitchLinear();
size.width = config.Width();
size.height = config.Height();
resources.layers = config.BaseLayer() + 1;
break;
case TextureType::Texture2DArray:
type = ImageType::e2D;
+ rescaleable = true;
size.width = config.Width();
size.height = config.Height();
resources.layers = config.BaseLayer() + config.Depth();
@@ -82,10 +87,12 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept {
size.width = config.Width();
size.height = config.Height();
size.depth = config.Depth();
+ resources.layers = 1;
break;
case TextureType::Texture1DBuffer:
type = ImageType::Buffer;
size.width = config.Width();
+ resources.layers = 1;
break;
default:
UNREACHABLE_MSG("Invalid texture_type={}", static_cast<int>(config.texture_type.Value()));
@@ -95,12 +102,16 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept {
// FIXME: Call this without passing *this
layer_stride = CalculateLayerStride(*this);
maybe_unaligned_layer_stride = CalculateLayerSize(*this);
+ rescaleable &= (block.depth == 0) && resources.levels == 1;
+ rescaleable &= size.height > 256 || GetFormatType(format) != SurfaceType::ColorTexture;
+ downscaleable = size.height > 512;
}
}
ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept {
const auto& rt = regs.rt[index];
format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(rt.format);
+ rescaleable = false;
if (rt.tile_mode.is_pitch_linear) {
ASSERT(rt.tile_mode.is_3d == 0);
type = ImageType::Linear;
@@ -126,6 +137,9 @@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index)
type = ImageType::e3D;
size.depth = rt.depth;
} else {
+ rescaleable = block.depth == 0;
+ rescaleable &= size.height > 256;
+ downscaleable = size.height > 512;
type = ImageType::e2D;
resources.layers = rt.depth;
}
@@ -135,6 +149,7 @@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept {
format = VideoCore::Surface::PixelFormatFromDepthFormat(regs.zeta.format);
size.width = regs.zeta_width;
size.height = regs.zeta_height;
+ rescaleable = false;
resources.levels = 1;
layer_stride = regs.zeta.layer_stride * 4;
maybe_unaligned_layer_stride = layer_stride;
@@ -153,6 +168,8 @@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept {
type = ImageType::e3D;
size.depth = regs.zeta_depth;
} else {
+ rescaleable = block.depth == 0;
+ downscaleable = size.height > 512;
type = ImageType::e2D;
resources.layers = regs.zeta_depth;
}
@@ -161,6 +178,7 @@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept {
ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept {
UNIMPLEMENTED_IF_MSG(config.layer != 0, "Surface layer is not zero");
format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(config.format);
+ rescaleable = false;
if (config.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch) {
type = ImageType::Linear;
size = Extent3D{
@@ -171,6 +189,7 @@ ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept {
pitch = config.pitch;
} else {
type = config.block_depth > 0 ? ImageType::e3D : ImageType::e2D;
+
block = Extent3D{
.width = config.block_width,
.height = config.block_height,
@@ -183,6 +202,9 @@ ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept {
.height = config.height,
.depth = 1,
};
+ rescaleable = block.depth == 0;
+ rescaleable &= size.height > 256;
+ downscaleable = size.height > 512;
}
}
diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h
index 5049fc36e..5932dcaba 100644
--- a/src/video_core/texture_cache/image_info.h
+++ b/src/video_core/texture_cache/image_info.h
@@ -15,7 +15,7 @@ using Tegra::Texture::TICEntry;
using VideoCore::Surface::PixelFormat;
struct ImageInfo {
- explicit ImageInfo() = default;
+ ImageInfo() = default;
explicit ImageInfo(const TICEntry& config) noexcept;
explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept;
explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept;
@@ -33,6 +33,8 @@ struct ImageInfo {
u32 maybe_unaligned_layer_stride = 0;
u32 num_samples = 1;
u32 tile_width_spacing = 0;
+ bool rescaleable = false;
+ bool downscaleable = false;
};
} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp
index 450becbeb..c7b4fc231 100644
--- a/src/video_core/texture_cache/image_view_base.cpp
+++ b/src/video_core/texture_cache/image_view_base.cpp
@@ -37,14 +37,15 @@ ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_i
}
ImageViewBase::ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info)
- : format{info.format}, type{ImageViewType::Buffer}, size{
- .width = info.size.width,
- .height = 1,
- .depth = 1,
- } {
+ : image_id{NULL_IMAGE_ID}, format{info.format}, type{ImageViewType::Buffer},
+ size{
+ .width = info.size.width,
+ .height = 1,
+ .depth = 1,
+ } {
ASSERT_MSG(view_info.type == ImageViewType::Buffer, "Expected texture buffer");
}
-ImageViewBase::ImageViewBase(const NullImageParams&) {}
+ImageViewBase::ImageViewBase(const NullImageViewParams&) : image_id{NULL_IMAGE_ID} {}
} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_view_base.h b/src/video_core/texture_cache/image_view_base.h
index 903f715c5..9c24c5359 100644
--- a/src/video_core/texture_cache/image_view_base.h
+++ b/src/video_core/texture_cache/image_view_base.h
@@ -15,7 +15,7 @@ using VideoCore::Surface::PixelFormat;
struct ImageViewInfo;
struct ImageInfo;
-struct NullImageParams {};
+struct NullImageViewParams {};
enum class ImageViewFlagBits : u16 {
PreemtiveDownload = 1 << 0,
@@ -28,7 +28,7 @@ struct ImageViewBase {
explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info,
ImageId image_id);
explicit ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info);
- explicit ImageViewBase(const NullImageParams&);
+ explicit ImageViewBase(const NullImageViewParams&);
[[nodiscard]] bool IsBuffer() const noexcept {
return type == ImageViewType::Buffer;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index f70c1f764..4d2874bf2 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -7,6 +7,7 @@
#include <unordered_set>
#include "common/alignment.h"
+#include "common/settings.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/texture_cache/image_view_base.h"
@@ -44,21 +45,22 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
// Make sure the first index is reserved for the null resources
// This way the null resource becomes a compile time constant
- void(slot_image_views.insert(runtime, NullImageParams{}));
+ void(slot_images.insert(NullImageParams{}));
+ void(slot_image_views.insert(runtime, NullImageViewParams{}));
void(slot_samplers.insert(runtime, sampler_descriptor));
if constexpr (HAS_DEVICE_MEMORY_INFO) {
const auto device_memory = runtime.GetDeviceLocalMemory();
- const u64 possible_expected_memory = (device_memory * 3) / 10;
- const u64 possible_critical_memory = (device_memory * 6) / 10;
- expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY);
- critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY);
+ const u64 possible_expected_memory = (device_memory * 4) / 10;
+ const u64 possible_critical_memory = (device_memory * 7) / 10;
+ expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY - 256_MiB);
+ critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY - 512_MiB);
minimum_memory = 0;
} else {
- // on OGL we can be more conservatives as the driver takes care.
+ // On OpenGL we can be more conservatives as the driver takes care.
expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB;
critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB;
- minimum_memory = expected_memory;
+ minimum_memory = 0;
}
}
@@ -67,7 +69,7 @@ void TextureCache<P>::RunGarbageCollector() {
const bool high_priority_mode = total_used_memory >= expected_memory;
const bool aggressive_mode = total_used_memory >= critical_memory;
const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 100ULL;
- size_t num_iterations = aggressive_mode ? 10000 : (high_priority_mode ? 100 : 5);
+ size_t num_iterations = aggressive_mode ? 300 : (high_priority_mode ? 50 : 10);
const auto clean_up = [this, &num_iterations, high_priority_mode](ImageId image_id) {
if (num_iterations == 0) {
return true;
@@ -89,7 +91,7 @@ void TextureCache<P>::RunGarbageCollector() {
UntrackImage(image, image_id);
}
UnregisterImage(image_id);
- DeleteImage(image_id);
+ DeleteImage(image_id, image.scale_tick > frame_tick + 5);
return false;
};
lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up);
@@ -103,6 +105,7 @@ void TextureCache<P>::TickFrame() {
sentenced_images.Tick();
sentenced_framebuffers.Tick();
sentenced_image_view.Tick();
+ runtime.TickFrame();
++frame_tick;
}
@@ -122,15 +125,14 @@ void TextureCache<P>::MarkModification(ImageId id) noexcept {
}
template <class P>
-void TextureCache<P>::FillGraphicsImageViews(std::span<const u32> indices,
- std::span<ImageViewId> image_view_ids) {
- FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids);
+template <bool has_blacklists>
+void TextureCache<P>::FillGraphicsImageViews(std::span<ImageViewInOut> views) {
+ FillImageViews<has_blacklists>(graphics_image_table, graphics_image_view_ids, views);
}
template <class P>
-void TextureCache<P>::FillComputeImageViews(std::span<const u32> indices,
- std::span<ImageViewId> image_view_ids) {
- FillImageViews(compute_image_table, compute_image_view_ids, indices, image_view_ids);
+void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) {
+ FillImageViews<true>(compute_image_table, compute_image_view_ids, views);
}
template <class P>
@@ -190,6 +192,102 @@ void TextureCache<P>::SynchronizeComputeDescriptors() {
}
template <class P>
+bool TextureCache<P>::RescaleRenderTargets(bool is_clear) {
+ auto& flags = maxwell3d.dirty.flags;
+ u32 scale_rating = 0;
+ bool rescaled = false;
+ std::array<ImageId, NUM_RT> tmp_color_images{};
+ ImageId tmp_depth_image{};
+ do {
+ flags[Dirty::RenderTargets] = false;
+
+ has_deleted_images = false;
+ // Render target control is used on all render targets, so force look ups when this one is
+ // up
+ const bool force = flags[Dirty::RenderTargetControl];
+ flags[Dirty::RenderTargetControl] = false;
+
+ scale_rating = 0;
+ bool any_rescaled = false;
+ bool can_rescale = true;
+ const auto check_rescale = [&](ImageViewId view_id, ImageId& id_save) {
+ if (view_id != NULL_IMAGE_VIEW_ID && view_id != ImageViewId{}) {
+ const auto& view = slot_image_views[view_id];
+ const auto image_id = view.image_id;
+ id_save = image_id;
+ auto& image = slot_images[image_id];
+ can_rescale &= ImageCanRescale(image);
+ any_rescaled |= True(image.flags & ImageFlagBits::Rescaled) ||
+ GetFormatType(image.info.format) != SurfaceType::ColorTexture;
+ scale_rating = std::max<u32>(scale_rating, image.scale_tick <= frame_tick
+ ? image.scale_rating + 1U
+ : image.scale_rating);
+ } else {
+ id_save = CORRUPT_ID;
+ }
+ };
+ for (size_t index = 0; index < NUM_RT; ++index) {
+ ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
+ if (flags[Dirty::ColorBuffer0 + index] || force) {
+ flags[Dirty::ColorBuffer0 + index] = false;
+ BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear));
+ }
+ check_rescale(color_buffer_id, tmp_color_images[index]);
+ }
+ if (flags[Dirty::ZetaBuffer] || force) {
+ flags[Dirty::ZetaBuffer] = false;
+ BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear));
+ }
+ check_rescale(render_targets.depth_buffer_id, tmp_depth_image);
+
+ if (can_rescale) {
+ rescaled = any_rescaled || scale_rating >= 2;
+ const auto scale_up = [this](ImageId image_id) {
+ if (image_id != CORRUPT_ID) {
+ Image& image = slot_images[image_id];
+ ScaleUp(image);
+ }
+ };
+ if (rescaled) {
+ for (size_t index = 0; index < NUM_RT; ++index) {
+ scale_up(tmp_color_images[index]);
+ }
+ scale_up(tmp_depth_image);
+ scale_rating = 2;
+ }
+ } else {
+ rescaled = false;
+ const auto scale_down = [this](ImageId image_id) {
+ if (image_id != CORRUPT_ID) {
+ Image& image = slot_images[image_id];
+ ScaleDown(image);
+ }
+ };
+ for (size_t index = 0; index < NUM_RT; ++index) {
+ scale_down(tmp_color_images[index]);
+ }
+ scale_down(tmp_depth_image);
+ scale_rating = 1;
+ }
+ } while (has_deleted_images);
+ const auto set_rating = [this, scale_rating](ImageId image_id) {
+ if (image_id != CORRUPT_ID) {
+ Image& image = slot_images[image_id];
+ image.scale_rating = scale_rating;
+ if (image.scale_tick <= frame_tick) {
+ image.scale_tick = frame_tick + 1;
+ }
+ }
+ };
+ for (size_t index = 0; index < NUM_RT; ++index) {
+ set_rating(tmp_color_images[index]);
+ }
+ set_rating(tmp_depth_image);
+
+ return rescaled;
+}
+
+template <class P>
void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
using namespace VideoCommon::Dirty;
auto& flags = maxwell3d.dirty.flags;
@@ -202,24 +300,18 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id));
return;
}
- flags[Dirty::RenderTargets] = false;
- // Render target control is used on all render targets, so force look ups when this one is up
- const bool force = flags[Dirty::RenderTargetControl];
- flags[Dirty::RenderTargetControl] = false;
+ const bool rescaled = RescaleRenderTargets(is_clear);
+ if (is_rescaling != rescaled) {
+ flags[Dirty::RescaleViewports] = true;
+ flags[Dirty::RescaleScissors] = true;
+ is_rescaling = rescaled;
+ }
for (size_t index = 0; index < NUM_RT; ++index) {
ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
- if (flags[Dirty::ColorBuffer0 + index] || force) {
- flags[Dirty::ColorBuffer0 + index] = false;
- BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear));
- }
PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id));
}
- if (flags[Dirty::ZetaBuffer] || force) {
- flags[Dirty::ZetaBuffer] = false;
- BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear));
- }
const ImageViewId depth_buffer_id = render_targets.depth_buffer_id;
PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id));
@@ -227,9 +319,15 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
for (size_t index = 0; index < NUM_RT; ++index) {
render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index));
}
+ u32 up_scale = 1;
+ u32 down_shift = 0;
+ if (is_rescaling) {
+ up_scale = Settings::values.resolution_info.up_scale;
+ down_shift = Settings::values.resolution_info.down_shift;
+ }
render_targets.size = Extent2D{
- maxwell3d.regs.render_area.width,
- maxwell3d.regs.render_area.height,
+ (maxwell3d.regs.render_area.width * up_scale) >> down_shift,
+ (maxwell3d.regs.render_area.height * up_scale) >> down_shift,
};
flags[Dirty::DepthBiasGlobal] = true;
@@ -241,17 +339,28 @@ typename P::Framebuffer* TextureCache<P>::GetFramebuffer() {
}
template <class P>
+template <bool has_blacklists>
void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table,
std::span<ImageViewId> cached_image_view_ids,
- std::span<const u32> indices,
- std::span<ImageViewId> image_view_ids) {
- ASSERT(indices.size() <= image_view_ids.size());
+ std::span<ImageViewInOut> views) {
+ bool has_blacklisted;
do {
has_deleted_images = false;
- std::ranges::transform(indices, image_view_ids.begin(), [&](u32 index) {
- return VisitImageView(table, cached_image_view_ids, index);
- });
- } while (has_deleted_images);
+ if constexpr (has_blacklists) {
+ has_blacklisted = false;
+ }
+ for (ImageViewInOut& view : views) {
+ view.id = VisitImageView(table, cached_image_view_ids, view.index);
+ if constexpr (has_blacklists) {
+ if (view.blacklist && view.id != NULL_IMAGE_VIEW_ID) {
+ const ImageViewBase& image_view{slot_image_views[view.id]};
+ auto& image = slot_images[image_view.image_id];
+ has_blacklisted |= ScaleDown(image);
+ image.scale_rating = 0;
+ }
+ }
+ }
+ } while (has_deleted_images || (has_blacklists && has_blacklisted));
}
template <class P>
@@ -369,8 +478,43 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
PrepareImage(src_id, false, false);
PrepareImage(dst_id, true, false);
- ImageBase& dst_image = slot_images[dst_id];
- const ImageBase& src_image = slot_images[src_id];
+ Image& dst_image = slot_images[dst_id];
+ Image& src_image = slot_images[src_id];
+ bool is_src_rescaled = True(src_image.flags & ImageFlagBits::Rescaled);
+ bool is_dst_rescaled = True(dst_image.flags & ImageFlagBits::Rescaled);
+
+ const bool is_resolve = src_image.info.num_samples != 1 && dst_image.info.num_samples == 1;
+ if (is_src_rescaled != is_dst_rescaled) {
+ if (ImageCanRescale(src_image)) {
+ ScaleUp(src_image);
+ is_src_rescaled = True(src_image.flags & ImageFlagBits::Rescaled);
+ if (is_resolve) {
+ dst_image.info.rescaleable = true;
+ for (const auto& alias : dst_image.aliased_images) {
+ Image& other_image = slot_images[alias.id];
+ other_image.info.rescaleable = true;
+ }
+ }
+ }
+ if (ImageCanRescale(dst_image)) {
+ ScaleUp(dst_image);
+ is_dst_rescaled = True(dst_image.flags & ImageFlagBits::Rescaled);
+ }
+ }
+ if (is_resolve && (is_src_rescaled != is_dst_rescaled)) {
+ // A resolve requires both images to be the same dimensions. Resize down if needed.
+ ScaleDown(src_image);
+ ScaleDown(dst_image);
+ is_src_rescaled = True(src_image.flags & ImageFlagBits::Rescaled);
+ is_dst_rescaled = True(dst_image.flags & ImageFlagBits::Rescaled);
+ }
+ const auto& resolution = Settings::values.resolution_info;
+ const auto scale_region = [&](Region2D& region) {
+ region.start.x = resolution.ScaleUp(region.start.x);
+ region.start.y = resolution.ScaleUp(region.start.y);
+ region.end.x = resolution.ScaleUp(region.end.x);
+ region.end.y = resolution.ScaleUp(region.end.y);
+ };
// TODO: Deduplicate
const std::optional src_base = src_image.TryFindBase(src.Address());
@@ -378,20 +522,26 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range);
const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info);
const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples);
- const Region2D src_region{
+ Region2D src_region{
Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y},
Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y},
};
+ if (is_src_rescaled) {
+ scale_region(src_region);
+ }
const std::optional dst_base = dst_image.TryFindBase(dst.Address());
const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}};
const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range);
const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info);
const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples);
- const Region2D dst_region{
+ Region2D dst_region{
Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y},
Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y},
};
+ if (is_dst_rescaled) {
+ scale_region(dst_region);
+ }
// Always call this after src_framebuffer_id was queried, as the address might be invalidated.
Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id];
@@ -487,6 +637,20 @@ void TextureCache<P>::PopAsyncFlushes() {
}
template <class P>
+bool TextureCache<P>::IsRescaling() const noexcept {
+ return is_rescaling;
+}
+
+template <class P>
+bool TextureCache<P>::IsRescaling(const ImageViewBase& image_view) const noexcept {
+ if (image_view.type == ImageViewType::Buffer) {
+ return false;
+ }
+ const ImageBase& image = slot_images[image_view.image_id];
+ return True(image.flags & ImageFlagBits::Rescaled);
+}
+
+template <class P>
bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
bool is_modified = false;
ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) {
@@ -624,6 +788,105 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
}
template <class P>
+bool TextureCache<P>::ImageCanRescale(ImageBase& image) {
+ if (!image.info.rescaleable) {
+ return false;
+ }
+ if (Settings::values.resolution_info.downscale && !image.info.downscaleable) {
+ return false;
+ }
+ if (True(image.flags & (ImageFlagBits::Rescaled | ImageFlagBits::CheckingRescalable))) {
+ return true;
+ }
+ if (True(image.flags & ImageFlagBits::IsRescalable)) {
+ return true;
+ }
+ image.flags |= ImageFlagBits::CheckingRescalable;
+ for (const auto& alias : image.aliased_images) {
+ Image& other_image = slot_images[alias.id];
+ if (!ImageCanRescale(other_image)) {
+ image.flags &= ~ImageFlagBits::CheckingRescalable;
+ return false;
+ }
+ }
+ image.flags &= ~ImageFlagBits::CheckingRescalable;
+ image.flags |= ImageFlagBits::IsRescalable;
+ return true;
+}
+
+template <class P>
+void TextureCache<P>::InvalidateScale(Image& image) {
+ if (image.scale_tick <= frame_tick) {
+ image.scale_tick = frame_tick + 1;
+ }
+ const std::span<const ImageViewId> image_view_ids = image.image_view_ids;
+ auto& dirty = maxwell3d.dirty.flags;
+ dirty[Dirty::RenderTargets] = true;
+ dirty[Dirty::ZetaBuffer] = true;
+ for (size_t rt = 0; rt < NUM_RT; ++rt) {
+ dirty[Dirty::ColorBuffer0 + rt] = true;
+ }
+ for (const ImageViewId image_view_id : image_view_ids) {
+ std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{});
+ if (render_targets.depth_buffer_id == image_view_id) {
+ render_targets.depth_buffer_id = ImageViewId{};
+ }
+ }
+ RemoveImageViewReferences(image_view_ids);
+ RemoveFramebuffers(image_view_ids);
+ for (const ImageViewId image_view_id : image_view_ids) {
+ sentenced_image_view.Push(std::move(slot_image_views[image_view_id]));
+ slot_image_views.erase(image_view_id);
+ }
+ image.image_view_ids.clear();
+ image.image_view_infos.clear();
+ if constexpr (ENABLE_VALIDATION) {
+ std::ranges::fill(graphics_image_view_ids, CORRUPT_ID);
+ std::ranges::fill(compute_image_view_ids, CORRUPT_ID);
+ }
+ graphics_image_table.Invalidate();
+ compute_image_table.Invalidate();
+ has_deleted_images = true;
+}
+
+template <class P>
+u64 TextureCache<P>::GetScaledImageSizeBytes(ImageBase& image) {
+ const u64 scale_up = static_cast<u64>(Settings::values.resolution_info.up_scale *
+ Settings::values.resolution_info.up_scale);
+ const u64 down_shift = static_cast<u64>(Settings::values.resolution_info.down_shift +
+ Settings::values.resolution_info.down_shift);
+ const u64 image_size_bytes =
+ static_cast<u64>(std::max(image.guest_size_bytes, image.unswizzled_size_bytes));
+ const u64 tentative_size = (image_size_bytes * scale_up) >> down_shift;
+ const u64 fitted_size = Common::AlignUp(tentative_size, 1024);
+ return fitted_size;
+}
+
+template <class P>
+bool TextureCache<P>::ScaleUp(Image& image) {
+ const bool has_copy = image.HasScaled();
+ const bool rescaled = image.ScaleUp();
+ if (!rescaled) {
+ return false;
+ }
+ if (!has_copy) {
+ total_used_memory += GetScaledImageSizeBytes(image);
+ }
+ InvalidateScale(image);
+ return true;
+}
+
+template <class P>
+bool TextureCache<P>::ScaleDown(Image& image) {
+ const bool rescaled = image.ScaleDown();
+ if (!rescaled) {
+ return false;
+ }
+ InvalidateScale(image);
+ return true;
+}
+
+template <class P>
ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
RelaxedOptions options) {
std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
@@ -660,12 +923,18 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
std::vector<ImageId> right_aliased_ids;
std::unordered_set<ImageId> ignore_textures;
std::vector<ImageId> bad_overlap_ids;
+ std::vector<ImageId> all_siblings;
+ const bool this_is_linear = info.type == ImageType::Linear;
const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) {
if (True(overlap.flags & ImageFlagBits::Remapped)) {
ignore_textures.insert(overlap_id);
return;
}
- if (info.type == ImageType::Linear) {
+ const bool overlap_is_linear = overlap.info.type == ImageType::Linear;
+ if (this_is_linear != overlap_is_linear) {
+ return;
+ }
+ if (this_is_linear && overlap_is_linear) {
if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) {
// Alias linear images with the same pitch
left_aliased_ids.push_back(overlap_id);
@@ -681,6 +950,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
cpu_addr = solution->cpu_addr;
new_info.resources = solution->resources;
overlap_ids.push_back(overlap_id);
+ all_siblings.push_back(overlap_id);
return;
}
static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format;
@@ -688,10 +958,12 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) {
left_aliased_ids.push_back(overlap_id);
overlap.flags |= ImageFlagBits::Alias;
+ all_siblings.push_back(overlap_id);
} else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options,
broken_views, native_bgr)) {
right_aliased_ids.push_back(overlap_id);
overlap.flags |= ImageFlagBits::Alias;
+ all_siblings.push_back(overlap_id);
} else {
bad_overlap_ids.push_back(overlap_id);
overlap.flags |= ImageFlagBits::BadOverlap;
@@ -709,6 +981,32 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
}
};
ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu);
+
+ bool can_rescale = info.rescaleable;
+ bool any_rescaled = false;
+ for (const ImageId sibling_id : all_siblings) {
+ if (!can_rescale) {
+ break;
+ }
+ Image& sibling = slot_images[sibling_id];
+ can_rescale &= ImageCanRescale(sibling);
+ any_rescaled |= True(sibling.flags & ImageFlagBits::Rescaled);
+ }
+
+ can_rescale &= any_rescaled;
+
+ if (can_rescale) {
+ for (const ImageId sibling_id : all_siblings) {
+ Image& sibling = slot_images[sibling_id];
+ ScaleUp(sibling);
+ }
+ } else {
+ for (const ImageId sibling_id : all_siblings) {
+ Image& sibling = slot_images[sibling_id];
+ ScaleDown(sibling);
+ }
+ }
+
const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
Image& new_image = slot_images[new_image_id];
@@ -731,14 +1029,23 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
// TODO: Only upload what we need
RefreshContents(new_image, new_image_id);
+ if (can_rescale) {
+ ScaleUp(new_image);
+ } else {
+ ScaleDown(new_image);
+ }
+
for (const ImageId overlap_id : overlap_ids) {
Image& overlap = slot_images[overlap_id];
if (overlap.info.num_samples != new_image.info.num_samples) {
LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented");
} else {
+ const auto& resolution = Settings::values.resolution_info;
const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value();
- const auto copies = MakeShrinkImageCopies(new_info, overlap.info, base);
- runtime.CopyImage(new_image, overlap, copies);
+ const u32 up_scale = can_rescale ? resolution.up_scale : 1;
+ const u32 down_shift = can_rescale ? resolution.down_shift : 0;
+ auto copies = MakeShrinkImageCopies(new_info, overlap.info, base, up_scale, down_shift);
+ runtime.CopyImage(new_image, overlap, std::move(copies));
}
if (True(overlap.flags & ImageFlagBits::Tracked)) {
UntrackImage(overlap, overlap_id);
@@ -1083,13 +1390,6 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
"Trying to unregister an already registered image");
image.flags &= ~ImageFlagBits::Registered;
image.flags &= ~ImageFlagBits::BadOverlap;
- u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
- if ((IsPixelFormatASTC(image.info.format) &&
- True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
- True(image.flags & ImageFlagBits::Converted)) {
- tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
- }
- total_used_memory -= Common::AlignUp(tentative_size, 1024);
lru_cache.Free(image.lru_index);
const auto& clear_page_table =
[this, image_id](
@@ -1213,8 +1513,18 @@ void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) {
}
template <class P>
-void TextureCache<P>::DeleteImage(ImageId image_id) {
+void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
ImageBase& image = slot_images[image_id];
+ if (image.HasScaled()) {
+ total_used_memory -= GetScaledImageSizeBytes(image);
+ }
+ u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
+ if ((IsPixelFormatASTC(image.info.format) &&
+ True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
+ True(image.flags & ImageFlagBits::Converted)) {
+ tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
+ }
+ total_used_memory -= Common::AlignUp(tentative_size, 1024);
const GPUVAddr gpu_addr = image.gpu_addr;
const auto alloc_it = image_allocs_table.find(gpu_addr);
if (alloc_it == image_allocs_table.end()) {
@@ -1269,10 +1579,14 @@ void TextureCache<P>::DeleteImage(ImageId image_id) {
num_removed_overlaps);
}
for (const ImageViewId image_view_id : image_view_ids) {
- sentenced_image_view.Push(std::move(slot_image_views[image_view_id]));
+ if (!immediate_delete) {
+ sentenced_image_view.Push(std::move(slot_image_views[image_view_id]));
+ }
slot_image_views.erase(image_view_id);
}
- sentenced_images.Push(std::move(slot_images[image_id]));
+ if (!immediate_delete) {
+ sentenced_images.Push(std::move(slot_images[image_id]));
+ }
slot_images.erase(image_id);
alloc_images.erase(alloc_image_it);
@@ -1306,6 +1620,9 @@ void TextureCache<P>::RemoveFramebuffers(std::span<const ImageViewId> removed_vi
auto it = framebuffers.begin();
while (it != framebuffers.end()) {
if (it->first.Contains(removed_views)) {
+ auto framebuffer_id = it->second;
+ ASSERT(framebuffer_id);
+ sentenced_framebuffers.Push(std::move(slot_framebuffers[framebuffer_id]));
it = framebuffers.erase(it);
} else {
++it;
@@ -1322,26 +1639,60 @@ void TextureCache<P>::MarkModification(ImageBase& image) noexcept {
template <class P>
void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
boost::container::small_vector<const AliasedImage*, 1> aliased_images;
- ImageBase& image = slot_images[image_id];
+ Image& image = slot_images[image_id];
+ bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled);
u64 most_recent_tick = image.modification_tick;
for (const AliasedImage& aliased : image.aliased_images) {
ImageBase& aliased_image = slot_images[aliased.id];
if (image.modification_tick < aliased_image.modification_tick) {
most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick);
aliased_images.push_back(&aliased);
+ any_rescaled |= True(aliased_image.flags & ImageFlagBits::Rescaled);
}
}
if (aliased_images.empty()) {
return;
}
+ const bool can_rescale = ImageCanRescale(image);
+ if (any_rescaled) {
+ if (can_rescale) {
+ ScaleUp(image);
+ } else {
+ ScaleDown(image);
+ }
+ }
image.modification_tick = most_recent_tick;
std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) {
const ImageBase& lhs_image = slot_images[lhs->id];
const ImageBase& rhs_image = slot_images[rhs->id];
return lhs_image.modification_tick < rhs_image.modification_tick;
});
+ const auto& resolution = Settings::values.resolution_info;
for (const AliasedImage* const aliased : aliased_images) {
- CopyImage(image_id, aliased->id, aliased->copies);
+ if (!resolution.active | !any_rescaled) {
+ CopyImage(image_id, aliased->id, aliased->copies);
+ continue;
+ }
+ Image& aliased_image = slot_images[aliased->id];
+ if (!can_rescale) {
+ ScaleDown(aliased_image);
+ CopyImage(image_id, aliased->id, aliased->copies);
+ continue;
+ }
+ ScaleUp(aliased_image);
+
+ const bool both_2d{image.info.type == ImageType::e2D &&
+ aliased_image.info.type == ImageType::e2D};
+ auto copies = aliased->copies;
+ for (auto copy : copies) {
+ copy.extent.width = std::max<u32>(
+ (copy.extent.width * resolution.up_scale) >> resolution.down_shift, 1);
+ if (both_2d) {
+ copy.extent.height = std::max<u32>(
+ (copy.extent.height * resolution.up_scale) >> resolution.down_shift, 1);
+ }
+ }
+ CopyImage(image_id, aliased->id, copies);
}
}
@@ -1377,9 +1728,25 @@ void TextureCache<P>::PrepareImageView(ImageViewId image_view_id, bool is_modifi
}
template <class P>
-void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies) {
+void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::vector<ImageCopy> copies) {
Image& dst = slot_images[dst_id];
Image& src = slot_images[src_id];
+ const bool is_rescaled = True(src.flags & ImageFlagBits::Rescaled);
+ if (is_rescaled) {
+ ASSERT(True(dst.flags & ImageFlagBits::Rescaled));
+ const bool both_2d{src.info.type == ImageType::e2D && dst.info.type == ImageType::e2D};
+ const auto& resolution = Settings::values.resolution_info;
+ for (auto& copy : copies) {
+ copy.src_offset.x = resolution.ScaleUp(copy.src_offset.x);
+ copy.dst_offset.x = resolution.ScaleUp(copy.dst_offset.x);
+ copy.extent.width = resolution.ScaleUp(copy.extent.width);
+ if (both_2d) {
+ copy.src_offset.y = resolution.ScaleUp(copy.src_offset.y);
+ copy.dst_offset.y = resolution.ScaleUp(copy.dst_offset.y);
+ copy.extent.height = resolution.ScaleUp(copy.extent.height);
+ }
+ }
+ }
const auto dst_format_type = GetFormatType(dst.info.format);
const auto src_format_type = GetFormatType(src.info.format);
if (src_format_type == dst_format_type) {
@@ -1424,7 +1791,7 @@ void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::span<const
};
UNIMPLEMENTED_IF(copy.extent != expected_size);
- runtime.ConvertImage(dst_framebuffer, dst_view, src_view);
+ runtime.ConvertImage(dst_framebuffer, dst_view, src_view, is_rescaled);
}
}
@@ -1433,8 +1800,8 @@ void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id)
if (*old_id == new_id) {
return;
}
- if (*old_id) {
- const ImageViewBase& old_view = slot_image_views[*old_id];
+ if (new_id) {
+ const ImageViewBase& old_view = slot_image_views[new_id];
if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) {
uncommitted_downloads.push_back(old_view.image_id);
}
@@ -1447,10 +1814,18 @@ std::pair<FramebufferId, ImageViewId> TextureCache<P>::RenderTargetFromImage(
ImageId image_id, const ImageViewInfo& view_info) {
const ImageViewId view_id = FindOrEmplaceImageView(image_id, view_info);
const ImageBase& image = slot_images[image_id];
+ const bool is_rescaled = True(image.flags & ImageFlagBits::Rescaled);
const bool is_color = GetFormatType(image.info.format) == SurfaceType::ColorTexture;
const ImageViewId color_view_id = is_color ? view_id : ImageViewId{};
const ImageViewId depth_view_id = is_color ? ImageViewId{} : view_id;
- const Extent3D extent = MipSize(image.info.size, view_info.range.base.level);
+ Extent3D extent = MipSize(image.info.size, view_info.range.base.level);
+ if (is_rescaled) {
+ const auto& resolution = Settings::values.resolution_info;
+ extent.width = resolution.ScaleUp(extent.width);
+ if (image.info.type == ImageType::e2D) {
+ extent.height = resolution.ScaleUp(extent.height);
+ }
+ }
const u32 num_samples = image.info.num_samples;
const auto [samples_x, samples_y] = SamplesLog2(num_samples);
const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 2d1893c1c..643ad811c 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -21,6 +21,7 @@
#include "video_core/texture_cache/descriptor_table.h"
#include "video_core/texture_cache/image_base.h"
#include "video_core/texture_cache/image_info.h"
+#include "video_core/texture_cache/image_view_base.h"
#include "video_core/texture_cache/image_view_info.h"
#include "video_core/texture_cache/render_targets.h"
#include "video_core/texture_cache/slot_vector.h"
@@ -39,6 +40,12 @@ using VideoCore::Surface::PixelFormatFromDepthFormat;
using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
using namespace Common::Literals;
+struct ImageViewInOut {
+ u32 index{};
+ bool blacklist{};
+ ImageViewId id{};
+};
+
template <class P>
class TextureCache {
/// Address shift for caching images into a hash table
@@ -53,11 +60,6 @@ class TextureCache {
/// True when the API can provide info about the memory of the device.
static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;
- /// Image view ID for null descriptors
- static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0};
- /// Sampler ID for bugged sampler ids
- static constexpr SamplerId NULL_SAMPLER_ID{0};
-
static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB;
static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB;
@@ -99,11 +101,11 @@ public:
void MarkModification(ImageId id) noexcept;
/// Fill image_view_ids with the graphics images in indices
- void FillGraphicsImageViews(std::span<const u32> indices,
- std::span<ImageViewId> image_view_ids);
+ template <bool has_blacklists>
+ void FillGraphicsImageViews(std::span<ImageViewInOut> views);
/// Fill image_view_ids with the compute images in indices
- void FillComputeImageViews(std::span<const u32> indices, std::span<ImageViewId> image_view_ids);
+ void FillComputeImageViews(std::span<ImageViewInOut> views);
/// Get the sampler from the graphics descriptor table in the specified index
Sampler* GetGraphicsSampler(u32 index);
@@ -117,6 +119,11 @@ public:
/// Refresh the state for compute image view and sampler descriptors
void SynchronizeComputeDescriptors();
+ /// Updates the Render Targets if they can be rescaled
+ /// @param is_clear True when the render targets are being used for clears
+ /// @retval True if the Render Targets have been rescaled.
+ bool RescaleRenderTargets(bool is_clear);
+
/// Update bound render targets and upload memory if necessary
/// @param is_clear True when the render targets are being used for clears
void UpdateRenderTargets(bool is_clear);
@@ -160,6 +167,10 @@ public:
/// Return true when a CPU region is modified from the GPU
[[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
+ [[nodiscard]] bool IsRescaling() const noexcept;
+
+ [[nodiscard]] bool IsRescaling(const ImageViewBase& image_view) const noexcept;
+
std::mutex mutex;
private:
@@ -198,9 +209,10 @@ private:
void RunGarbageCollector();
/// Fills image_view_ids in the image views in indices
+ template <bool has_blacklists>
void FillImageViews(DescriptorTable<TICEntry>& table,
- std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices,
- std::span<ImageViewId> image_view_ids);
+ std::span<ImageViewId> cached_image_view_ids,
+ std::span<ImageViewInOut> views);
/// Find or create an image view in the guest descriptor table
ImageViewId VisitImageView(DescriptorTable<TICEntry>& table,
@@ -285,7 +297,7 @@ private:
void UntrackImage(ImageBase& image, ImageId image_id);
/// Delete image from the cache
- void DeleteImage(ImageId image);
+ void DeleteImage(ImageId image, bool immediate_delete = false);
/// Remove image views references from the cache
void RemoveImageViewReferences(std::span<const ImageViewId> removed_views);
@@ -306,7 +318,7 @@ private:
void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate);
/// Execute copies from one image to the other, even if they are incompatible
- void CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies);
+ void CopyImage(ImageId dst_id, ImageId src_id, std::vector<ImageCopy> copies);
/// Bind an image view as render target, downloading resources preemtively if needed
void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id);
@@ -318,6 +330,12 @@ private:
/// Returns true if the current clear parameters clear the whole image of a given image view
[[nodiscard]] bool IsFullClear(ImageViewId id);
+ bool ImageCanRescale(ImageBase& image);
+ void InvalidateScale(Image& image);
+ bool ScaleUp(Image& image);
+ bool ScaleDown(Image& image);
+ u64 GetScaledImageSizeBytes(ImageBase& image);
+
Runtime& runtime;
VideoCore::RasterizerInterface& rasterizer;
Tegra::Engines::Maxwell3D& maxwell3d;
@@ -349,6 +367,7 @@ private:
VAddr virtual_invalid_space{};
bool has_deleted_images = false;
+ bool is_rescaling = false;
u64 total_used_memory = 0;
u64 minimum_memory;
u64 expected_memory;
diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h
index 47a11cb2f..5c274abdf 100644
--- a/src/video_core/texture_cache/types.h
+++ b/src/video_core/texture_cache/types.h
@@ -22,6 +22,13 @@ using ImageAllocId = SlotId;
using SamplerId = SlotId;
using FramebufferId = SlotId;
+/// Fake image ID for null image views
+constexpr ImageId NULL_IMAGE_ID{0};
+/// Image view ID for null descriptors
+constexpr ImageViewId NULL_IMAGE_VIEW_ID{0};
+/// Sampler ID for bugged sampler ids
+constexpr SamplerId NULL_SAMPLER_ID{0};
+
enum class ImageType : u32 {
e1D,
e2D,
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 59cf2f561..ddc9fb13a 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -723,7 +723,7 @@ ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept {
}
std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src,
- SubresourceBase base) {
+ SubresourceBase base, u32 up_scale, u32 down_shift) {
ASSERT(dst.resources.levels >= src.resources.levels);
ASSERT(dst.num_samples == src.num_samples);
@@ -732,7 +732,7 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn
ASSERT(src.type == ImageType::e3D);
ASSERT(src.resources.levels == 1);
}
-
+ const bool both_2d{src.type == ImageType::e2D && dst.type == ImageType::e2D};
std::vector<ImageCopy> copies;
copies.reserve(src.resources.levels);
for (s32 level = 0; level < src.resources.levels; ++level) {
@@ -762,6 +762,10 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn
if (is_dst_3d) {
copy.extent.depth = src.size.depth;
}
+ copy.extent.width = std::max<u32>((copy.extent.width * up_scale) >> down_shift, 1);
+ if (both_2d) {
+ copy.extent.height = std::max<u32>((copy.extent.height * up_scale) >> down_shift, 1);
+ }
}
return copies;
}
@@ -1153,10 +1157,10 @@ void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase*
if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) {
dst_info.format = dst->info.format;
}
- if (!dst && src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) {
+ if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) {
dst_info.format = src->info.format;
}
- if (!src && dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) {
+ if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) {
src_info.format = dst->info.format;
}
}
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h
index 766502908..7af52de2e 100644
--- a/src/video_core/texture_cache/util.h
+++ b/src/video_core/texture_cache/util.h
@@ -55,7 +55,8 @@ struct OverlapResult {
[[nodiscard]] std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst,
const ImageInfo& src,
- SubresourceBase base);
+ SubresourceBase base, u32 up_scale = 1,
+ u32 down_shift = 0);
[[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config);
diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp
index a552543ed..06954963d 100644
--- a/src/video_core/textures/texture.cpp
+++ b/src/video_core/textures/texture.cpp
@@ -51,22 +51,6 @@ constexpr std::array<float, 256> SRGB_CONVERSION_LUT = {
0.917104f, 0.929242f, 0.941493f, 0.953859f, 0.966338f, 1.000000f, 1.000000f, 1.000000f,
};
-unsigned SettingsMinimumAnisotropy() noexcept {
- switch (static_cast<Anisotropy>(Settings::values.max_anisotropy.GetValue())) {
- default:
- case Anisotropy::Default:
- return 1U;
- case Anisotropy::Filter2x:
- return 2U;
- case Anisotropy::Filter4x:
- return 4U;
- case Anisotropy::Filter8x:
- return 8U;
- case Anisotropy::Filter16x:
- return 16U;
- }
-}
-
} // Anonymous namespace
std::array<float, 4> TSCEntry::BorderColor() const noexcept {
@@ -78,7 +62,18 @@ std::array<float, 4> TSCEntry::BorderColor() const noexcept {
}
float TSCEntry::MaxAnisotropy() const noexcept {
- return static_cast<float>(std::max(1U << max_anisotropy, SettingsMinimumAnisotropy()));
+ if (max_anisotropy == 0 && mipmap_filter != TextureMipmapFilter::Linear) {
+ return 1.0f;
+ }
+ const auto anisotropic_settings = Settings::values.max_anisotropy.GetValue();
+ u32 added_anisotropic{};
+ if (anisotropic_settings == 0) {
+ added_anisotropic = Settings::values.resolution_info.up_scale >>
+ Settings::values.resolution_info.down_shift;
+ } else {
+ added_anisotropic = Settings::values.max_anisotropy.GetValue() - 1U;
+ }
+ return static_cast<float>(1U << (max_anisotropy + added_anisotropic));
}
} // namespace Tegra::Texture
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index cae543a51..e852c817e 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -37,6 +37,8 @@ std::unique_ptr<VideoCore::RendererBase> CreateRenderer(
namespace VideoCore {
std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) {
+ Settings::UpdateRescalingInfo();
+
const auto nvdec_value = Settings::values.nvdec_emulation.GetValue();
const bool use_nvdec = nvdec_value != Settings::NvdecEmulation::Off;
const bool use_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
@@ -53,11 +55,10 @@ std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Cor
}
}
-u16 GetResolutionScaleFactor(const RendererBase& renderer) {
- return static_cast<u16>(
- Settings::values.resolution_factor.GetValue() != 0
- ? Settings::values.resolution_factor.GetValue()
- : renderer.GetRenderWindow().GetFramebufferLayout().GetScalingRatio());
+float GetResolutionScaleFactor(const RendererBase& renderer) {
+ return Settings::values.resolution_info.active
+ ? Settings::values.resolution_info.up_factor
+ : renderer.GetRenderWindow().GetFramebufferLayout().GetScalingRatio();
}
} // namespace VideoCore
diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h
index f5c27125d..f86877e86 100644
--- a/src/video_core/video_core.h
+++ b/src/video_core/video_core.h
@@ -25,6 +25,6 @@ class RendererBase;
/// Creates an emulated GPU instance using the given system context.
std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system);
-u16 GetResolutionScaleFactor(const RendererBase& renderer);
+float GetResolutionScaleFactor(const RendererBase& renderer);
} // namespace VideoCore
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 2d5daf6cd..10653ac6b 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -40,6 +40,10 @@ public:
VkFormat GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
FormatType format_type) const;
+ /// Returns true if a format is supported.
+ bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
+ FormatType format_type) const;
+
/// Reports a device loss.
void ReportLoss() const;
@@ -370,10 +374,6 @@ private:
/// Returns true if the device natively supports blitting depth stencil images.
bool TestDepthStencilBlits() const;
- /// Returns true if a format is supported.
- bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
- FormatType format_type) const;
-
VkInstance instance; ///< Vulkan instance.
vk::DeviceDispatch dld; ///< Device function pointers.
vk::PhysicalDevice physical; ///< Physical device.