summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/engines/maxwell_3d.h9
-rw-r--r--src/video_core/gpu.cpp65
-rw-r--r--src/video_core/gpu.h7
-rw-r--r--src/video_core/gpu_thread.cpp4
-rw-r--r--src/video_core/morton.cpp2
-rw-r--r--src/video_core/renderer_base.h10
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.cpp18
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.h25
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp15
-rw-r--r--src/video_core/renderer_opengl/gl_state.h4
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp29
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h24
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp272
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h26
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp15
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.cpp15
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.h8
-rw-r--r--src/video_core/renderer_vulkan/vk_device.cpp1
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp17
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h10
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp6
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.h4
-rw-r--r--src/video_core/shader/decode/arithmetic.cpp33
-rw-r--r--src/video_core/shader/decode/arithmetic_integer.cpp94
-rw-r--r--src/video_core/shader/track.cpp20
-rw-r--r--src/video_core/surface.cpp2
-rw-r--r--src/video_core/surface.h72
-rw-r--r--src/video_core/texture_cache/format_lookup_table.cpp3
-rw-r--r--src/video_core/texture_cache/surface_params.cpp47
-rw-r--r--src/video_core/texture_cache/surface_params.h5
-rw-r--r--src/video_core/texture_cache/texture_cache.h5
31 files changed, 599 insertions, 268 deletions
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 26939be3f..6ea7cc6a5 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -542,7 +542,7 @@ public:
BitField<12, 1, InvMemoryLayout> type;
} memory_layout;
union {
- BitField<0, 16, u32> array_mode;
+ BitField<0, 16, u32> layers;
BitField<16, 1, u32> volume;
};
u32 layer_stride;
@@ -800,8 +800,12 @@ public:
u32 zeta_width;
u32 zeta_height;
+ union {
+ BitField<0, 16, u32> zeta_layers;
+ BitField<16, 1, u32> zeta_volume;
+ };
- INSERT_UNION_PADDING_WORDS(0x27);
+ INSERT_UNION_PADDING_WORDS(0x26);
u32 depth_test_enable;
@@ -1507,6 +1511,7 @@ ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
ASSERT_REG_POSITION(rt_control, 0x487);
ASSERT_REG_POSITION(zeta_width, 0x48a);
ASSERT_REG_POSITION(zeta_height, 0x48b);
+ASSERT_REG_POSITION(zeta_layers, 0x48c);
ASSERT_REG_POSITION(depth_test_enable, 0x4B3);
ASSERT_REG_POSITION(independent_blend_enable, 0x4B9);
ASSERT_REG_POSITION(depth_write_enabled, 0x4BA);
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 7d7137109..e8f763ce9 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -140,71 +140,6 @@ void GPU::FlushCommands() {
renderer.Rasterizer().FlushCommands();
}
-u32 RenderTargetBytesPerPixel(RenderTargetFormat format) {
- ASSERT(format != RenderTargetFormat::NONE);
-
- switch (format) {
- case RenderTargetFormat::RGBA32_FLOAT:
- case RenderTargetFormat::RGBA32_UINT:
- return 16;
- case RenderTargetFormat::RGBA16_UINT:
- case RenderTargetFormat::RGBA16_UNORM:
- case RenderTargetFormat::RGBA16_FLOAT:
- case RenderTargetFormat::RGBX16_FLOAT:
- case RenderTargetFormat::RG32_FLOAT:
- case RenderTargetFormat::RG32_UINT:
- return 8;
- case RenderTargetFormat::RGBA8_UNORM:
- case RenderTargetFormat::RGBA8_SNORM:
- case RenderTargetFormat::RGBA8_SRGB:
- case RenderTargetFormat::RGBA8_UINT:
- case RenderTargetFormat::RGB10_A2_UNORM:
- case RenderTargetFormat::BGRA8_UNORM:
- case RenderTargetFormat::BGRA8_SRGB:
- case RenderTargetFormat::RG16_UNORM:
- case RenderTargetFormat::RG16_SNORM:
- case RenderTargetFormat::RG16_UINT:
- case RenderTargetFormat::RG16_SINT:
- case RenderTargetFormat::RG16_FLOAT:
- case RenderTargetFormat::R32_FLOAT:
- case RenderTargetFormat::R11G11B10_FLOAT:
- case RenderTargetFormat::R32_UINT:
- return 4;
- case RenderTargetFormat::R16_UNORM:
- case RenderTargetFormat::R16_SNORM:
- case RenderTargetFormat::R16_UINT:
- case RenderTargetFormat::R16_SINT:
- case RenderTargetFormat::R16_FLOAT:
- case RenderTargetFormat::RG8_UNORM:
- case RenderTargetFormat::RG8_SNORM:
- return 2;
- case RenderTargetFormat::R8_UNORM:
- case RenderTargetFormat::R8_UINT:
- return 1;
- default:
- UNIMPLEMENTED_MSG("Unimplemented render target format {}", static_cast<u32>(format));
- return 1;
- }
-}
-
-u32 DepthFormatBytesPerPixel(DepthFormat format) {
- switch (format) {
- case DepthFormat::Z32_S8_X24_FLOAT:
- return 8;
- case DepthFormat::Z32_FLOAT:
- case DepthFormat::S8_Z24_UNORM:
- case DepthFormat::Z24_X8_UNORM:
- case DepthFormat::Z24_S8_UNORM:
- case DepthFormat::Z24_C8_UNORM:
- return 4;
- case DepthFormat::Z16_UNORM:
- return 2;
- default:
- UNIMPLEMENTED_MSG("Unimplemented Depth format {}", static_cast<u32>(format));
- return 1;
- }
-}
-
// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.
// So the values you see in docs might be multiplied by 4.
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 07727210c..ba8c9d665 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -57,6 +57,7 @@ enum class RenderTargetFormat : u32 {
RG16_UINT = 0xDD,
RG16_FLOAT = 0xDE,
R11G11B10_FLOAT = 0xE0,
+ R32_SINT = 0xE3,
R32_UINT = 0xE4,
R32_FLOAT = 0xE5,
B5G6R5_UNORM = 0xE8,
@@ -82,12 +83,6 @@ enum class DepthFormat : u32 {
Z32_S8_X24_FLOAT = 0x19,
};
-/// Returns the number of bytes per pixel of each rendertarget format.
-u32 RenderTargetBytesPerPixel(RenderTargetFormat format);
-
-/// Returns the number of bytes per pixel of each depth format.
-u32 DepthFormatBytesPerPixel(DepthFormat format);
-
struct CommandListHeader;
class DebugContext;
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 2cdf1aa7f..b1088af3d 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -5,7 +5,7 @@
#include "common/assert.h"
#include "common/microprofile.h"
#include "core/core.h"
-#include "core/frontend/scope_acquire_window_context.h"
+#include "core/frontend/scope_acquire_context.h"
#include "video_core/dma_pusher.h"
#include "video_core/gpu.h"
#include "video_core/gpu_thread.h"
@@ -27,7 +27,7 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
return;
}
- Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()};
+ Core::Frontend::ScopeAcquireContext acquire_context{renderer.GetRenderWindow()};
CommandDataContainer next;
while (state.is_running) {
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
index 2f2fe6859..f2c83266e 100644
--- a/src/video_core/morton.cpp
+++ b/src/video_core/morton.cpp
@@ -85,6 +85,7 @@ static constexpr ConversionArray morton_to_linear_fns = {
MortonCopy<true, PixelFormat::RG32UI>,
MortonCopy<true, PixelFormat::RGBX16F>,
MortonCopy<true, PixelFormat::R32UI>,
+ MortonCopy<true, PixelFormat::R32I>,
MortonCopy<true, PixelFormat::ASTC_2D_8X8>,
MortonCopy<true, PixelFormat::ASTC_2D_8X5>,
MortonCopy<true, PixelFormat::ASTC_2D_5X4>,
@@ -166,6 +167,7 @@ static constexpr ConversionArray linear_to_morton_fns = {
MortonCopy<false, PixelFormat::RG32UI>,
MortonCopy<false, PixelFormat::RGBX16F>,
MortonCopy<false, PixelFormat::R32UI>,
+ MortonCopy<false, PixelFormat::R32I>,
nullptr,
nullptr,
nullptr,
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index af1bebc4f..5ec99a126 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -35,15 +35,19 @@ public:
explicit RendererBase(Core::Frontend::EmuWindow& window);
virtual ~RendererBase();
- /// Swap buffers (render frame)
- virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
-
/// Initialize the renderer
virtual bool Init() = 0;
/// Shutdown the renderer
virtual void ShutDown() = 0;
+ /// Finalize rendering the guest frame and draw into the presentation texture
+ virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
+
+ /// Draws the latest frame to the window waiting timeout_ms for a frame to arrive (Renderer
+ /// specific implementation)
+ virtual void TryPresent(int timeout_ms) = 0;
+
// Getter/setter functions:
// ------------------------
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index f0ddfb276..c0aee770f 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -15,6 +15,24 @@ MICROPROFILE_DEFINE(OpenGL_ResourceDeletion, "OpenGL", "Resource Deletion", MP_R
namespace OpenGL {
+void OGLRenderbuffer::Create() {
+ if (handle != 0)
+ return;
+
+ MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
+ glGenRenderbuffers(1, &handle);
+}
+
+void OGLRenderbuffer::Release() {
+ if (handle == 0)
+ return;
+
+ MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
+ glDeleteRenderbuffers(1, &handle);
+ OpenGLState::GetCurState().ResetRenderbuffer(handle).Apply();
+ handle = 0;
+}
+
void OGLTexture::Create(GLenum target) {
if (handle != 0)
return;
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index 514d1d165..995a4e45e 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -11,6 +11,31 @@
namespace OpenGL {
+class OGLRenderbuffer : private NonCopyable {
+public:
+ OGLRenderbuffer() = default;
+
+ OGLRenderbuffer(OGLRenderbuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
+
+ ~OGLRenderbuffer() {
+ Release();
+ }
+
+ OGLRenderbuffer& operator=(OGLRenderbuffer&& o) noexcept {
+ Release();
+ handle = std::exchange(o.handle, 0);
+ return *this;
+ }
+
+ /// Creates a new internal OpenGL resource and stores the handle
+ void Create();
+
+ /// Deletes the internal OpenGL resource
+ void Release();
+
+ GLuint handle = 0;
+};
+
class OGLTexture : private NonCopyable {
public:
OGLTexture() = default;
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index ab1f7983c..7d3bc1a1f 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -423,6 +423,13 @@ void OpenGLState::ApplyClipControl() {
}
}
+void OpenGLState::ApplyRenderBuffer() {
+ if (cur_state.renderbuffer != renderbuffer) {
+ cur_state.renderbuffer = renderbuffer;
+ glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
+ }
+}
+
void OpenGLState::ApplyTextures() {
const std::size_t size = std::size(textures);
for (std::size_t i = 0; i < size; ++i) {
@@ -478,6 +485,7 @@ void OpenGLState::Apply() {
ApplyPolygonOffset();
ApplyAlphaTest();
ApplyClipControl();
+ ApplyRenderBuffer();
}
void OpenGLState::EmulateViewportWithScissor() {
@@ -551,4 +559,11 @@ OpenGLState& OpenGLState::ResetFramebuffer(GLuint handle) {
return *this;
}
+OpenGLState& OpenGLState::ResetRenderbuffer(GLuint handle) {
+ if (renderbuffer == handle) {
+ renderbuffer = 0;
+ }
+ return *this;
+}
+
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 4953eeda2..bce662f2c 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -158,6 +158,8 @@ public:
GLenum depth_mode = GL_NEGATIVE_ONE_TO_ONE;
} clip_control;
+ GLuint renderbuffer{}; // GL_RENDERBUFFER_BINDING
+
OpenGLState();
/// Get the currently active OpenGL state
@@ -196,6 +198,7 @@ public:
void ApplyPolygonOffset();
void ApplyAlphaTest();
void ApplyClipControl();
+ void ApplyRenderBuffer();
/// Resets any references to the given resource
OpenGLState& UnbindTexture(GLuint handle);
@@ -204,6 +207,7 @@ public:
OpenGLState& ResetPipeline(GLuint handle);
OpenGLState& ResetVertexArray(GLuint handle);
OpenGLState& ResetFramebuffer(GLuint handle);
+ OpenGLState& ResetRenderbuffer(GLuint handle);
/// Viewport does not affects glClearBuffer so emulate viewport using scissor test
void EmulateViewportWithScissor();
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 5c1ae1418..cf934b0d8 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -87,6 +87,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format
{GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, false}, // RG32UI
{GL_RGB16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBX16F
{GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, false}, // R32UI
+ {GL_R32I, GL_RED_INTEGER, GL_INT, false}, // R32I
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X4
@@ -405,24 +406,36 @@ CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& p
CachedSurfaceView::~CachedSurfaceView() = default;
void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const {
- ASSERT(params.num_layers == 1 && params.num_levels == 1);
+ ASSERT(params.num_levels == 1);
- const auto& owner_params = surface.GetSurfaceParams();
+ const GLuint texture = surface.GetTexture();
+ if (params.num_layers > 1) {
+ // Layered framebuffer attachments
+ UNIMPLEMENTED_IF(params.base_layer != 0);
+
+ switch (params.target) {
+ case SurfaceTarget::Texture2DArray:
+ glFramebufferTexture(target, attachment, texture, params.base_level);
+ break;
+ default:
+ UNIMPLEMENTED();
+ }
+ return;
+ }
- switch (owner_params.target) {
+ const GLenum view_target = surface.GetTarget();
+ switch (surface.GetSurfaceParams().target) {
case SurfaceTarget::Texture1D:
- glFramebufferTexture1D(target, attachment, surface.GetTarget(), surface.GetTexture(),
- params.base_level);
+ glFramebufferTexture1D(target, attachment, view_target, texture, params.base_level);
break;
case SurfaceTarget::Texture2D:
- glFramebufferTexture2D(target, attachment, surface.GetTarget(), surface.GetTexture(),
- params.base_level);
+ glFramebufferTexture2D(target, attachment, view_target, texture, params.base_level);
break;
case SurfaceTarget::Texture1DArray:
case SurfaceTarget::Texture2DArray:
case SurfaceTarget::TextureCubemap:
case SurfaceTarget::TextureCubeArray:
- glFramebufferTextureLayer(target, attachment, surface.GetTexture(), params.base_level,
+ glFramebufferTextureLayer(target, attachment, texture, params.base_level,
params.base_layer);
break;
default:
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index 7ed505628..d3dea3659 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -92,8 +92,32 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
}
case Maxwell::VertexAttribute::Type::UnsignedScaled:
switch (attrib.size) {
+ case Maxwell::VertexAttribute::Size::Size_8:
case Maxwell::VertexAttribute::Size::Size_8_8:
+ case Maxwell::VertexAttribute::Size::Size_8_8_8:
+ case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
return GL_UNSIGNED_BYTE;
+ case Maxwell::VertexAttribute::Size::Size_16:
+ case Maxwell::VertexAttribute::Size::Size_16_16:
+ case Maxwell::VertexAttribute::Size::Size_16_16_16:
+ case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
+ return GL_UNSIGNED_SHORT;
+ default:
+ LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
+ return {};
+ }
+ case Maxwell::VertexAttribute::Type::SignedScaled:
+ switch (attrib.size) {
+ case Maxwell::VertexAttribute::Size::Size_8:
+ case Maxwell::VertexAttribute::Size::Size_8_8:
+ case Maxwell::VertexAttribute::Size::Size_8_8_8:
+ case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
+ return GL_BYTE;
+ case Maxwell::VertexAttribute::Size::Size_16:
+ case Maxwell::VertexAttribute::Size::Size_16_16:
+ case Maxwell::VertexAttribute::Size::Size_16_16_16:
+ case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
+ return GL_SHORT;
default:
LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
return {};
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index bba16afaf..a4340b502 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -9,11 +9,11 @@
#include <glad/glad.h>
#include "common/assert.h"
#include "common/logging/log.h"
+#include "common/microprofile.h"
#include "common/telemetry.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/frontend/emu_window.h"
-#include "core/frontend/scope_acquire_window_context.h"
#include "core/memory.h"
#include "core/perf_stats.h"
#include "core/settings.h"
@@ -24,6 +24,144 @@
namespace OpenGL {
+// If the size of this is too small, it ends up creating a soft cap on FPS as the renderer will have
+// to wait on available presentation frames.
+constexpr std::size_t SWAP_CHAIN_SIZE = 3;
+
+struct Frame {
+ u32 width{}; /// Width of the frame (to detect resize)
+ u32 height{}; /// Height of the frame
+ bool color_reloaded{}; /// Texture attachment was recreated (ie: resized)
+ OpenGL::OGLRenderbuffer color{}; /// Buffer shared between the render/present FBO
+ OpenGL::OGLFramebuffer render{}; /// FBO created on the render thread
+ OpenGL::OGLFramebuffer present{}; /// FBO created on the present thread
+ GLsync render_fence{}; /// Fence created on the render thread
+ GLsync present_fence{}; /// Fence created on the presentation thread
+ bool is_srgb{}; /// Framebuffer is sRGB or RGB
+};
+
+/**
+ * For smooth Vsync rendering, we want to always present the latest frame that the core generates,
+ * but also make sure that rendering happens at the pace that the frontend dictates. This is a
+ * helper class that the renderer uses to sync frames between the render thread and the presentation
+ * thread
+ */
+class FrameMailbox {
+public:
+ std::mutex swap_chain_lock;
+ std::condition_variable present_cv;
+ std::array<Frame, SWAP_CHAIN_SIZE> swap_chain{};
+ std::queue<Frame*> free_queue;
+ std::deque<Frame*> present_queue;
+ Frame* previous_frame{};
+
+ FrameMailbox() {
+ for (auto& frame : swap_chain) {
+ free_queue.push(&frame);
+ }
+ }
+
+ ~FrameMailbox() {
+ // lock the mutex and clear out the present and free_queues and notify any people who are
+ // blocked to prevent deadlock on shutdown
+ std::scoped_lock lock{swap_chain_lock};
+ std::queue<Frame*>().swap(free_queue);
+ present_queue.clear();
+ present_cv.notify_all();
+ }
+
+ void ReloadPresentFrame(Frame* frame, u32 height, u32 width) {
+ frame->present.Release();
+ frame->present.Create();
+ GLint previous_draw_fbo{};
+ glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &previous_draw_fbo);
+ glBindFramebuffer(GL_FRAMEBUFFER, frame->present.handle);
+ glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
+ frame->color.handle);
+ if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
+ LOG_CRITICAL(Render_OpenGL, "Failed to recreate present FBO!");
+ }
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, previous_draw_fbo);
+ frame->color_reloaded = false;
+ }
+
+ void ReloadRenderFrame(Frame* frame, u32 width, u32 height) {
+ OpenGLState prev_state = OpenGLState::GetCurState();
+ OpenGLState state = OpenGLState::GetCurState();
+
+ // Recreate the color texture attachment
+ frame->color.Release();
+ frame->color.Create();
+ state.renderbuffer = frame->color.handle;
+ state.Apply();
+ glRenderbufferStorage(GL_RENDERBUFFER, frame->is_srgb ? GL_SRGB8 : GL_RGB8, width, height);
+
+ // Recreate the FBO for the render target
+ frame->render.Release();
+ frame->render.Create();
+ state.draw.read_framebuffer = frame->render.handle;
+ state.draw.draw_framebuffer = frame->render.handle;
+ state.Apply();
+ glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
+ frame->color.handle);
+ if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
+ LOG_CRITICAL(Render_OpenGL, "Failed to recreate render FBO!");
+ }
+ prev_state.Apply();
+ frame->width = width;
+ frame->height = height;
+ frame->color_reloaded = true;
+ }
+
+ Frame* GetRenderFrame() {
+ std::unique_lock lock{swap_chain_lock};
+
+ // If theres no free frames, we will reuse the oldest render frame
+ if (free_queue.empty()) {
+ auto frame = present_queue.back();
+ present_queue.pop_back();
+ return frame;
+ }
+
+ Frame* frame = free_queue.front();
+ free_queue.pop();
+ return frame;
+ }
+
+ void ReleaseRenderFrame(Frame* frame) {
+ std::unique_lock lock{swap_chain_lock};
+ present_queue.push_front(frame);
+ present_cv.notify_one();
+ }
+
+ Frame* TryGetPresentFrame(int timeout_ms) {
+ std::unique_lock lock{swap_chain_lock};
+ // wait for new entries in the present_queue
+ present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms),
+ [&] { return !present_queue.empty(); });
+ if (present_queue.empty()) {
+ // timed out waiting for a frame to draw so return the previous frame
+ return previous_frame;
+ }
+
+ // free the previous frame and add it back to the free queue
+ if (previous_frame) {
+ free_queue.push(previous_frame);
+ }
+
+ // the newest entries are pushed to the front of the queue
+ Frame* frame = present_queue.front();
+ present_queue.pop_front();
+ // remove all old entries from the present queue and move them back to the free_queue
+ for (auto f : present_queue) {
+ free_queue.push(f);
+ }
+ present_queue.clear();
+ previous_frame = frame;
+ return frame;
+ }
+};
+
namespace {
constexpr char vertex_shader[] = R"(
@@ -158,21 +296,91 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit
} // Anonymous namespace
RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system)
- : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system} {}
+ : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system},
+ frame_mailbox{std::make_unique<FrameMailbox>()} {}
RendererOpenGL::~RendererOpenGL() = default;
+MICROPROFILE_DEFINE(OpenGL_RenderFrame, "OpenGL", "Render Frame", MP_RGB(128, 128, 64));
+MICROPROFILE_DEFINE(OpenGL_WaitPresent, "OpenGL", "Wait For Present", MP_RGB(128, 128, 128));
+
void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
+ render_window.PollEvents();
+
+ if (!framebuffer) {
+ return;
+ }
+
// Maintain the rasterizer's state as a priority
OpenGLState prev_state = OpenGLState::GetCurState();
state.AllDirty();
state.Apply();
+ PrepareRendertarget(framebuffer);
+ RenderScreenshot();
+
+ Frame* frame;
+ {
+ MICROPROFILE_SCOPE(OpenGL_WaitPresent);
+
+ frame = frame_mailbox->GetRenderFrame();
+
+ // Clean up sync objects before drawing
+
+ // INTEL driver workaround. We can't delete the previous render sync object until we are
+ // sure that the presentation is done
+ if (frame->present_fence) {
+ glClientWaitSync(frame->present_fence, 0, GL_TIMEOUT_IGNORED);
+ }
+
+ // delete the draw fence if the frame wasn't presented
+ if (frame->render_fence) {
+ glDeleteSync(frame->render_fence);
+ frame->render_fence = 0;
+ }
+
+ // wait for the presentation to be done
+ if (frame->present_fence) {
+ glWaitSync(frame->present_fence, 0, GL_TIMEOUT_IGNORED);
+ glDeleteSync(frame->present_fence);
+ frame->present_fence = 0;
+ }
+ }
+
+ {
+ MICROPROFILE_SCOPE(OpenGL_RenderFrame);
+ const auto& layout = render_window.GetFramebufferLayout();
+
+ // Recreate the frame if the size of the window has changed
+ if (layout.width != frame->width || layout.height != frame->height ||
+ screen_info.display_srgb != frame->is_srgb) {
+ LOG_DEBUG(Render_OpenGL, "Reloading render frame");
+ frame->is_srgb = screen_info.display_srgb;
+ frame_mailbox->ReloadRenderFrame(frame, layout.width, layout.height);
+ }
+ state.draw.draw_framebuffer = frame->render.handle;
+ state.Apply();
+ DrawScreen(layout);
+ // Create a fence for the frontend to wait on and swap this frame to OffTex
+ frame->render_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
+ glFlush();
+ frame_mailbox->ReleaseRenderFrame(frame);
+ m_current_frame++;
+ rasterizer->TickFrame();
+ }
+
+ // Restore the rasterizer state
+ prev_state.AllDirty();
+ prev_state.Apply();
+}
+
+void RendererOpenGL::PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer) {
if (framebuffer) {
// If framebuffer is provided, reload it from memory to a texture
if (screen_info.texture.width != static_cast<GLsizei>(framebuffer->width) ||
screen_info.texture.height != static_cast<GLsizei>(framebuffer->height) ||
- screen_info.texture.pixel_format != framebuffer->pixel_format) {
+ screen_info.texture.pixel_format != framebuffer->pixel_format ||
+ gl_framebuffer_data.empty()) {
// Reallocate texture if the framebuffer size has changed.
// This is expected to not happen very often and hence should not be a
// performance problem.
@@ -181,22 +389,7 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
// Load the framebuffer from memory, draw it to the screen, and swap buffers
LoadFBToScreenInfo(*framebuffer);
-
- if (renderer_settings.screenshot_requested)
- CaptureScreenshot();
-
- DrawScreen(render_window.GetFramebufferLayout());
-
- rasterizer->TickFrame();
-
- render_window.SwapBuffers();
}
-
- render_window.PollEvents();
-
- // Restore the rasterizer state
- prev_state.AllDirty();
- prev_state.Apply();
}
void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer) {
@@ -418,13 +611,48 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
DrawScreenTriangles(screen_info, static_cast<float>(screen.left),
static_cast<float>(screen.top), static_cast<float>(screen.GetWidth()),
static_cast<float>(screen.GetHeight()));
+}
- m_current_frame++;
+void RendererOpenGL::TryPresent(int timeout_ms) {
+ const auto& layout = render_window.GetFramebufferLayout();
+ auto frame = frame_mailbox->TryGetPresentFrame(timeout_ms);
+ if (!frame) {
+ LOG_DEBUG(Render_OpenGL, "TryGetPresentFrame returned no frame to present");
+ return;
+ }
+
+ // Clearing before a full overwrite of a fbo can signal to drivers that they can avoid a
+ // readback since we won't be doing any blending
+ glClear(GL_COLOR_BUFFER_BIT);
+
+ // Recreate the presentation FBO if the color attachment was changed
+ if (frame->color_reloaded) {
+ LOG_DEBUG(Render_OpenGL, "Reloading present frame");
+ frame_mailbox->ReloadPresentFrame(frame, layout.width, layout.height);
+ }
+ glWaitSync(frame->render_fence, 0, GL_TIMEOUT_IGNORED);
+ // INTEL workaround.
+ // Normally we could just delete the draw fence here, but due to driver bugs, we can just delete
+ // it on the emulation thread without too much penalty
+ // glDeleteSync(frame.render_sync);
+ // frame.render_sync = 0;
+
+ glBindFramebuffer(GL_READ_FRAMEBUFFER, frame->present.handle);
+ glBlitFramebuffer(0, 0, frame->width, frame->height, 0, 0, layout.width, layout.height,
+ GL_COLOR_BUFFER_BIT, GL_LINEAR);
+
+ // Insert fence for the main thread to block on
+ frame->present_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
+ glFlush();
+
+ glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
}
-void RendererOpenGL::UpdateFramerate() {}
+void RendererOpenGL::RenderScreenshot() {
+ if (!renderer_settings.screenshot_requested) {
+ return;
+ }
-void RendererOpenGL::CaptureScreenshot() {
// Draw the current frame to the screenshot framebuffer
screenshot_framebuffer.Create();
GLuint old_read_fb = state.draw.read_framebuffer;
@@ -459,8 +687,6 @@ void RendererOpenGL::CaptureScreenshot() {
}
bool RendererOpenGL::Init() {
- Core::Frontend::ScopeAcquireWindowContext acquire_context{render_window};
-
if (GLAD_GL_KHR_debug) {
glEnable(GL_DEBUG_OUTPUT);
glDebugMessageCallback(DebugHandler, nullptr);
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index b56328a7f..d45e69cbc 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -44,19 +44,23 @@ struct ScreenInfo {
TextureInfo texture;
};
+struct PresentationTexture {
+ u32 width = 0;
+ u32 height = 0;
+ OGLTexture texture;
+};
+
+class FrameMailbox;
+
class RendererOpenGL final : public VideoCore::RendererBase {
public:
explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system);
~RendererOpenGL() override;
- /// Swap buffers (render frame)
- void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
-
- /// Initialize the renderer
bool Init() override;
-
- /// Shutdown the renderer
void ShutDown() override;
+ void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
+ void TryPresent(int timeout_ms) override;
private:
/// Initializes the OpenGL state and creates persistent objects.
@@ -74,10 +78,7 @@ private:
void DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w, float h);
- /// Updates the framerate.
- void UpdateFramerate();
-
- void CaptureScreenshot();
+ void RenderScreenshot();
/// Loads framebuffer from emulated memory into the active OpenGL texture.
void LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer);
@@ -87,6 +88,8 @@ private:
void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
const TextureInfo& texture);
+ void PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer);
+
Core::Frontend::EmuWindow& emu_window;
Core::System& system;
@@ -107,6 +110,9 @@ private:
/// Used for transforming the framebuffer orientation
Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags;
Common::Rectangle<int> framebuffer_crop_rect;
+
+ /// Frame presentation mailbox
+ std::unique_ptr<FrameMailbox> frame_mailbox;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 5403c3ab7..aad0c895b 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -159,6 +159,7 @@ struct FormatTuple {
{vk::Format::eR32G32Uint, Attachable | Storage}, // RG32UI
{vk::Format::eUndefined, {}}, // RGBX16F
{vk::Format::eR32Uint, Attachable | Storage}, // R32UI
+ {vk::Format::eR32Sint, Attachable | Storage}, // R32I
{vk::Format::eAstc8x8UnormBlock, {}}, // ASTC_2D_8X8
{vk::Format::eUndefined, {}}, // ASTC_2D_8X5
{vk::Format::eUndefined, {}}, // ASTC_2D_5X4
@@ -370,8 +371,22 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr
}
case Maxwell::VertexAttribute::Type::UnsignedScaled:
switch (size) {
+ case Maxwell::VertexAttribute::Size::Size_8:
+ return vk::Format::eR8Uscaled;
case Maxwell::VertexAttribute::Size::Size_8_8:
return vk::Format::eR8G8Uscaled;
+ case Maxwell::VertexAttribute::Size::Size_8_8_8:
+ return vk::Format::eR8G8B8Uscaled;
+ case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
+ return vk::Format::eR8G8B8A8Uscaled;
+ case Maxwell::VertexAttribute::Size::Size_16:
+ return vk::Format::eR16Uscaled;
+ case Maxwell::VertexAttribute::Size::Size_16_16:
+ return vk::Format::eR16G16Uscaled;
+ case Maxwell::VertexAttribute::Size::Size_16_16_16:
+ return vk::Format::eR16G16B16Uscaled;
+ case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
+ return vk::Format::eR16G16B16A16Uscaled;
default:
break;
}
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index d5032b432..ddc62bc97 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -106,8 +106,14 @@ RendererVulkan::~RendererVulkan() {
}
void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
+ render_window.PollEvents();
+
+ if (!framebuffer) {
+ return;
+ }
+
const auto& layout = render_window.GetFramebufferLayout();
- if (framebuffer && layout.width > 0 && layout.height > 0 && render_window.IsShown()) {
+ if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) {
const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset;
const bool use_accelerated =
rasterizer->AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
@@ -128,13 +134,16 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
blit_screen->Recreate();
}
- render_window.SwapBuffers();
rasterizer->TickFrame();
}
render_window.PollEvents();
}
+void RendererVulkan::TryPresent(int /*timeout_ms*/) {
+ // TODO (bunnei): ImplementMe
+}
+
bool RendererVulkan::Init() {
PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr{};
render_window.RetrieveVulkanHandlers(&vkGetInstanceProcAddr, &instance, &surface);
@@ -262,4 +271,4 @@ void RendererVulkan::Report() const {
telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions);
}
-} // namespace Vulkan \ No newline at end of file
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index a472c5dc9..f513397f0 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -36,14 +36,10 @@ public:
explicit RendererVulkan(Core::Frontend::EmuWindow& window, Core::System& system);
~RendererVulkan() override;
- /// Swap buffers (render frame)
- void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
-
- /// Initialize the renderer
bool Init() override;
-
- /// Shutdown the renderer
void ShutDown() override;
+ void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
+ void TryPresent(int timeout_ms) override;
private:
std::optional<vk::DebugUtilsMessengerEXT> CreateDebugCallback(
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index d1da4f9d3..886bde3b9 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -523,6 +523,7 @@ std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperti
vk::Format::eB10G11R11UfloatPack32,
vk::Format::eR32Sfloat,
vk::Format::eR32Uint,
+ vk::Format::eR32Sint,
vk::Format::eR16Sfloat,
vk::Format::eR16G16B16A16Sfloat,
vk::Format::eB8G8R8A8Unorm,
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index ad837dd4a..3fe28c204 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -614,33 +614,34 @@ bool RasterizerVulkan::WalkAttachmentOverlaps(const CachedSurfaceView& attachmen
std::tuple<vk::Framebuffer, vk::Extent2D> RasterizerVulkan::ConfigureFramebuffers(
vk::RenderPass renderpass) {
FramebufferCacheKey key{renderpass, std::numeric_limits<u32>::max(),
- std::numeric_limits<u32>::max()};
+ std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()};
- const auto MarkAsModifiedAndPush = [&](const View& view) {
- if (view == nullptr) {
+ const auto try_push = [&](const View& view) {
+ if (!view) {
return false;
}
key.views.push_back(view->GetHandle());
key.width = std::min(key.width, view->GetWidth());
key.height = std::min(key.height, view->GetHeight());
+ key.layers = std::min(key.layers, view->GetNumLayers());
return true;
};
for (std::size_t index = 0; index < std::size(color_attachments); ++index) {
- if (MarkAsModifiedAndPush(color_attachments[index])) {
+ if (try_push(color_attachments[index])) {
texture_cache.MarkColorBufferInUse(index);
}
}
- if (MarkAsModifiedAndPush(zeta_attachment)) {
+ if (try_push(zeta_attachment)) {
texture_cache.MarkDepthBufferInUse();
}
const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(key);
auto& framebuffer = fbentry->second;
if (is_cache_miss) {
- const vk::FramebufferCreateInfo framebuffer_ci({}, key.renderpass,
- static_cast<u32>(key.views.size()),
- key.views.data(), key.width, key.height, 1);
+ const vk::FramebufferCreateInfo framebuffer_ci(
+ {}, key.renderpass, static_cast<u32>(key.views.size()), key.views.data(), key.width,
+ key.height, key.layers);
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
framebuffer = dev.createFramebufferUnique(framebuffer_ci, nullptr, dld);
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 138903d60..4dc8af6e8 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -56,6 +56,7 @@ struct FramebufferCacheKey {
vk::RenderPass renderpass{};
u32 width = 0;
u32 height = 0;
+ u32 layers = 0;
ImageViewsPack views;
std::size_t Hash() const noexcept {
@@ -66,12 +67,17 @@ struct FramebufferCacheKey {
}
boost::hash_combine(hash, width);
boost::hash_combine(hash, height);
+ boost::hash_combine(hash, layers);
return hash;
}
bool operator==(const FramebufferCacheKey& rhs) const noexcept {
- return std::tie(renderpass, views, width, height) ==
- std::tie(rhs.renderpass, rhs.views, rhs.width, rhs.height);
+ return std::tie(renderpass, views, width, height, layers) ==
+ std::tie(rhs.renderpass, rhs.views, rhs.width, rhs.height, rhs.layers);
+ }
+
+ bool operator!=(const FramebufferCacheKey& rhs) const noexcept {
+ return !operator==(rhs);
}
};
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 9841f0dd1..cfcca5af0 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -2221,16 +2221,14 @@ private:
switch (specialization.attribute_types.at(location)) {
case Maxwell::VertexAttribute::Type::SignedNorm:
case Maxwell::VertexAttribute::Type::UnsignedNorm:
+ case Maxwell::VertexAttribute::Type::UnsignedScaled:
+ case Maxwell::VertexAttribute::Type::SignedScaled:
case Maxwell::VertexAttribute::Type::Float:
return {Type::Float, t_in_float, t_in_float4};
case Maxwell::VertexAttribute::Type::SignedInt:
return {Type::Int, t_in_int, t_in_int4};
case Maxwell::VertexAttribute::Type::UnsignedInt:
return {Type::Uint, t_in_uint, t_in_uint4};
- case Maxwell::VertexAttribute::Type::UnsignedScaled:
- case Maxwell::VertexAttribute::Type::SignedScaled:
- UNIMPLEMENTED();
- return {Type::Float, t_in_float, t_in_float4};
default:
UNREACHABLE();
return {Type::Float, t_in_float, t_in_float4};
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index d3edbe80c..22e3d34de 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -151,6 +151,10 @@ public:
return params.GetMipHeight(base_level);
}
+ u32 GetNumLayers() const {
+ return num_layers;
+ }
+
bool IsBufferView() const {
return buffer_view;
}
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index 90240c765..478394682 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -53,29 +53,24 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);
- // TODO(Rodrigo): Should precise be used when there's a postfactor?
- Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b);
+ static constexpr std::array FmulPostFactor = {
+ 1.000f, // None
+ 0.500f, // Divide 2
+ 0.250f, // Divide 4
+ 0.125f, // Divide 8
+ 8.000f, // Mul 8
+ 4.000f, // Mul 4
+ 2.000f, // Mul 2
+ };
if (instr.fmul.postfactor != 0) {
- auto postfactor = static_cast<s32>(instr.fmul.postfactor);
-
- // Postfactor encoded as 3-bit 1's complement in instruction, interpreted with below
- // logic.
- if (postfactor >= 4) {
- postfactor = 7 - postfactor;
- } else {
- postfactor = 0 - postfactor;
- }
-
- if (postfactor > 0) {
- value = Operation(OperationCode::FMul, NO_PRECISE, value,
- Immediate(static_cast<f32>(1 << postfactor)));
- } else {
- value = Operation(OperationCode::FDiv, NO_PRECISE, value,
- Immediate(static_cast<f32>(1 << -postfactor)));
- }
+ op_a = Operation(OperationCode::FMul, NO_PRECISE, op_a,
+ Immediate(FmulPostFactor[instr.fmul.postfactor]));
}
+ // TODO(Rodrigo): Should precise be used when there's a postfactor?
+ Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b);
+
value = GetSaturatedFloat(value, instr.alu.saturate_d);
SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index 21366869d..2fe787d6f 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -293,44 +293,66 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Node op_b, Node op_c,
Node imm_lut, bool sets_cc) {
- constexpr u32 lop_iterations = 32;
- const Node one = Immediate(1);
- const Node two = Immediate(2);
-
- Node value;
- for (u32 i = 0; i < lop_iterations; ++i) {
- const Node shift_amount = Immediate(i);
-
- const Node a = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_c, shift_amount);
- const Node pack_0 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, one);
-
- const Node b = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_b, shift_amount);
- const Node c = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, b, one);
- const Node pack_1 = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, c, one);
-
- const Node d = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_a, shift_amount);
- const Node e = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, d, one);
- const Node pack_2 = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, e, two);
-
- const Node pack_01 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, pack_0, pack_1);
- const Node pack_012 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, pack_01, pack_2);
-
- const Node shifted_bit =
- Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, imm_lut, pack_012);
- const Node bit = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, shifted_bit, one);
-
- const Node right =
- Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, bit, shift_amount);
-
- if (i > 0) {
- value = Operation(OperationCode::IBitwiseOr, NO_PRECISE, value, right);
- } else {
- value = right;
+ const Node lop3_fast = [&](const Node na, const Node nb, const Node nc, const Node ttbl) {
+ Node value = Immediate(0);
+ const ImmediateNode imm = std::get<ImmediateNode>(*ttbl);
+ if (imm.GetValue() & 0x01) {
+ const Node a = Operation(OperationCode::IBitwiseNot, na);
+ const Node b = Operation(OperationCode::IBitwiseNot, nb);
+ const Node c = Operation(OperationCode::IBitwiseNot, nc);
+ Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b);
+ r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c);
+ value = Operation(OperationCode::IBitwiseOr, value, r);
}
- }
+ if (imm.GetValue() & 0x02) {
+ const Node a = Operation(OperationCode::IBitwiseNot, na);
+ const Node b = Operation(OperationCode::IBitwiseNot, nb);
+ Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b);
+ r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc);
+ value = Operation(OperationCode::IBitwiseOr, value, r);
+ }
+ if (imm.GetValue() & 0x04) {
+ const Node a = Operation(OperationCode::IBitwiseNot, na);
+ const Node c = Operation(OperationCode::IBitwiseNot, nc);
+ Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb);
+ r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c);
+ value = Operation(OperationCode::IBitwiseOr, value, r);
+ }
+ if (imm.GetValue() & 0x08) {
+ const Node a = Operation(OperationCode::IBitwiseNot, na);
+ Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb);
+ r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc);
+ value = Operation(OperationCode::IBitwiseOr, value, r);
+ }
+ if (imm.GetValue() & 0x10) {
+ const Node b = Operation(OperationCode::IBitwiseNot, nb);
+ const Node c = Operation(OperationCode::IBitwiseNot, nc);
+ Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b);
+ r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c);
+ value = Operation(OperationCode::IBitwiseOr, value, r);
+ }
+ if (imm.GetValue() & 0x20) {
+ const Node b = Operation(OperationCode::IBitwiseNot, nb);
+ Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b);
+ r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc);
+ value = Operation(OperationCode::IBitwiseOr, value, r);
+ }
+ if (imm.GetValue() & 0x40) {
+ const Node c = Operation(OperationCode::IBitwiseNot, nc);
+ Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb);
+ r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c);
+ value = Operation(OperationCode::IBitwiseOr, value, r);
+ }
+ if (imm.GetValue() & 0x80) {
+ Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb);
+ r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc);
+ value = Operation(OperationCode::IBitwiseOr, value, r);
+ }
+ return value;
+ }(op_a, op_b, op_c, imm_lut);
- SetInternalFlagsFromInteger(bb, value, sets_cc);
- SetRegister(bb, dest, value);
+ SetInternalFlagsFromInteger(bb, lop3_fast, sets_cc);
+ SetRegister(bb, dest, lop3_fast);
}
} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index face8c943..15e22b9fa 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -157,13 +157,21 @@ std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& co
if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
return {};
}
- // Reduce the cursor in one to avoid infinite loops when the instruction sets the same
- // register that it uses as operand
- const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1);
- if (!source) {
- return {};
+ s64 current_cursor = cursor;
+ while (current_cursor > 0) {
+ // Reduce the cursor in one to avoid infinite loops when the instruction sets the same
+ // register that it uses as operand
+ const auto [source, new_cursor] = TrackRegister(gpr, code, current_cursor - 1);
+ current_cursor = new_cursor;
+ if (!source) {
+ continue;
+ }
+ const auto [base_address, index, offset] = TrackCbuf(source, code, current_cursor);
+ if (base_address != nullptr) {
+ return {base_address, index, offset};
+ }
}
- return TrackCbuf(source, code, new_cursor);
+ return {};
}
if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) {
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 1655ccf16..9707c353d 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -155,6 +155,8 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format)
return PixelFormat::R16I;
case Tegra::RenderTargetFormat::R32_FLOAT:
return PixelFormat::R32F;
+ case Tegra::RenderTargetFormat::R32_SINT:
+ return PixelFormat::R32I;
case Tegra::RenderTargetFormat::R32_UINT:
return PixelFormat::R32UI;
case Tegra::RenderTargetFormat::RG32_UINT:
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index 0d17a93ed..d88109e5a 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -59,47 +59,48 @@ enum class PixelFormat {
RG32UI = 41,
RGBX16F = 42,
R32UI = 43,
- ASTC_2D_8X8 = 44,
- ASTC_2D_8X5 = 45,
- ASTC_2D_5X4 = 46,
- BGRA8_SRGB = 47,
- DXT1_SRGB = 48,
- DXT23_SRGB = 49,
- DXT45_SRGB = 50,
- BC7U_SRGB = 51,
- R4G4B4A4U = 52,
- ASTC_2D_4X4_SRGB = 53,
- ASTC_2D_8X8_SRGB = 54,
- ASTC_2D_8X5_SRGB = 55,
- ASTC_2D_5X4_SRGB = 56,
- ASTC_2D_5X5 = 57,
- ASTC_2D_5X5_SRGB = 58,
- ASTC_2D_10X8 = 59,
- ASTC_2D_10X8_SRGB = 60,
- ASTC_2D_6X6 = 61,
- ASTC_2D_6X6_SRGB = 62,
- ASTC_2D_10X10 = 63,
- ASTC_2D_10X10_SRGB = 64,
- ASTC_2D_12X12 = 65,
- ASTC_2D_12X12_SRGB = 66,
- ASTC_2D_8X6 = 67,
- ASTC_2D_8X6_SRGB = 68,
- ASTC_2D_6X5 = 69,
- ASTC_2D_6X5_SRGB = 70,
- E5B9G9R9F = 71,
+ R32I = 44,
+ ASTC_2D_8X8 = 45,
+ ASTC_2D_8X5 = 46,
+ ASTC_2D_5X4 = 47,
+ BGRA8_SRGB = 48,
+ DXT1_SRGB = 49,
+ DXT23_SRGB = 50,
+ DXT45_SRGB = 51,
+ BC7U_SRGB = 52,
+ R4G4B4A4U = 53,
+ ASTC_2D_4X4_SRGB = 54,
+ ASTC_2D_8X8_SRGB = 55,
+ ASTC_2D_8X5_SRGB = 56,
+ ASTC_2D_5X4_SRGB = 57,
+ ASTC_2D_5X5 = 58,
+ ASTC_2D_5X5_SRGB = 59,
+ ASTC_2D_10X8 = 60,
+ ASTC_2D_10X8_SRGB = 61,
+ ASTC_2D_6X6 = 62,
+ ASTC_2D_6X6_SRGB = 63,
+ ASTC_2D_10X10 = 64,
+ ASTC_2D_10X10_SRGB = 65,
+ ASTC_2D_12X12 = 66,
+ ASTC_2D_12X12_SRGB = 67,
+ ASTC_2D_8X6 = 68,
+ ASTC_2D_8X6_SRGB = 69,
+ ASTC_2D_6X5 = 70,
+ ASTC_2D_6X5_SRGB = 71,
+ E5B9G9R9F = 72,
MaxColorFormat,
// Depth formats
- Z32F = 72,
- Z16 = 73,
+ Z32F = 73,
+ Z16 = 74,
MaxDepthFormat,
// DepthStencil formats
- Z24S8 = 74,
- S8Z24 = 75,
- Z32FS8 = 76,
+ Z24S8 = 75,
+ S8Z24 = 76,
+ Z32FS8 = 77,
MaxDepthStencilFormat,
@@ -171,6 +172,7 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{
0, // RG32UI
0, // RGBX16F
0, // R32UI
+ 0, // R32I
2, // ASTC_2D_8X8
2, // ASTC_2D_8X5
2, // ASTC_2D_5X4
@@ -267,6 +269,7 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{
1, // RG32UI
1, // RGBX16F
1, // R32UI
+ 1, // R32I
8, // ASTC_2D_8X8
8, // ASTC_2D_8X5
5, // ASTC_2D_5X4
@@ -355,6 +358,7 @@ constexpr std::array<u32, MaxPixelFormat> block_height_table = {{
1, // RG32UI
1, // RGBX16F
1, // R32UI
+ 1, // R32I
8, // ASTC_2D_8X8
5, // ASTC_2D_8X5
4, // ASTC_2D_5X4
@@ -443,6 +447,7 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{
64, // RG32UI
64, // RGBX16F
32, // R32UI
+ 32, // R32I
128, // ASTC_2D_8X8
128, // ASTC_2D_8X5
128, // ASTC_2D_5X4
@@ -546,6 +551,7 @@ constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table
SurfaceCompression::None, // RG32UI
SurfaceCompression::None, // RGBX16F
SurfaceCompression::None, // R32UI
+ SurfaceCompression::None, // R32I
SurfaceCompression::Converted, // ASTC_2D_8X8
SurfaceCompression::Converted, // ASTC_2D_8X5
SurfaceCompression::Converted, // ASTC_2D_5X4
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index 81fb9f633..cc3ad8417 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -41,7 +41,7 @@ struct Table {
ComponentType alpha_component;
bool is_srgb;
};
-constexpr std::array<Table, 74> DefinitionTable = {{
+constexpr std::array<Table, 75> DefinitionTable = {{
{TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U},
{TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S},
{TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI},
@@ -89,6 +89,7 @@ constexpr std::array<Table, 74> DefinitionTable = {{
{TextureFormat::R32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32F},
{TextureFormat::R32, C, UINT, UINT, UINT, UINT, PixelFormat::R32UI},
+ {TextureFormat::R32, C, SINT, SINT, SINT, SINT, PixelFormat::R32I},
{TextureFormat::E5B9G9R9_SHAREDEXP, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::E5B9G9R9F},
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
index 38b3a4ba8..f00839313 100644
--- a/src/video_core/texture_cache/surface_params.cpp
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -84,19 +84,16 @@ SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_ta
if (entry.IsShadow() && params.type == SurfaceType::ColorTexture) {
switch (params.pixel_format) {
case PixelFormat::R16U:
- case PixelFormat::R16F: {
+ case PixelFormat::R16F:
params.pixel_format = PixelFormat::Z16;
break;
- }
- case PixelFormat::R32F: {
+ case PixelFormat::R32F:
params.pixel_format = PixelFormat::Z32F;
break;
- }
- default: {
+ default:
UNIMPLEMENTED_MSG("Unimplemented shadow convert format: {}",
static_cast<u32>(params.pixel_format));
}
- }
params.type = GetFormatType(params.pixel_format);
}
params.type = GetFormatType(params.pixel_format);
@@ -168,27 +165,29 @@ SurfaceParams SurfaceParams::CreateForImage(const FormatLookupTable& lookup_tabl
return params;
}
-SurfaceParams SurfaceParams::CreateForDepthBuffer(
- Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format,
- u32 block_width, u32 block_height, u32 block_depth,
- Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) {
+SurfaceParams SurfaceParams::CreateForDepthBuffer(Core::System& system) {
+ const auto& regs = system.GPU().Maxwell3D().regs;
+ regs.zeta_width, regs.zeta_height, regs.zeta.format, regs.zeta.memory_layout.type;
SurfaceParams params;
- params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
+ params.is_tiled = regs.zeta.memory_layout.type ==
+ Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
params.srgb_conversion = false;
- params.block_width = std::min(block_width, 5U);
- params.block_height = std::min(block_height, 5U);
- params.block_depth = std::min(block_depth, 5U);
+ params.block_width = std::min(regs.zeta.memory_layout.block_width.Value(), 5U);
+ params.block_height = std::min(regs.zeta.memory_layout.block_height.Value(), 5U);
+ params.block_depth = std::min(regs.zeta.memory_layout.block_depth.Value(), 5U);
params.tile_width_spacing = 1;
- params.pixel_format = PixelFormatFromDepthFormat(format);
+ params.pixel_format = PixelFormatFromDepthFormat(regs.zeta.format);
params.type = GetFormatType(params.pixel_format);
- params.width = zeta_width;
- params.height = zeta_height;
- params.target = SurfaceTarget::Texture2D;
- params.depth = 1;
+ params.width = regs.zeta_width;
+ params.height = regs.zeta_height;
params.pitch = 0;
params.num_levels = 1;
params.emulated_levels = 1;
- params.is_layered = false;
+
+ const bool is_layered = regs.zeta_layers > 1 && params.block_depth == 0;
+ params.is_layered = is_layered;
+ params.target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D;
+ params.depth = is_layered ? regs.zeta_layers.Value() : 1U;
return params;
}
@@ -214,11 +213,13 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz
params.width = params.pitch / bpp;
}
params.height = config.height;
- params.depth = 1;
- params.target = SurfaceTarget::Texture2D;
params.num_levels = 1;
params.emulated_levels = 1;
- params.is_layered = false;
+
+ const bool is_layered = config.layers > 1 && params.block_depth == 0;
+ params.is_layered = is_layered;
+ params.depth = is_layered ? config.layers.Value() : 1;
+ params.target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D;
return params;
}
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h
index 9256fd6d9..995cc3818 100644
--- a/src/video_core/texture_cache/surface_params.h
+++ b/src/video_core/texture_cache/surface_params.h
@@ -35,10 +35,7 @@ public:
const VideoCommon::Shader::Image& entry);
/// Creates SurfaceCachedParams for a depth buffer configuration.
- static SurfaceParams CreateForDepthBuffer(
- Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format,
- u32 block_width, u32 block_height, u32 block_depth,
- Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type);
+ static SurfaceParams CreateForDepthBuffer(Core::System& system);
/// Creates SurfaceCachedParams from a framebuffer configuration.
static SurfaceParams CreateForFramebuffer(Core::System& system, std::size_t index);
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 0d105d386..c70e4aec2 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -160,10 +160,7 @@ public:
SetEmptyDepthBuffer();
return {};
}
- const auto depth_params{SurfaceParams::CreateForDepthBuffer(
- system, regs.zeta_width, regs.zeta_height, regs.zeta.format,
- regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height,
- regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)};
+ const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)};
auto surface_view = GetSurface(gpu_addr, cache_addr, depth_params, preserve_contents, true);
if (depth_buffer.target)
depth_buffer.target->MarkAsRenderTarget(false, NO_RT);