summary | refs | log | tree | commit | diff | stats
path: root/src/video_core/renderer_opengl/gl_rasterizer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/renderer_opengl/gl_rasterizer.cpp')
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp | 534
1 files changed, 253 insertions, 281 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 7ce969f73..70fb54507 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include <algorithm>
+#include <array>
#include <memory>
#include <string>
#include <string_view>
@@ -33,16 +34,19 @@ using PixelFormat = SurfaceParams::PixelFormat;
using SurfaceType = SurfaceParams::SurfaceType;
MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Array Setup", MP_RGB(128, 128, 192));
-MICROPROFILE_DEFINE(OpenGL_VS, "OpenGL", "Vertex Shader Setup", MP_RGB(128, 128, 192));
-MICROPROFILE_DEFINE(OpenGL_FS, "OpenGL", "Fragment Shader Setup", MP_RGB(128, 128, 192));
+MICROPROFILE_DEFINE(OpenGL_Shader, "OpenGL", "Shader Setup", MP_RGB(128, 128, 192));
+MICROPROFILE_DEFINE(OpenGL_UBO, "OpenGL", "Const Buffer Setup", MP_RGB(128, 128, 192));
+MICROPROFILE_DEFINE(OpenGL_Index, "OpenGL", "Index Buffer Setup", MP_RGB(128, 128, 192));
+MICROPROFILE_DEFINE(OpenGL_Texture, "OpenGL", "Texture Setup", MP_RGB(128, 128, 192));
+MICROPROFILE_DEFINE(OpenGL_Framebuffer, "OpenGL", "Framebuffer Setup", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
-MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255));
+MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info)
- : emu_window{window}, screen_info{info}, stream_buffer(GL_ARRAY_BUFFER, STREAM_BUFFER_SIZE) {
+ : emu_window{window}, screen_info{info}, buffer_cache(STREAM_BUFFER_SIZE) {
// Create sampler objects
- for (size_t i = 0; i < texture_samplers.size(); ++i) {
+ for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
texture_samplers[i].Create();
state.texture_units[i].sampler = texture_samplers[i].sampler.handle;
}
@@ -55,6 +59,8 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo
if (extension == "GL_ARB_direct_state_access") {
has_ARB_direct_state_access = true;
+ } else if (extension == "GL_ARB_multi_bind") {
+ has_ARB_multi_bind = true;
} else if (extension == "GL_ARB_separate_shader_objects") {
has_ARB_separate_shader_objects = true;
} else if (extension == "GL_ARB_vertex_attrib_binding") {
@@ -67,28 +73,13 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo
// Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0
state.clip_distance[0] = true;
- // Generate VAO and UBO
- sw_vao.Create();
- uniform_buffer.Create();
-
- state.draw.vertex_array = sw_vao.handle;
- state.draw.uniform_buffer = uniform_buffer.handle;
- state.Apply();
-
// Create render framebuffer
framebuffer.Create();
- hw_vao.Create();
-
- state.draw.vertex_buffer = stream_buffer.GetHandle();
-
shader_program_manager = std::make_unique<GLShader::ProgramManager>();
state.draw.shader_program = 0;
- state.draw.vertex_array = hw_vao.handle;
state.Apply();
- glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer.GetHandle());
-
glEnable(GL_BLEND);
glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment);
@@ -98,14 +89,60 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo
RasterizerOpenGL::~RasterizerOpenGL() {}
-std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
- GLintptr buffer_offset) {
+void RasterizerOpenGL::SetupVertexArrays() {
MICROPROFILE_SCOPE(OpenGL_VAO);
const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
const auto& regs = gpu.regs;
- state.draw.vertex_array = hw_vao.handle;
- state.draw.vertex_buffer = stream_buffer.GetHandle();
+ auto [iter, is_cache_miss] = vertex_array_cache.try_emplace(regs.vertex_attrib_format);
+ auto& VAO = iter->second;
+
+ if (is_cache_miss) {
+ VAO.Create();
+ state.draw.vertex_array = VAO.handle;
+ state.Apply();
+
+ // The index buffer binding is stored within the VAO, so bind it here, once, for each
+ // newly created VAO.
+ glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer_cache.GetHandle());
+
+ // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL.
+ // Enables the first 16 vertex attributes always, as we don't know which ones are actually
+ // used until shader time. Note, Tegra technically supports 32, but we're capping this to 16
+ // for now to avoid OpenGL errors.
+ // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't
+ // assume every shader uses them all.
+ for (unsigned index = 0; index < 16; ++index) {
+ const auto& attrib = regs.vertex_attrib_format[index];
+
+ // Ignore invalid attributes.
+ if (!attrib.IsValid())
+ continue;
+
+ const auto& buffer = regs.vertex_array[attrib.buffer];
+ LOG_TRACE(HW_GPU,
+ "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}",
+ index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(),
+ attrib.offset.Value(), attrib.IsNormalized());
+
+ ASSERT(buffer.IsEnabled());
+
+ glEnableVertexAttribArray(index);
+ if (attrib.type == Tegra::Engines::Maxwell3D::Regs::VertexAttribute::Type::SignedInt ||
+ attrib.type ==
+ Tegra::Engines::Maxwell3D::Regs::VertexAttribute::Type::UnsignedInt) {
+ glVertexAttribIFormat(index, attrib.ComponentCount(),
+ MaxwellToGL::VertexType(attrib), attrib.offset);
+ } else {
+ glVertexAttribFormat(index, attrib.ComponentCount(),
+ MaxwellToGL::VertexType(attrib),
+ attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset);
+ }
+ glVertexAttribBinding(index, attrib.buffer);
+ }
+ }
+ state.draw.vertex_array = VAO.handle;
+ state.draw.vertex_buffer = buffer_cache.GetHandle();
state.Apply();
// Upload all guest vertex arrays sequentially to our buffer
@@ -117,77 +154,35 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
Tegra::GPUVAddr start = vertex_array.StartAddress();
const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
- if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) {
- start += vertex_array.stride * (gpu.state.current_instance / vertex_array.divisor);
- }
-
ASSERT(end > start);
- u64 size = end - start + 1;
-
- GLintptr vertex_buffer_offset;
- std::tie(array_ptr, buffer_offset, vertex_buffer_offset) =
- UploadMemory(array_ptr, buffer_offset, start, size);
+ const u64 size = end - start + 1;
+ const GLintptr vertex_buffer_offset = buffer_cache.UploadMemory(start, size);
// Bind the vertex array to the buffer at the current offset.
- glBindVertexBuffer(index, stream_buffer.GetHandle(), vertex_buffer_offset,
+ glBindVertexBuffer(index, buffer_cache.GetHandle(), vertex_buffer_offset,
vertex_array.stride);
if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) {
- // Tell OpenGL that this is an instanced vertex buffer to prevent accessing different
- // indexes on each vertex. We do the instance indexing manually by incrementing the
- // start address of the vertex buffer.
- glVertexBindingDivisor(index, 1);
+ // Enable vertex buffer instancing with the specified divisor.
+ glVertexBindingDivisor(index, vertex_array.divisor);
} else {
// Disable the vertex buffer instancing.
glVertexBindingDivisor(index, 0);
}
}
-
- // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL.
- // Enables the first 16 vertex attributes always, as we don't know which ones are actually used
- // until shader time. Note, Tegra technically supports 32, but we're capping this to 16 for now
- // to avoid OpenGL errors.
- // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't
- // assume every shader uses them all.
- for (unsigned index = 0; index < 16; ++index) {
- auto& attrib = regs.vertex_attrib_format[index];
-
- // Ignore invalid attributes.
- if (!attrib.IsValid())
- continue;
-
- auto& buffer = regs.vertex_array[attrib.buffer];
- LOG_TRACE(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}",
- index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(),
- attrib.offset.Value(), attrib.IsNormalized());
-
- ASSERT(buffer.IsEnabled());
-
- glEnableVertexAttribArray(index);
- if (attrib.type == Tegra::Engines::Maxwell3D::Regs::VertexAttribute::Type::SignedInt ||
- attrib.type == Tegra::Engines::Maxwell3D::Regs::VertexAttribute::Type::UnsignedInt) {
- glVertexAttribIFormat(index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib),
- attrib.offset);
- } else {
- glVertexAttribFormat(index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib),
- attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset);
- }
- glVertexAttribBinding(index, attrib.buffer);
- }
-
- return {array_ptr, buffer_offset};
}
-std::pair<u8*, GLintptr> RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
- auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
+void RasterizerOpenGL::SetupShaders() {
+ MICROPROFILE_SCOPE(OpenGL_Shader);
+ const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
// Next available bindpoints to use when uploading the const buffers and textures to the GLSL
// shaders. The constbuffer bindpoint starts after the shader stage configuration bind points.
u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
u32 current_texture_bindpoint = 0;
- for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
- auto& shader_config = gpu.regs.shader_config[index];
+ for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
+ const auto& shader_config = gpu.regs.shader_config[index];
const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)};
// Skip stages that are not enabled
@@ -195,21 +190,15 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr
continue;
}
- std::tie(buffer_ptr, buffer_offset) =
- AlignBuffer(buffer_ptr, buffer_offset, static_cast<size_t>(uniform_buffer_alignment));
-
- const size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5
+ const std::size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5
GLShader::MaxwellUniformData ubo{};
ubo.SetFromRegs(gpu.state.shader_stages[stage]);
- std::memcpy(buffer_ptr, &ubo, sizeof(ubo));
+ const GLintptr offset = buffer_cache.UploadHostMemory(
+ &ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment));
// Bind the buffer
- glBindBufferRange(GL_UNIFORM_BUFFER, stage, stream_buffer.GetHandle(), buffer_offset,
- sizeof(ubo));
-
- buffer_ptr += sizeof(ubo);
- buffer_offset += sizeof(ubo);
+ glBindBufferRange(GL_UNIFORM_BUFFER, stage, buffer_cache.GetHandle(), offset, sizeof(ubo));
Shader shader{shader_cache.GetStageProgram(program)};
@@ -230,9 +219,8 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr
}
// Configure the const buffers for this shader stage.
- std::tie(buffer_ptr, buffer_offset, current_constbuffer_bindpoint) =
- SetupConstBuffers(buffer_ptr, buffer_offset, static_cast<Maxwell::ShaderStage>(stage),
- shader, current_constbuffer_bindpoint);
+ current_constbuffer_bindpoint = SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage),
+ shader, current_constbuffer_bindpoint);
// Configure the textures for this shader stage.
current_texture_bindpoint = SetupTextures(static_cast<Maxwell::ShaderStage>(stage), shader,
@@ -245,15 +233,15 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr
}
}
- shader_program_manager->UseTrivialGeometryShader();
+ state.Apply();
- return {buffer_ptr, buffer_offset};
+ shader_program_manager->UseTrivialGeometryShader();
}
-size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
+std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
- size_t size = 0;
+ std::size_t size = 0;
for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
if (!regs.vertex_array[index].IsEnabled())
continue;
@@ -309,60 +297,80 @@ void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
cached_pages.add({pages_interval, delta});
}
-std::pair<Surface, Surface> RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb,
- bool using_depth_fb,
- bool preserve_contents) {
+void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_depth_fb,
+ bool preserve_contents,
+ boost::optional<std::size_t> single_color_target) {
+ MICROPROFILE_SCOPE(OpenGL_Framebuffer);
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
- if (regs.rt[0].format == Tegra::RenderTargetFormat::NONE) {
- LOG_ERROR(HW_GPU, "RenderTargetFormat is not configured");
- using_color_fb = false;
+ Surface depth_surface;
+ if (using_depth_fb) {
+ depth_surface = res_cache.GetDepthBufferSurface(preserve_contents);
}
- const bool has_stencil = regs.stencil_enable;
- const bool write_color_fb =
- state.color_mask.red_enabled == GL_TRUE || state.color_mask.green_enabled == GL_TRUE ||
- state.color_mask.blue_enabled == GL_TRUE || state.color_mask.alpha_enabled == GL_TRUE;
+ // TODO(bunnei): Figure out how the below register works. According to envytools, this should be
+ // used to enable multiple render targets. However, it is left unset on all games that I have
+ // tested.
+ ASSERT_MSG(regs.rt_separate_frag_data == 0, "Unimplemented");
- const bool write_depth_fb =
- (state.depth.test_enabled && state.depth.write_mask == GL_TRUE) ||
- (has_stencil && (state.stencil.front.write_mask || state.stencil.back.write_mask));
-
- Surface color_surface;
- Surface depth_surface;
- MathUtil::Rectangle<u32> surfaces_rect;
- std::tie(color_surface, depth_surface, surfaces_rect) =
- res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, preserve_contents);
+ // Bind the framebuffer surfaces
+ state.draw.draw_framebuffer = framebuffer.handle;
+ state.Apply();
- const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()};
- const MathUtil::Rectangle<u32> draw_rect{
- static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.left,
- surfaces_rect.left, surfaces_rect.right)), // Left
- static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.top,
- surfaces_rect.bottom, surfaces_rect.top)), // Top
- static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.right,
- surfaces_rect.left, surfaces_rect.right)), // Right
- static_cast<u32>(
- std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.bottom,
- surfaces_rect.bottom, surfaces_rect.top))}; // Bottom
+ if (using_color_fb) {
+ if (single_color_target) {
+ // Used when just a single color attachment is enabled, e.g. for clearing a color buffer
+ Surface color_surface =
+ res_cache.GetColorBufferSurface(*single_color_target, preserve_contents);
+ glFramebufferTexture2D(
+ GL_DRAW_FRAMEBUFFER,
+ GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target), GL_TEXTURE_2D,
+ color_surface != nullptr ? color_surface->Texture().handle : 0, 0);
+ glDrawBuffer(GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target));
+ } else {
+ // Multiple color attachments are enabled
+ std::array<GLenum, Maxwell::NumRenderTargets> buffers;
+ for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
+ Surface color_surface = res_cache.GetColorBufferSurface(index, preserve_contents);
+ buffers[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index);
+ glFramebufferTexture2D(
+ GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index),
+ GL_TEXTURE_2D, color_surface != nullptr ? color_surface->Texture().handle : 0,
+ 0);
+ }
+ glDrawBuffers(regs.rt_control.count, buffers.data());
+ }
+ } else {
+ // No color attachments are enabled - zero out all of them
+ for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER,
+ GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index), GL_TEXTURE_2D,
+ 0, 0);
+ }
+ glDrawBuffer(GL_NONE);
+ }
- // Bind the framebuffer surfaces
- BindFramebufferSurfaces(color_surface, depth_surface, has_stencil);
+ if (depth_surface) {
+ if (regs.stencil_enable) {
+ // Attach both depth and stencil
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+ depth_surface->Texture().handle, 0);
+ } else {
+ // Attach depth
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
+ depth_surface->Texture().handle, 0);
+ // Clear stencil attachment
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+ }
+ } else {
+ // Clear both depth and stencil attachment
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
+ 0);
+ }
- SyncViewport(surfaces_rect);
+ SyncViewport();
- // Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. Enable
- // scissor test to prevent drawing outside of the framebuffer region
- state.scissor.enabled = true;
- state.scissor.x = draw_rect.left;
- state.scissor.y = draw_rect.bottom;
- state.scissor.width = draw_rect.GetWidth();
- state.scissor.height = draw_rect.GetHeight();
state.Apply();
-
- // Only return the surface to be marked as dirty if writing to it is enabled.
- return std::make_pair(write_color_fb ? color_surface : nullptr,
- write_depth_fb ? depth_surface : nullptr);
}
void RasterizerOpenGL::Clear() {
@@ -370,32 +378,24 @@ void RasterizerOpenGL::Clear() {
SCOPE_EXIT({ prev_state.Apply(); });
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
- bool use_color_fb = false;
- bool use_depth_fb = false;
+ bool use_color{};
+ bool use_depth{};
+ bool use_stencil{};
OpenGLState clear_state;
- clear_state.draw.draw_framebuffer = state.draw.draw_framebuffer;
+ clear_state.draw.draw_framebuffer = framebuffer.handle;
clear_state.color_mask.red_enabled = regs.clear_buffers.R ? GL_TRUE : GL_FALSE;
clear_state.color_mask.green_enabled = regs.clear_buffers.G ? GL_TRUE : GL_FALSE;
clear_state.color_mask.blue_enabled = regs.clear_buffers.B ? GL_TRUE : GL_FALSE;
clear_state.color_mask.alpha_enabled = regs.clear_buffers.A ? GL_TRUE : GL_FALSE;
- GLbitfield clear_mask{};
if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
regs.clear_buffers.A) {
- if (regs.clear_buffers.RT == 0) {
- // We only support clearing the first color attachment for now
- clear_mask |= GL_COLOR_BUFFER_BIT;
- use_color_fb = true;
- } else {
- // TODO(subv): Add support for the other color attachments
- LOG_CRITICAL(HW_GPU, "Clear unimplemented for RT {}", regs.clear_buffers.RT);
- }
+ use_color = true;
}
if (regs.clear_buffers.Z) {
ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear Z but buffer is not enabled!");
- use_depth_fb = true;
- clear_mask |= GL_DEPTH_BUFFER_BIT;
+ use_depth = true;
// Always enable the depth write when clearing the depth buffer. The depth write mask is
// ignored when clearing the buffer in the Switch, but OpenGL obeys it so we set it to true.
@@ -404,59 +404,33 @@ void RasterizerOpenGL::Clear() {
}
if (regs.clear_buffers.S) {
ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!");
- use_depth_fb = true;
- clear_mask |= GL_STENCIL_BUFFER_BIT;
+ use_stencil = true;
clear_state.stencil.test_enabled = true;
}
- if (!use_color_fb && !use_depth_fb) {
+ if (!use_color && !use_depth && !use_stencil) {
// No color surface nor depth/stencil surface are enabled
return;
}
- if (clear_mask == 0) {
- // No clear mask is enabled
- return;
- }
-
ScopeAcquireGLContext acquire_context{emu_window};
- auto [dirty_color_surface, dirty_depth_surface] =
- ConfigureFramebuffers(use_color_fb, use_depth_fb, false);
+ ConfigureFramebuffers(use_color, use_depth || use_stencil, false,
+ regs.clear_buffers.RT.Value());
clear_state.Apply();
- glClearColor(regs.clear_color[0], regs.clear_color[1], regs.clear_color[2],
- regs.clear_color[3]);
- glClearDepth(regs.clear_depth);
- glClearStencil(regs.clear_stencil);
-
- glClear(clear_mask);
-}
-
-std::pair<u8*, GLintptr> RasterizerOpenGL::AlignBuffer(u8* buffer_ptr, GLintptr buffer_offset,
- size_t alignment) {
- // Align the offset, not the mapped pointer
- GLintptr offset_aligned =
- static_cast<GLintptr>(Common::AlignUp(static_cast<size_t>(buffer_offset), alignment));
- return {buffer_ptr + (offset_aligned - buffer_offset), offset_aligned};
-}
-
-std::tuple<u8*, GLintptr, GLintptr> RasterizerOpenGL::UploadMemory(u8* buffer_ptr,
- GLintptr buffer_offset,
- Tegra::GPUVAddr gpu_addr,
- size_t size, size_t alignment) {
- std::tie(buffer_ptr, buffer_offset) = AlignBuffer(buffer_ptr, buffer_offset, alignment);
- GLintptr uploaded_offset = buffer_offset;
-
- auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
- const boost::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
- Memory::ReadBlock(*cpu_addr, buffer_ptr, size);
-
- buffer_ptr += size;
- buffer_offset += size;
+ if (use_color) {
+ glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
+ }
- return {buffer_ptr, buffer_offset, uploaded_offset};
+ if (use_depth && use_stencil) {
+ glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil);
+ } else if (use_depth) {
+ glClearBufferfv(GL_DEPTH, 0, &regs.clear_depth);
+ } else if (use_stencil) {
+ glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil);
+ }
}
void RasterizerOpenGL::DrawArrays() {
@@ -464,12 +438,12 @@ void RasterizerOpenGL::DrawArrays() {
return;
MICROPROFILE_SCOPE(OpenGL_Drawing);
- const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+ const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
+ const auto& regs = gpu.regs;
ScopeAcquireGLContext acquire_context{emu_window};
- auto [dirty_color_surface, dirty_depth_surface] =
- ConfigureFramebuffers(true, regs.zeta.Address() != 0 && regs.zeta_enable != 0, true);
+ ConfigureFramebuffers();
SyncDepthTestState();
SyncStencilTestState();
@@ -482,43 +456,46 @@ void RasterizerOpenGL::DrawArrays() {
// Draw the vertex batch
const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
- const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()};
+ const u64 index_buffer_size{static_cast<u64>(regs.index_array.count) *
+ static_cast<u64>(regs.index_array.FormatSizeInBytes())};
- state.draw.vertex_buffer = stream_buffer.GetHandle();
+ state.draw.vertex_buffer = buffer_cache.GetHandle();
state.Apply();
- size_t buffer_size = CalculateVertexArraysSize();
+ std::size_t buffer_size = CalculateVertexArraysSize();
if (is_indexed) {
- buffer_size = Common::AlignUp<size_t>(buffer_size, 4) + index_buffer_size;
+ buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + index_buffer_size;
}
// Uniform space for the 5 shader stages
buffer_size =
- Common::AlignUp<size_t>(buffer_size, 4) +
+ Common::AlignUp<std::size_t>(buffer_size, 4) +
(sizeof(GLShader::MaxwellUniformData) + uniform_buffer_alignment) * Maxwell::MaxShaderStage;
// Add space for at least 18 constant buffers
buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment);
- u8* buffer_ptr;
- GLintptr buffer_offset;
- std::tie(buffer_ptr, buffer_offset, std::ignore) =
- stream_buffer.Map(static_cast<GLsizeiptr>(buffer_size), 4);
- u8* buffer_ptr_base = buffer_ptr;
+ buffer_cache.Map(buffer_size);
- std::tie(buffer_ptr, buffer_offset) = SetupVertexArrays(buffer_ptr, buffer_offset);
+ SetupVertexArrays();
// If indexed mode, copy the index buffer
GLintptr index_buffer_offset = 0;
if (is_indexed) {
- std::tie(buffer_ptr, buffer_offset, index_buffer_offset) = UploadMemory(
- buffer_ptr, buffer_offset, regs.index_array.StartAddress(), index_buffer_size);
+ MICROPROFILE_SCOPE(OpenGL_Index);
+
+ // Adjust the index buffer offset so it points to the first desired index.
+ auto index_start = regs.index_array.StartAddress();
+ index_start += static_cast<size_t>(regs.index_array.first) *
+ static_cast<size_t>(regs.index_array.FormatSizeInBytes());
+
+ index_buffer_offset = buffer_cache.UploadMemory(index_start, index_buffer_size);
}
- std::tie(buffer_ptr, buffer_offset) = SetupShaders(buffer_ptr, buffer_offset);
+ SetupShaders();
- stream_buffer.Unmap(buffer_ptr - buffer_ptr_base);
+ buffer_cache.Unmap();
shader_program_manager->ApplyTo(state);
state.Apply();
@@ -527,14 +504,26 @@ void RasterizerOpenGL::DrawArrays() {
if (is_indexed) {
const GLint base_vertex{static_cast<GLint>(regs.vb_element_base)};
- // Adjust the index buffer offset so it points to the first desired index.
- index_buffer_offset += regs.index_array.first * regs.index_array.FormatSizeInBytes();
-
- glDrawElementsBaseVertex(primitive_mode, regs.index_array.count,
- MaxwellToGL::IndexFormat(regs.index_array.format),
- reinterpret_cast<const void*>(index_buffer_offset), base_vertex);
+ if (gpu.state.current_instance > 0) {
+ glDrawElementsInstancedBaseVertexBaseInstance(
+ primitive_mode, regs.index_array.count,
+ MaxwellToGL::IndexFormat(regs.index_array.format),
+ reinterpret_cast<const void*>(index_buffer_offset), 1, base_vertex,
+ gpu.state.current_instance);
+ } else {
+ glDrawElementsBaseVertex(primitive_mode, regs.index_array.count,
+ MaxwellToGL::IndexFormat(regs.index_array.format),
+ reinterpret_cast<const void*>(index_buffer_offset),
+ base_vertex);
+ }
} else {
- glDrawArrays(primitive_mode, regs.vertex_buffer.first, regs.vertex_buffer.count);
+ if (gpu.state.current_instance > 0) {
+ glDrawArraysInstancedBaseInstance(primitive_mode, regs.vertex_buffer.first,
+ regs.vertex_buffer.count, 1,
+ gpu.state.current_instance);
+ } else {
+ glDrawArrays(primitive_mode, regs.vertex_buffer.first, regs.vertex_buffer.count);
+ }
}
// Disable scissor test
@@ -549,24 +538,18 @@ void RasterizerOpenGL::DrawArrays() {
state.Apply();
}
-void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {}
+void RasterizerOpenGL::FlushAll() {}
-void RasterizerOpenGL::FlushAll() {
- MICROPROFILE_SCOPE(OpenGL_CacheManagement);
-}
-
-void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
- MICROPROFILE_SCOPE(OpenGL_CacheManagement);
-}
+void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {}
void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
res_cache.InvalidateRegion(addr, size);
shader_cache.InvalidateRegion(addr, size);
+ buffer_cache.InvalidateRegion(addr, size);
}
void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
- MICROPROFILE_SCOPE(OpenGL_CacheManagement);
InvalidateRegion(addr, size);
}
@@ -614,7 +597,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
void RasterizerOpenGL::SamplerInfo::Create() {
sampler.Create();
mag_filter = min_filter = Tegra::Texture::TextureFilter::Linear;
- wrap_u = wrap_v = Tegra::Texture::WrapMode::Wrap;
+ wrap_u = wrap_v = wrap_p = Tegra::Texture::WrapMode::Wrap;
// default is GL_LINEAR_MIPMAP_LINEAR
glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
@@ -622,7 +605,7 @@ void RasterizerOpenGL::SamplerInfo::Create() {
}
void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntry& config) {
- GLuint s = sampler.handle;
+ const GLuint s = sampler.handle;
if (mag_filter != config.mag_filter) {
mag_filter = config.mag_filter;
@@ -641,8 +624,13 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr
wrap_v = config.wrap_v;
glSamplerParameteri(s, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(wrap_v));
}
+ if (wrap_p != config.wrap_p) {
+ wrap_p = config.wrap_p;
+ glSamplerParameteri(s, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(wrap_p));
+ }
- if (wrap_u == Tegra::Texture::WrapMode::Border || wrap_v == Tegra::Texture::WrapMode::Border) {
+ if (wrap_u == Tegra::Texture::WrapMode::Border || wrap_v == Tegra::Texture::WrapMode::Border ||
+ wrap_p == Tegra::Texture::WrapMode::Border) {
const GLvec4 new_border_color = {{config.border_color_r, config.border_color_g,
config.border_color_b, config.border_color_a}};
if (border_color != new_border_color) {
@@ -652,26 +640,35 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr
}
}
-std::tuple<u8*, GLintptr, u32> RasterizerOpenGL::SetupConstBuffers(u8* buffer_ptr,
- GLintptr buffer_offset,
- Maxwell::ShaderStage stage,
- Shader& shader,
- u32 current_bindpoint) {
+u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shader,
+ u32 current_bindpoint) {
+ MICROPROFILE_SCOPE(OpenGL_UBO);
const auto& gpu = Core::System::GetInstance().GPU();
const auto& maxwell3d = gpu.Maxwell3D();
- const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<size_t>(stage)];
+ const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)];
const auto& entries = shader->GetShaderEntries().const_buffer_entries;
+ constexpr u64 max_binds = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers;
+ std::array<GLuint, max_binds> bind_buffers;
+ std::array<GLintptr, max_binds> bind_offsets;
+ std::array<GLsizeiptr, max_binds> bind_sizes;
+
+ ASSERT_MSG(entries.size() <= max_binds, "Exceeded expected number of binding points.");
+
// Upload only the enabled buffers from the 16 constbuffers of each shader stage
for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
const auto& used_buffer = entries[bindpoint];
const auto& buffer = shader_stage.const_buffers[used_buffer.GetIndex()];
if (!buffer.enabled) {
+ // For disabled buffers, set the values to zero so that the bind point gets unbound
+ bind_buffers[bindpoint] = 0;
+ bind_offsets[bindpoint] = 0;
+ bind_sizes[bindpoint] = 0;
continue;
}
- size_t size = 0;
+ std::size_t size = 0;
if (used_buffer.IsIndirect()) {
// Buffer is accessed indirectly, so upload the entire thing
@@ -692,26 +689,28 @@ std::tuple<u8*, GLintptr, u32> RasterizerOpenGL::SetupConstBuffers(u8* buffer_pt
size = Common::AlignUp(size, sizeof(GLvec4));
ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big");
- GLintptr const_buffer_offset;
- std::tie(buffer_ptr, buffer_offset, const_buffer_offset) =
- UploadMemory(buffer_ptr, buffer_offset, buffer.address, size,
- static_cast<size_t>(uniform_buffer_alignment));
-
- glBindBufferRange(GL_UNIFORM_BUFFER, current_bindpoint + bindpoint,
- stream_buffer.GetHandle(), const_buffer_offset, size);
+ GLintptr const_buffer_offset = buffer_cache.UploadMemory(
+ buffer.address, size, static_cast<std::size_t>(uniform_buffer_alignment));
// Now configure the bindpoint of the buffer inside the shader
glUniformBlockBinding(shader->GetProgramHandle(),
- shader->GetProgramResourceIndex(used_buffer.GetName()),
+ shader->GetProgramResourceIndex(used_buffer),
current_bindpoint + bindpoint);
+
+ // Prepare values for multibind
+ bind_buffers[bindpoint] = buffer_cache.GetHandle();
+ bind_offsets[bindpoint] = const_buffer_offset;
+ bind_sizes[bindpoint] = size;
}
- state.Apply();
+ glBindBuffersRange(GL_UNIFORM_BUFFER, current_bindpoint, static_cast<GLsizei>(entries.size()),
+ bind_buffers.data(), bind_offsets.data(), bind_sizes.data());
- return {buffer_ptr, buffer_offset, current_bindpoint + static_cast<u32>(entries.size())};
+ return current_bindpoint + static_cast<u32>(entries.size());
}
u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader, u32 current_unit) {
+ MICROPROFILE_SCOPE(OpenGL_Texture);
const auto& gpu = Core::System::GetInstance().GPU();
const auto& maxwell3d = gpu.Maxwell3D();
const auto& entries = shader->GetShaderEntries().texture_samplers;
@@ -721,24 +720,25 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,
for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
const auto& entry = entries[bindpoint];
- u32 current_bindpoint = current_unit + bindpoint;
+ const u32 current_bindpoint = current_unit + bindpoint;
// Bind the uniform to the sampler.
- glProgramUniform1i(shader->GetProgramHandle(), shader->GetUniformLocation(entry.GetName()),
+ glProgramUniform1i(shader->GetProgramHandle(), shader->GetUniformLocation(entry),
current_bindpoint);
const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset());
if (!texture.enabled) {
- state.texture_units[current_bindpoint].texture_2d = 0;
+ state.texture_units[current_bindpoint].texture = 0;
continue;
}
texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc);
Surface surface = res_cache.GetTextureSurface(texture);
if (surface != nullptr) {
- state.texture_units[current_bindpoint].texture_2d = surface->Texture().handle;
+ state.texture_units[current_bindpoint].texture = surface->Texture().handle;
+ state.texture_units[current_bindpoint].target = surface->Target();
state.texture_units[current_bindpoint].swizzle.r =
MaxwellToGL::SwizzleSource(texture.tic.x_source);
state.texture_units[current_bindpoint].swizzle.g =
@@ -749,47 +749,19 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,
MaxwellToGL::SwizzleSource(texture.tic.w_source);
} else {
// Can occur when texture addr is null or its memory is unmapped/invalid
- state.texture_units[current_bindpoint].texture_2d = 0;
+ state.texture_units[current_bindpoint].texture = 0;
}
}
- state.Apply();
-
return current_unit + static_cast<u32>(entries.size());
}
-void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface,
- const Surface& depth_surface, bool has_stencil) {
- state.draw.draw_framebuffer = framebuffer.handle;
- state.Apply();
-
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
- color_surface != nullptr ? color_surface->Texture().handle : 0, 0);
- if (depth_surface != nullptr) {
- if (has_stencil) {
- // attach both depth and stencil
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
- depth_surface->Texture().handle, 0);
- } else {
- // attach depth
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
- depth_surface->Texture().handle, 0);
- // clear stencil attachment
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
- }
- } else {
- // clear both depth and stencil attachment
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
- 0);
- }
-}
-
-void RasterizerOpenGL::SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect) {
+void RasterizerOpenGL::SyncViewport() {
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()};
- state.viewport.x = static_cast<GLint>(surfaces_rect.left) + viewport_rect.left;
- state.viewport.y = static_cast<GLint>(surfaces_rect.bottom) + viewport_rect.bottom;
+ state.viewport.x = viewport_rect.left;
+ state.viewport.y = viewport_rect.bottom;
state.viewport.width = static_cast<GLsizei>(viewport_rect.GetWidth());
state.viewport.height = static_cast<GLsizei>(viewport_rect.GetHeight());
}