diff options
Diffstat (limited to 'src/video_core')
-rw-r--r-- | src/video_core/command_processor.cpp | 4 | ||||
-rw-r--r-- | src/video_core/debug_utils/debug_utils.cpp | 8 | ||||
-rw-r--r-- | src/video_core/rasterizer.cpp | 3 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 14 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 23 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.h | 4 | ||||
-rw-r--r-- | src/video_core/shader/shader.cpp | 5 | ||||
-rw-r--r-- | src/video_core/shader/shader_jit_x64.cpp | 8 |
8 files changed, 55 insertions, 14 deletions
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index d82e20f86..a78985510 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -4,6 +4,7 @@ #include <boost/range/algorithm/fill.hpp> +#include "common/microprofile.h" #include "common/profiler.h" #include "core/hle/service/gsp_gpu.h" @@ -43,6 +44,8 @@ static const u32 expand_bits_to_bytes[] = { 0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff }; +MICROPROFILE_DEFINE(GPU_Drawing, "GPU", "Drawing", MP_RGB(50, 50, 240)); + static void WritePicaReg(u32 id, u32 value, u32 mask) { auto& regs = g_state.regs; @@ -126,6 +129,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { case PICA_REG_INDEX(trigger_draw_indexed): { Common::Profiling::ScopeTimer scope_timer(category_drawing); + MICROPROFILE_SCOPE(GPU_Drawing); #if PICA_LOG_TEV DebugUtils::DumpTevStageConfig(regs.GetTevStages()); diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index 8ad77f0c8..059445f7d 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -25,6 +25,8 @@ #include "common/math_util.h" #include "common/vector_math.h" +#include "core/settings.h" + #include "video_core/pica.h" #include "video_core/renderer_base.h" #include "video_core/utils.h" @@ -45,8 +47,10 @@ void DebugContext::OnEvent(Event event, void* data) { { std::unique_lock<std::mutex> lock(breakpoint_mutex); - // Commit the hardware renderer's framebuffer so it will show on debug widgets - VideoCore::g_renderer->hw_rasterizer->CommitFramebuffer(); + if (Settings::values.use_hw_renderer) { + // Commit the hardware renderer's framebuffer so it will show on debug widgets + VideoCore::g_renderer->hw_rasterizer->CommitFramebuffer(); + } // TODO: Should stop the CPU thread here once we multithread emulation. diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index b83798b0f..4a159da8e 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -7,6 +7,7 @@ #include "common/color.h" #include "common/common_types.h" #include "common/math_util.h" +#include "common/microprofile.h" #include "common/profiler.h" #include "core/hw/gpu.h" @@ -267,6 +268,7 @@ static int SignedArea (const Math::Vec2<Fix12P4>& vtx1, }; static Common::Profiling::TimingCategory rasterization_category("Rasterization"); +MICROPROFILE_DEFINE(GPU_Rasterization, "GPU", "Rasterization", MP_RGB(50, 50, 240)); /** * Helper function for ProcessTriangle with the "reversed" flag to allow for implementing @@ -279,6 +281,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, { const auto& regs = g_state.regs; Common::Profiling::ScopeTimer timer(rasterization_category); + MICROPROFILE_SCOPE(GPU_Rasterization); // vertex positions in rasterizer coordinates static auto FloatToFix = [](float24 flt) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 9f1552adf..f0ccc2397 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -7,6 +7,7 @@ #include "common/color.h" #include "common/math_util.h" +#include "common/microprofile.h" #include "common/profiler.h" #include "core/hw/gpu.h" @@ -230,8 +231,8 @@ void RasterizerOpenGL::DrawTriangles() { u32 cur_fb_depth_size = Pica::Regs::BytesPerDepthPixel(regs.framebuffer.depth_format) * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight(); - res_cache.NotifyFlush(cur_fb_color_addr, cur_fb_color_size); - res_cache.NotifyFlush(cur_fb_depth_addr, cur_fb_depth_size); + res_cache.NotifyFlush(cur_fb_color_addr, cur_fb_color_size, true); + res_cache.NotifyFlush(cur_fb_depth_addr, cur_fb_depth_size, true); } void RasterizerOpenGL::CommitFramebuffer() { @@ -777,12 +778,16 @@ void RasterizerOpenGL::SyncDrawState() { state.Apply(); } +MICROPROFILE_DEFINE(OpenGL_FramebufferReload, "OpenGL", "FB Reload", MP_RGB(70, 70, 200)); + void RasterizerOpenGL::ReloadColorBuffer() { u8* color_buffer = Memory::GetPhysicalPointer(Pica::g_state.regs.framebuffer.GetColorBufferPhysicalAddress()); if (color_buffer == nullptr) return; + MICROPROFILE_SCOPE(OpenGL_FramebufferReload); + u32 bytes_per_pixel = Pica::Regs::BytesPerColorPixel(fb_color_texture.format); std::unique_ptr<u8[]> temp_fb_color_buffer(new u8[fb_color_texture.width * fb_color_texture.height * bytes_per_pixel]); @@ -822,6 +827,8 @@ void RasterizerOpenGL::ReloadDepthBuffer() { if (depth_buffer == nullptr) return; + MICROPROFILE_SCOPE(OpenGL_FramebufferReload); + u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format); // OpenGL needs 4 bpp alignment for D24 @@ -868,6 +875,7 @@ void RasterizerOpenGL::ReloadDepthBuffer() { } Common::Profiling::TimingCategory buffer_commit_category("Framebuffer Commit"); +MICROPROFILE_DEFINE(OpenGL_FramebufferCommit, "OpenGL", "FB Commit", MP_RGB(70, 70, 200)); void RasterizerOpenGL::CommitColorBuffer() { if (last_fb_color_addr != 0) { @@ -875,6 +883,7 @@ void RasterizerOpenGL::CommitColorBuffer() { if (color_buffer != nullptr) { Common::Profiling::ScopeTimer timer(buffer_commit_category); + MICROPROFILE_SCOPE(OpenGL_FramebufferCommit); u32 bytes_per_pixel = Pica::Regs::BytesPerColorPixel(fb_color_texture.format); @@ -911,6 +920,7 @@ void RasterizerOpenGL::CommitDepthBuffer() { if (depth_buffer != nullptr) { Common::Profiling::ScopeTimer timer(buffer_commit_category); + MICROPROFILE_SCOPE(OpenGL_FramebufferCommit); u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 70f0ba5f1..1e38c2e6d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -2,8 +2,10 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include "common/hash.h" #include "common/make_unique.h" #include "common/math_util.h" +#include "common/microprofile.h" #include "common/vector_math.h" #include "core/memory.h" @@ -16,15 +18,18 @@ RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { FullFlush(); } +MICROPROFILE_DEFINE(OpenGL_TextureUpload, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192)); + void RasterizerCacheOpenGL::LoadAndBindTexture(OpenGLState &state, unsigned texture_unit, const Pica::Regs::FullTextureConfig& config) { PAddr texture_addr = config.config.GetPhysicalAddress(); - const auto cached_texture = texture_cache.find(texture_addr); if (cached_texture != texture_cache.end()) { state.texture_units[texture_unit].texture_2d = cached_texture->second->texture.handle; state.Apply(); } else { + MICROPROFILE_SCOPE(OpenGL_TextureUpload); + std::unique_ptr<CachedTexture> new_texture = Common::make_unique<CachedTexture>(); new_texture->texture.Create(); @@ -46,12 +51,14 @@ void RasterizerCacheOpenGL::LoadAndBindTexture(OpenGLState &state, unsigned text } const auto info = Pica::DebugUtils::TextureInfo::FromPicaRegister(config.config, config.format); + u8* texture_src_data = Memory::GetPhysicalPointer(texture_addr); new_texture->width = info.width; new_texture->height = info.height; - new_texture->size = info.width * info.height * Pica::Regs::NibblesPerPixel(info.format); + new_texture->size = info.stride * info.height; + new_texture->addr = texture_addr; + new_texture->hash = Common::ComputeHash64(texture_src_data, new_texture->size); - u8* texture_src_data = Memory::GetPhysicalPointer(texture_addr); std::unique_ptr<Math::Vec4<u8>[]> temp_texture_buffer_rgba(new Math::Vec4<u8>[info.width * info.height]); for (int y = 0; y < info.height; ++y) { @@ -66,12 +73,18 @@ void RasterizerCacheOpenGL::LoadAndBindTexture(OpenGLState &state, unsigned text } } -void RasterizerCacheOpenGL::NotifyFlush(PAddr addr, u32 size) { +void RasterizerCacheOpenGL::NotifyFlush(PAddr addr, u32 size, bool ignore_hash) { // Flush any texture that falls in the flushed region // TODO: Optimize by also inserting upper bound (addr + size) of each texture into the same map and also narrow using lower_bound auto cache_upper_bound = texture_cache.upper_bound(addr + size); + for (auto it = texture_cache.begin(); it != cache_upper_bound;) { - if (MathUtil::IntervalsIntersect(addr, size, it->first, it->second->size)) { + const auto& info = *it->second; + + // Flush the texture only if the memory region intersects and a change is detected + if (MathUtil::IntervalsIntersect(addr, size, info.addr, info.size) && + (ignore_hash || info.hash != Common::ComputeHash64(Memory::GetPhysicalPointer(info.addr), info.size))) { + it = texture_cache.erase(it); } else { ++it; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 96f3a925c..d8f9edf59 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -19,7 +19,7 @@ public: void LoadAndBindTexture(OpenGLState &state, unsigned texture_unit, const Pica::Regs::FullTextureConfig& config); /// Flush any cached resource that touches the flushed region - void NotifyFlush(PAddr addr, u32 size); + void NotifyFlush(PAddr addr, u32 size, bool ignore_hash = false); /// Flush all cached OpenGL resources tracked by this cache manager void FullFlush(); @@ -30,6 +30,8 @@ private: GLuint width; GLuint height; u32 size; + u64 hash; + PAddr addr; }; std::map<PAddr, std::unique_ptr<CachedTexture>> texture_cache; diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 4e9836c80..f89117521 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -9,6 +9,7 @@ #include "common/hash.h" #include "common/make_unique.h" +#include "common/microprofile.h" #include "common/profiler.h" #include "video_core/debug_utils/debug_utils.h" @@ -51,15 +52,19 @@ void Setup(UnitState<false>& state) { } void Shutdown() { +#ifdef ARCHITECTURE_x86_64 shader_map.clear(); +#endif // ARCHITECTURE_x86_64 } static Common::Profiling::TimingCategory shader_category("Vertex Shader"); +MICROPROFILE_DEFINE(GPU_VertexShader, "GPU", "Vertex Shader", MP_RGB(50, 50, 240)); OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes) { auto& config = g_state.regs.vs; Common::Profiling::ScopeTimer timer(shader_category); + MICROPROFILE_SCOPE(GPU_VertexShader); state.program_counter = config.main_offset; state.debug.max_offset = 0; diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index c8a669b51..d3cfe109e 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -493,8 +493,8 @@ void JitCompiler::Compile_MOVA(Instruction instr) { Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); - // Convert floats to integers (only care about X and Y components) - CVTPS2DQ(SRC1, R(SRC1)); + // Convert floats to integers using truncation (only care about X and Y components) + CVTTPS2DQ(SRC1, R(SRC1)); // Get result MOVQ_xmm(R(RAX), SRC1); @@ -768,12 +768,12 @@ CompiledShader* JitCompiler::Compile() { // Used to set a register to one static const __m128 one = { 1.f, 1.f, 1.f, 1.f }; MOV(PTRBITS, R(RAX), ImmPtr(&one)); - MOVAPS(ONE, MDisp(RAX, 0)); + MOVAPS(ONE, MatR(RAX)); // Used to negate registers static const __m128 neg = { -0.f, -0.f, -0.f, -0.f }; MOV(PTRBITS, R(RAX), ImmPtr(&neg)); - MOVAPS(NEGBIT, MDisp(RAX, 0)); + MOVAPS(NEGBIT, MatR(RAX)); looping = false; |