summary | refs | log | tree | commit | diff | stats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r-- src/video_core/command_processor.cpp 4
-rw-r--r-- src/video_core/debug_utils/debug_utils.cpp 8
-rw-r--r-- src/video_core/rasterizer.cpp 3
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 14
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp 23
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer_cache.h 4
-rw-r--r-- src/video_core/shader/shader.cpp 5
-rw-r--r-- src/video_core/shader/shader_jit_x64.cpp 8
8 files changed, 55 insertions, 14 deletions
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index d82e20f86..a78985510 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -4,6 +4,7 @@
#include <boost/range/algorithm/fill.hpp>
+#include "common/microprofile.h"
#include "common/profiler.h"
#include "core/hle/service/gsp_gpu.h"
@@ -43,6 +44,8 @@ static const u32 expand_bits_to_bytes[] = {
0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff
};
+MICROPROFILE_DEFINE(GPU_Drawing, "GPU", "Drawing", MP_RGB(50, 50, 240));
+
static void WritePicaReg(u32 id, u32 value, u32 mask) {
auto& regs = g_state.regs;
@@ -126,6 +129,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
case PICA_REG_INDEX(trigger_draw_indexed):
{
Common::Profiling::ScopeTimer scope_timer(category_drawing);
+ MICROPROFILE_SCOPE(GPU_Drawing);
#if PICA_LOG_TEV
DebugUtils::DumpTevStageConfig(regs.GetTevStages());
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
index 8ad77f0c8..059445f7d 100644
--- a/src/video_core/debug_utils/debug_utils.cpp
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -25,6 +25,8 @@
#include "common/math_util.h"
#include "common/vector_math.h"
+#include "core/settings.h"
+
#include "video_core/pica.h"
#include "video_core/renderer_base.h"
#include "video_core/utils.h"
@@ -45,8 +47,10 @@ void DebugContext::OnEvent(Event event, void* data) {
{
std::unique_lock<std::mutex> lock(breakpoint_mutex);
- // Commit the hardware renderer's framebuffer so it will show on debug widgets
- VideoCore::g_renderer->hw_rasterizer->CommitFramebuffer();
+ if (Settings::values.use_hw_renderer) {
+ // Commit the hardware renderer's framebuffer so it will show on debug widgets
+ VideoCore::g_renderer->hw_rasterizer->CommitFramebuffer();
+ }
// TODO: Should stop the CPU thread here once we multithread emulation.
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index b83798b0f..4a159da8e 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -7,6 +7,7 @@
#include "common/color.h"
#include "common/common_types.h"
#include "common/math_util.h"
+#include "common/microprofile.h"
#include "common/profiler.h"
#include "core/hw/gpu.h"
@@ -267,6 +268,7 @@ static int SignedArea (const Math::Vec2<Fix12P4>& vtx1,
};
static Common::Profiling::TimingCategory rasterization_category("Rasterization");
+MICROPROFILE_DEFINE(GPU_Rasterization, "GPU", "Rasterization", MP_RGB(50, 50, 240));
/**
* Helper function for ProcessTriangle with the "reversed" flag to allow for implementing
@@ -279,6 +281,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
{
const auto& regs = g_state.regs;
Common::Profiling::ScopeTimer timer(rasterization_category);
+ MICROPROFILE_SCOPE(GPU_Rasterization);
// vertex positions in rasterizer coordinates
static auto FloatToFix = [](float24 flt) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 9f1552adf..f0ccc2397 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -7,6 +7,7 @@
#include "common/color.h"
#include "common/math_util.h"
+#include "common/microprofile.h"
#include "common/profiler.h"
#include "core/hw/gpu.h"
@@ -230,8 +231,8 @@ void RasterizerOpenGL::DrawTriangles() {
u32 cur_fb_depth_size = Pica::Regs::BytesPerDepthPixel(regs.framebuffer.depth_format)
* regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight();
- res_cache.NotifyFlush(cur_fb_color_addr, cur_fb_color_size);
- res_cache.NotifyFlush(cur_fb_depth_addr, cur_fb_depth_size);
+ res_cache.NotifyFlush(cur_fb_color_addr, cur_fb_color_size, true);
+ res_cache.NotifyFlush(cur_fb_depth_addr, cur_fb_depth_size, true);
}
void RasterizerOpenGL::CommitFramebuffer() {
@@ -777,12 +778,16 @@ void RasterizerOpenGL::SyncDrawState() {
state.Apply();
}
+MICROPROFILE_DEFINE(OpenGL_FramebufferReload, "OpenGL", "FB Reload", MP_RGB(70, 70, 200));
+
void RasterizerOpenGL::ReloadColorBuffer() {
u8* color_buffer = Memory::GetPhysicalPointer(Pica::g_state.regs.framebuffer.GetColorBufferPhysicalAddress());
if (color_buffer == nullptr)
return;
+ MICROPROFILE_SCOPE(OpenGL_FramebufferReload);
+
u32 bytes_per_pixel = Pica::Regs::BytesPerColorPixel(fb_color_texture.format);
std::unique_ptr<u8[]> temp_fb_color_buffer(new u8[fb_color_texture.width * fb_color_texture.height * bytes_per_pixel]);
@@ -822,6 +827,8 @@ void RasterizerOpenGL::ReloadDepthBuffer() {
if (depth_buffer == nullptr)
return;
+ MICROPROFILE_SCOPE(OpenGL_FramebufferReload);
+
u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format);
// OpenGL needs 4 bpp alignment for D24
@@ -868,6 +875,7 @@ void RasterizerOpenGL::ReloadDepthBuffer() {
}
Common::Profiling::TimingCategory buffer_commit_category("Framebuffer Commit");
+MICROPROFILE_DEFINE(OpenGL_FramebufferCommit, "OpenGL", "FB Commit", MP_RGB(70, 70, 200));
void RasterizerOpenGL::CommitColorBuffer() {
if (last_fb_color_addr != 0) {
@@ -875,6 +883,7 @@ void RasterizerOpenGL::CommitColorBuffer() {
if (color_buffer != nullptr) {
Common::Profiling::ScopeTimer timer(buffer_commit_category);
+ MICROPROFILE_SCOPE(OpenGL_FramebufferCommit);
u32 bytes_per_pixel = Pica::Regs::BytesPerColorPixel(fb_color_texture.format);
@@ -911,6 +920,7 @@ void RasterizerOpenGL::CommitDepthBuffer() {
if (depth_buffer != nullptr) {
Common::Profiling::ScopeTimer timer(buffer_commit_category);
+ MICROPROFILE_SCOPE(OpenGL_FramebufferCommit);
u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 70f0ba5f1..1e38c2e6d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -2,8 +2,10 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include "common/hash.h"
#include "common/make_unique.h"
#include "common/math_util.h"
+#include "common/microprofile.h"
#include "common/vector_math.h"
#include "core/memory.h"
@@ -16,15 +18,18 @@ RasterizerCacheOpenGL::~RasterizerCacheOpenGL() {
FullFlush();
}
+MICROPROFILE_DEFINE(OpenGL_TextureUpload, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192));
+
void RasterizerCacheOpenGL::LoadAndBindTexture(OpenGLState &state, unsigned texture_unit, const Pica::Regs::FullTextureConfig& config) {
PAddr texture_addr = config.config.GetPhysicalAddress();
-
const auto cached_texture = texture_cache.find(texture_addr);
if (cached_texture != texture_cache.end()) {
state.texture_units[texture_unit].texture_2d = cached_texture->second->texture.handle;
state.Apply();
} else {
+ MICROPROFILE_SCOPE(OpenGL_TextureUpload);
+
std::unique_ptr<CachedTexture> new_texture = Common::make_unique<CachedTexture>();
new_texture->texture.Create();
@@ -46,12 +51,14 @@ void RasterizerCacheOpenGL::LoadAndBindTexture(OpenGLState &state, unsigned text
}
const auto info = Pica::DebugUtils::TextureInfo::FromPicaRegister(config.config, config.format);
+ u8* texture_src_data = Memory::GetPhysicalPointer(texture_addr);
new_texture->width = info.width;
new_texture->height = info.height;
- new_texture->size = info.width * info.height * Pica::Regs::NibblesPerPixel(info.format);
+ new_texture->size = info.stride * info.height;
+ new_texture->addr = texture_addr;
+ new_texture->hash = Common::ComputeHash64(texture_src_data, new_texture->size);
- u8* texture_src_data = Memory::GetPhysicalPointer(texture_addr);
std::unique_ptr<Math::Vec4<u8>[]> temp_texture_buffer_rgba(new Math::Vec4<u8>[info.width * info.height]);
for (int y = 0; y < info.height; ++y) {
@@ -66,12 +73,18 @@ void RasterizerCacheOpenGL::LoadAndBindTexture(OpenGLState &state, unsigned text
}
}
-void RasterizerCacheOpenGL::NotifyFlush(PAddr addr, u32 size) {
+void RasterizerCacheOpenGL::NotifyFlush(PAddr addr, u32 size, bool ignore_hash) {
// Flush any texture that falls in the flushed region
// TODO: Optimize by also inserting upper bound (addr + size) of each texture into the same map and also narrow using lower_bound
auto cache_upper_bound = texture_cache.upper_bound(addr + size);
+
for (auto it = texture_cache.begin(); it != cache_upper_bound;) {
- if (MathUtil::IntervalsIntersect(addr, size, it->first, it->second->size)) {
+ const auto& info = *it->second;
+
+ // Flush the texture only if the memory region intersects and a change is detected
+ if (MathUtil::IntervalsIntersect(addr, size, info.addr, info.size) &&
+ (ignore_hash || info.hash != Common::ComputeHash64(Memory::GetPhysicalPointer(info.addr), info.size))) {
+
it = texture_cache.erase(it);
} else {
++it;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 96f3a925c..d8f9edf59 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -19,7 +19,7 @@ public:
void LoadAndBindTexture(OpenGLState &state, unsigned texture_unit, const Pica::Regs::FullTextureConfig& config);
/// Flush any cached resource that touches the flushed region
- void NotifyFlush(PAddr addr, u32 size);
+ void NotifyFlush(PAddr addr, u32 size, bool ignore_hash = false);
/// Flush all cached OpenGL resources tracked by this cache manager
void FullFlush();
@@ -30,6 +30,8 @@ private:
GLuint width;
GLuint height;
u32 size;
+ u64 hash;
+ PAddr addr;
};
std::map<PAddr, std::unique_ptr<CachedTexture>> texture_cache;
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index 4e9836c80..f89117521 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -9,6 +9,7 @@
#include "common/hash.h"
#include "common/make_unique.h"
+#include "common/microprofile.h"
#include "common/profiler.h"
#include "video_core/debug_utils/debug_utils.h"
@@ -51,15 +52,19 @@ void Setup(UnitState<false>& state) {
}
void Shutdown() {
+#ifdef ARCHITECTURE_x86_64
shader_map.clear();
+#endif // ARCHITECTURE_x86_64
}
static Common::Profiling::TimingCategory shader_category("Vertex Shader");
+MICROPROFILE_DEFINE(GPU_VertexShader, "GPU", "Vertex Shader", MP_RGB(50, 50, 240));
OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes) {
auto& config = g_state.regs.vs;
Common::Profiling::ScopeTimer timer(shader_category);
+ MICROPROFILE_SCOPE(GPU_VertexShader);
state.program_counter = config.main_offset;
state.debug.max_offset = 0;
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index c8a669b51..d3cfe109e 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -493,8 +493,8 @@ void JitCompiler::Compile_MOVA(Instruction instr) {
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
- // Convert floats to integers (only care about X and Y components)
- CVTPS2DQ(SRC1, R(SRC1));
+ // Convert floats to integers using truncation (only care about X and Y components)
+ CVTTPS2DQ(SRC1, R(SRC1));
// Get result
MOVQ_xmm(R(RAX), SRC1);
@@ -768,12 +768,12 @@ CompiledShader* JitCompiler::Compile() {
// Used to set a register to one
static const __m128 one = { 1.f, 1.f, 1.f, 1.f };
MOV(PTRBITS, R(RAX), ImmPtr(&one));
- MOVAPS(ONE, MDisp(RAX, 0));
+ MOVAPS(ONE, MatR(RAX));
// Used to negate registers
static const __m128 neg = { -0.f, -0.f, -0.f, -0.f };
MOV(PTRBITS, R(RAX), ImmPtr(&neg));
- MOVAPS(NEGBIT, MDisp(RAX, 0));
+ MOVAPS(NEGBIT, MatR(RAX));
looping = false;