summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/video_core/engines/maxwell_3d.cpp24
-rw-r--r--src/video_core/query_cache.h39
-rw-r--r--src/video_core/renderer_opengl/gl_query_cache.cpp23
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp49
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h3
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp4
6 files changed, 93 insertions, 49 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 592c28ba3..95ba4f76c 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -586,14 +586,22 @@ void Maxwell3D::ProcessQueryCondition() {
}
void Maxwell3D::ProcessCounterReset() {
- switch (regs.clear_report_value) {
- case Regs::ClearReport::ZPassPixelCount:
- rasterizer->ResetCounter(VideoCommon::QueryType::ZPassPixelCount64);
- break;
- default:
- LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.clear_report_value);
- break;
- }
+ const auto query_type = [clear_report = regs.clear_report_value]() {
+ switch (clear_report) {
+ case Tegra::Engines::Maxwell3D::Regs::ClearReport::ZPassPixelCount:
+ return VideoCommon::QueryType::ZPassPixelCount64;
+ case Tegra::Engines::Maxwell3D::Regs::ClearReport::StreamingPrimitivesSucceeded:
+ return VideoCommon::QueryType::StreamingPrimitivesSucceeded;
+ case Tegra::Engines::Maxwell3D::Regs::ClearReport::PrimitivesGenerated:
+ return VideoCommon::QueryType::PrimitivesGenerated;
+ case Tegra::Engines::Maxwell3D::Regs::ClearReport::VtgPrimitivesOut:
+ return VideoCommon::QueryType::VtgPrimitivesOut;
+ default:
+ LOG_DEBUG(HW_GPU, "Unimplemented counter reset={}", clear_report);
+ return VideoCommon::QueryType::Payload;
+ }
+ }();
+ rasterizer->ResetCounter(query_type);
}
void Maxwell3D::ProcessSyncPoint() {
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index 9fcaeeac7..a64404ce4 100644
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -28,8 +28,11 @@
namespace VideoCore {
enum class QueryType {
SamplesPassed,
+ PrimitivesGenerated,
+ TfbPrimitivesWritten,
+ Count,
};
-constexpr std::size_t NumQueryTypes = 1;
+constexpr std::size_t NumQueryTypes = static_cast<size_t>(QueryType::Count);
} // namespace VideoCore
namespace VideoCommon {
@@ -44,15 +47,6 @@ public:
explicit CounterStreamBase(QueryCache& cache_, VideoCore::QueryType type_)
: cache{cache_}, type{type_} {}
- /// Updates the state of the stream, enabling or disabling as needed.
- void Update(bool enabled) {
- if (enabled) {
- Enable();
- } else {
- Disable();
- }
- }
-
/// Resets the stream to zero. It doesn't disable the query after resetting.
void Reset() {
if (current) {
@@ -80,7 +74,6 @@ public:
return current != nullptr;
}
-private:
/// Enables the stream.
void Enable() {
if (current) {
@@ -97,6 +90,7 @@ private:
last = std::exchange(current, nullptr);
}
+private:
QueryCache& cache;
const VideoCore::QueryType type;
@@ -112,8 +106,14 @@ public:
: rasterizer{rasterizer_},
// Use reinterpret_cast instead of static_cast as workaround for
// UBSan bug (https://github.com/llvm/llvm-project/issues/59060)
- cpu_memory{cpu_memory_}, streams{{CounterStream{reinterpret_cast<QueryCache&>(*this),
- VideoCore::QueryType::SamplesPassed}}} {
+ cpu_memory{cpu_memory_}, streams{{
+ {CounterStream{reinterpret_cast<QueryCache&>(*this),
+ VideoCore::QueryType::SamplesPassed}},
+ {CounterStream{reinterpret_cast<QueryCache&>(*this),
+ VideoCore::QueryType::PrimitivesGenerated}},
+ {CounterStream{reinterpret_cast<QueryCache&>(*this),
+ VideoCore::QueryType::TfbPrimitivesWritten}},
+ }} {
(void)slot_async_jobs.insert(); // Null value
}
@@ -157,12 +157,11 @@ public:
AsyncFlushQuery(query, timestamp, lock);
}
- /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
- void UpdateCounters() {
+ /// Enables all available GPU counters
+ void EnableCounters() {
std::unique_lock lock{mutex};
- if (maxwell3d) {
- const auto& regs = maxwell3d->regs;
- Stream(VideoCore::QueryType::SamplesPassed).Update(regs.zpass_pixel_count_enable);
+ for (auto& stream : streams) {
+ stream.Enable();
}
}
@@ -176,7 +175,7 @@ public:
void DisableStreams() {
std::unique_lock lock{mutex};
for (auto& stream : streams) {
- stream.Update(false);
+ stream.Disable();
}
}
@@ -353,7 +352,7 @@ private:
std::shared_ptr<std::vector<AsyncJobId>> uncommitted_flushes{};
std::list<std::shared_ptr<std::vector<AsyncJobId>>> committed_flushes;
-};
+}; // namespace VideoCommon
template <class QueryCache, class HostCounter>
class HostCounterBase {
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp
index ec142d48e..fef7360ed 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_query_cache.cpp
@@ -18,16 +18,27 @@ namespace OpenGL {
namespace {
-constexpr std::array<GLenum, VideoCore::NumQueryTypes> QueryTargets = {GL_SAMPLES_PASSED};
-
constexpr GLenum GetTarget(VideoCore::QueryType type) {
- return QueryTargets[static_cast<std::size_t>(type)];
+ switch (type) {
+ case VideoCore::QueryType::SamplesPassed:
+ return GL_SAMPLES_PASSED;
+ case VideoCore::QueryType::PrimitivesGenerated:
+ return GL_PRIMITIVES_GENERATED;
+ case VideoCore::QueryType::TfbPrimitivesWritten:
+ return GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN;
+ default:
+ break;
+ }
+ UNIMPLEMENTED_MSG("Query type {}", type);
+ return 0;
}
} // Anonymous namespace
QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_)
- : QueryCacheLegacy(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} {}
+ : QueryCacheLegacy(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} {
+ EnableCounters();
+}
QueryCache::~QueryCache() = default;
@@ -103,13 +114,13 @@ u64 CachedQuery::Flush([[maybe_unused]] bool async) {
auto& stream = cache->Stream(type);
const bool slice_counter = WaitPending() && stream.IsEnabled();
if (slice_counter) {
- stream.Update(false);
+ stream.Disable();
}
auto result = VideoCommon::CachedQueryBase<HostCounter>::Flush();
if (slice_counter) {
- stream.Update(true);
+ stream.Enable();
}
return result;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 279e5a4e0..0545e33c5 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -51,6 +51,22 @@ constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
void oglEnable(GLenum cap, bool state) {
(state ? glEnable : glDisable)(cap);
}
+
+std::optional<VideoCore::QueryType> MaxwellToVideoCoreQuery(VideoCommon::QueryType type) {
+ switch (type) {
+ case VideoCommon::QueryType::PrimitivesGenerated:
+ case VideoCommon::QueryType::VtgPrimitivesOut:
+ return VideoCore::QueryType::PrimitivesGenerated;
+ case VideoCommon::QueryType::ZPassPixelCount64:
+ return VideoCore::QueryType::SamplesPassed;
+ case VideoCommon::QueryType::StreamingPrimitivesSucceeded:
+ // case VideoCommon::QueryType::StreamingByteCount:
+ // TODO: StreamingByteCount = StreamingPrimitivesSucceeded * num_verts * vert_stride
+ return VideoCore::QueryType::TfbPrimitivesWritten;
+ default:
+ return std::nullopt;
+ }
+}
} // Anonymous namespace
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
@@ -212,7 +228,6 @@ void RasterizerOpenGL::PrepareDraw(bool is_indexed, Func&& draw_func) {
SCOPE_EXIT({ gpu.TickWork(); });
gpu_memory->FlushCaching();
- query_cache.UpdateCounters();
GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()};
if (!pipeline) {
@@ -330,7 +345,6 @@ void RasterizerOpenGL::DrawTexture() {
MICROPROFILE_SCOPE(OpenGL_Drawing);
SCOPE_EXIT({ gpu.TickWork(); });
- query_cache.UpdateCounters();
texture_cache.SynchronizeGraphicsDescriptors();
texture_cache.UpdateRenderTargets(false);
@@ -397,21 +411,28 @@ void RasterizerOpenGL::DispatchCompute() {
}
void RasterizerOpenGL::ResetCounter(VideoCommon::QueryType type) {
- if (type == VideoCommon::QueryType::ZPassPixelCount64) {
- query_cache.ResetCounter(VideoCore::QueryType::SamplesPassed);
+ const auto query_cache_type = MaxwellToVideoCoreQuery(type);
+ if (!query_cache_type.has_value()) {
+ UNIMPLEMENTED_MSG("Reset query type: {}", type);
+ return;
}
+ query_cache.ResetCounter(*query_cache_type);
}
void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type,
VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) {
- if (type == VideoCommon::QueryType::ZPassPixelCount64) {
- if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) {
- query_cache.Query(gpu_addr, VideoCore::QueryType::SamplesPassed, {gpu.GetTicks()});
- } else {
- query_cache.Query(gpu_addr, VideoCore::QueryType::SamplesPassed, std::nullopt);
- }
- return;
+ const auto query_cache_type = MaxwellToVideoCoreQuery(type);
+ if (!query_cache_type.has_value()) {
+ return QueryFallback(gpu_addr, type, flags, payload, subreport);
}
+ const bool has_timeout = True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout);
+ const auto timestamp = has_timeout ? std::optional<u64>{gpu.GetTicks()} : std::nullopt;
+ query_cache.Query(gpu_addr, *query_cache_type, timestamp);
+}
+
+void RasterizerOpenGL::QueryFallback(GPUVAddr gpu_addr, VideoCommon::QueryType type,
+ VideoCommon::QueryPropertiesFlags flags, u32 payload,
+ u32 subreport) {
if (type != VideoCommon::QueryType::Payload) {
payload = 1u;
}
@@ -1294,15 +1315,13 @@ void RasterizerOpenGL::BeginTransformFeedback(GraphicsPipeline* program, GLenum
program->ConfigureTransformFeedback();
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderType::TessellationInit) ||
- regs.IsShaderConfigEnabled(Maxwell::ShaderType::Tessellation) ||
- regs.IsShaderConfigEnabled(Maxwell::ShaderType::Geometry));
- UNIMPLEMENTED_IF(primitive_mode != GL_POINTS);
+ regs.IsShaderConfigEnabled(Maxwell::ShaderType::Tessellation));
// We may have to call BeginTransformFeedbackNV here since they seem to call different
// implementations on Nvidia's driver (the pointer is different) but we are using
// ARB_transform_feedback3 features with NV_transform_feedback interactions and the ARB
// extension doesn't define BeginTransformFeedback (without NV) interactions. It just works.
- glBeginTransformFeedback(GL_POINTS);
+ glBeginTransformFeedback(primitive_mode);
}
void RasterizerOpenGL::EndTransformFeedback() {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index ceffe1f1e..b79d7a70c 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -225,6 +225,9 @@ private:
/// End a transform feedback
void EndTransformFeedback();
+ void QueryFallback(GPUVAddr gpu_addr, VideoCommon::QueryType type,
+ VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport);
+
Tegra::GPU& gpu;
const Device& device;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 59829c88b..241fc34be 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -485,6 +485,10 @@ void RasterizerVulkan::DispatchCompute() {
}
void RasterizerVulkan::ResetCounter(VideoCommon::QueryType type) {
+ if (type != VideoCommon::QueryType::ZPassPixelCount64) {
+ LOG_DEBUG(Render_Vulkan, "Unimplemented counter reset={}", type);
+ return;
+ }
query_cache.CounterReset(type);
}