From 2fea1b8407b66dd0e9ed1776c34dad043e1becf4 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 19 Aug 2023 21:49:38 +0200 Subject: Query Cache: Fix guest side sample counting --- src/video_core/engines/maxwell_3d.cpp | 6 --- src/video_core/query_cache/query_base.h | 19 ++++--- src/video_core/query_cache/query_cache.h | 46 +++++++++-------- src/video_core/query_cache/query_stream.h | 10 ++++ src/video_core/renderer_vulkan/vk_query_cache.cpp | 62 ++++++++++++++++++++--- 5 files changed, 97 insertions(+), 46 deletions(-) diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 46b9c548a..32d767d85 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -586,12 +586,6 @@ void Maxwell3D::ProcessQueryCondition() { } void Maxwell3D::ProcessCounterReset() { -#if ANDROID - if (!Settings::IsGPULevelHigh()) { - // This is problematic on Android, disable on GPU Normal. - return; - } -#endif switch (regs.clear_report_value) { case Regs::ClearReport::ZPassPixelCount: rasterizer->ResetCounter(VideoCommon::QueryType::ZPassPixelCount64); diff --git a/src/video_core/query_cache/query_base.h b/src/video_core/query_cache/query_base.h index 993a13eac..1d786b3a7 100644 --- a/src/video_core/query_cache/query_base.h +++ b/src/video_core/query_cache/query_base.h @@ -9,16 +9,15 @@ namespace VideoCommon { enum class QueryFlagBits : u32 { - HasTimestamp = 1 << 0, ///< Indicates if this query has a timestamp. - IsFinalValueSynced = 1 << 1, ///< Indicates if the query has been synced in the host - IsHostSynced = 1 << 2, ///< Indicates if the query has been synced in the host - IsGuestSynced = 1 << 3, ///< Indicates if the query has been synced with the guest. - IsHostManaged = 1 << 4, ///< Indicates if this query points to a host query - IsRewritten = 1 << 5, ///< Indicates if this query was rewritten by another query - IsInvalidated = 1 << 6, ///< Indicates the value of th query has been nullified. 
- IsOrphan = 1 << 7, ///< Indicates the query has not been set by a guest query. - IsFence = 1 << 8, ///< Indicates the query is a fence. - IsQueuedForAsyncFlush = 1 << 9, ///< Indicates that the query can be flushed at any moment + HasTimestamp = 1 << 0, ///< Indicates if this query has a timestamp. + IsFinalValueSynced = 1 << 1, ///< Indicates if the query has been synced in the host + IsHostSynced = 1 << 2, ///< Indicates if the query has been synced in the host + IsGuestSynced = 1 << 3, ///< Indicates if the query has been synced with the guest. + IsHostManaged = 1 << 4, ///< Indicates if this query points to a host query + IsRewritten = 1 << 5, ///< Indicates if this query was rewritten by another query + IsInvalidated = 1 << 6, ///< Indicates the value of th query has been nullified. + IsOrphan = 1 << 7, ///< Indicates the query has not been set by a guest query. + IsFence = 1 << 8, ///< Indicates the query is a fence. }; DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits) diff --git a/src/video_core/query_cache/query_cache.h b/src/video_core/query_cache/query_cache.h index 042af053c..4b89b5bf6 100644 --- a/src/video_core/query_cache/query_cache.h +++ b/src/video_core/query_cache/query_cache.h @@ -256,30 +256,32 @@ void QueryCacheBase::CounterReport(GPUVAddr addr, QueryType counter_type u8* pointer = impl->cpu_memory.GetPointer(cpu_addr); u8* pointer_timestamp = impl->cpu_memory.GetPointer(cpu_addr + 8); bool is_synced = !Settings::IsGPULevelHigh() && is_fence; - std::function operation( - [this, is_synced, query_base = query, query_location, pointer, pointer_timestamp] { - if (True(query_base->flags & QueryFlagBits::IsInvalidated)) { - if (!is_synced) [[likely]] { - impl->pending_unregister.push_back(query_location); - } - return; - } - if (False(query_base->flags & QueryFlagBits::IsFinalValueSynced)) [[unlikely]] { - UNREACHABLE(); - return; - } - if (True(query_base->flags & QueryFlagBits::HasTimestamp)) { - u64 timestamp = impl->gpu.GetTicks(); - 
std::memcpy(pointer_timestamp, &timestamp, sizeof(timestamp)); - std::memcpy(pointer, &query_base->value, sizeof(query_base->value)); - } else { - u32 value = static_cast<u32>(query_base->value); - std::memcpy(pointer, &value, sizeof(value)); - } + std::function operation([this, is_synced, streamer, query_base = query, query_location, + pointer, pointer_timestamp] { + if (True(query_base->flags & QueryFlagBits::IsInvalidated)) { if (!is_synced) [[likely]] { impl->pending_unregister.push_back(query_location); } - }); + return; + } + if (False(query_base->flags & QueryFlagBits::IsFinalValueSynced)) [[unlikely]] { + UNREACHABLE(); + return; + } + query_base->value += streamer->GetAmmendValue(); + streamer->SetAccumulationValue(query_base->value); + if (True(query_base->flags & QueryFlagBits::HasTimestamp)) { + u64 timestamp = impl->gpu.GetTicks(); + std::memcpy(pointer_timestamp, &timestamp, sizeof(timestamp)); + std::memcpy(pointer, &query_base->value, sizeof(query_base->value)); + } else { + u32 value = static_cast<u32>(query_base->value); + std::memcpy(pointer, &value, sizeof(value)); + } + if (!is_synced) [[likely]] { + impl->pending_unregister.push_back(query_location); + } + }); if (is_fence) { impl->rasterizer.SignalFence(std::move(operation)); } else { @@ -354,9 +356,9 @@ void QueryCacheBase::NotifySegment(bool resume) { if (resume) { impl->runtime.ResumeHostConditionalRendering(); } else { - impl->runtime.PauseHostConditionalRendering(); CounterClose(VideoCommon::QueryType::ZPassPixelCount64); CounterClose(VideoCommon::QueryType::StreamingByteCount); + impl->runtime.PauseHostConditionalRendering(); } } diff --git a/src/video_core/query_cache/query_stream.h b/src/video_core/query_cache/query_stream.h index e7aac955b..39da6ac07 100644 --- a/src/video_core/query_cache/query_stream.h +++ b/src/video_core/query_cache/query_stream.h @@ -78,6 +78,14 @@ public: return dependence_mask; } + u64 GetAmmendValue() const { + return ammend_value; + } + + void SetAccumulationValue(u64 
new_value) { + acumulation_value = new_value; + } + protected: void MakeDependent(StreamerInterface* depend_on) { dependence_mask |= 1ULL << depend_on->id; @@ -87,6 +95,8 @@ protected: const size_t id; u64 dependence_mask; u64 dependent_mask; + u64 ammend_value{}; + u64 acumulation_value{}; }; template diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index add0c6fb3..2147776f8 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -110,13 +110,16 @@ struct HostSyncValues { class SamplesStreamer : public BaseStreamer { public: - explicit SamplesStreamer(size_t id_, QueryCacheRuntime& runtime_, const Device& device_, + explicit SamplesStreamer(size_t id_, QueryCacheRuntime& runtime_, + VideoCore::RasterizerInterface* rasterizer_, const Device& device_, Scheduler& scheduler_, const MemoryAllocator& memory_allocator_) - : BaseStreamer(id_), runtime{runtime_}, device{device_}, scheduler{scheduler_}, - memory_allocator{memory_allocator_} { + : BaseStreamer(id_), runtime{runtime_}, rasterizer{rasterizer_}, device{device_}, + scheduler{scheduler_}, memory_allocator{memory_allocator_} { BuildResolveBuffer(); current_bank = nullptr; current_query = nullptr; + ammend_value = 0; + acumulation_value = 0; } ~SamplesStreamer() = default; @@ -151,6 +154,11 @@ public: PauseCounter(); } AbandonCurrentQuery(); + std::function func([this, counts = pending_flush_queries.size()] { + ammend_value = 0; + acumulation_value = 0; + }); + rasterizer->SyncOperation(std::move(func)); } void CloseCounter() override { @@ -244,7 +252,7 @@ public: } if (query->size_slots > 1) { // This is problematic. 
- UNIMPLEMENTED(); + // UNIMPLEMENTED(); } query->flags |= VideoCommon::QueryFlagBits::IsHostSynced; auto loc_data = offsets[query->start_bank_id]; @@ -255,16 +263,20 @@ public: }); } + ReplicateCurrentQueryIfNeeded(); + std::function func([this] { ammend_value = acumulation_value; }); + rasterizer->SyncOperation(std::move(func)); AbandonCurrentQuery(); pending_sync.clear(); } size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, [[maybe_unused]] std::optional subreport) override { + PauseCounter(); auto index = BuildQuery(); auto* new_query = GetQuery(index); new_query->guest_address = address; - new_query->value = 100; + new_query->value = 0; new_query->flags &= ~VideoCommon::QueryFlagBits::IsOrphan; if (has_timestamp) { new_query->flags |= VideoCommon::QueryFlagBits::HasTimestamp; @@ -291,6 +303,7 @@ public: void PushUnsyncedQueries() override { PauseCounter(); + current_bank->Close(); { std::scoped_lock lk(flush_guard); pending_flush_sets.emplace_back(std::move(pending_flush_queries)); @@ -429,6 +442,34 @@ private: current_query_id = 0; } + void ReplicateCurrentQueryIfNeeded() { + if (pending_sync.empty()) { + return; + } + if (!current_query) { + return; + } + auto index = BuildQuery(); + auto* new_query = GetQuery(index); + new_query->guest_address = 0; + new_query->value = 0; + new_query->flags &= ~VideoCommon::QueryFlagBits::IsOrphan; + new_query->start_bank_id = current_query->start_bank_id; + new_query->size_banks = current_query->size_banks; + new_query->start_slot = current_query->start_slot; + new_query->size_slots = current_query->size_slots; + ApplyBankOp(new_query, [](SamplesQueryBank* bank, size_t start, size_t amount) { + bank->AddReference(amount); + }); + pending_flush_queries.push_back(index); + std::function func([this, index] { + auto* query = GetQuery(index); + query->value += GetAmmendValue(); + SetAccumulationValue(query->value); + Free(index); + }); + } + void BuildResolveBuffer() { const VkBufferCreateInfo buffer_ci = { 
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, @@ -448,6 +489,7 @@ private: static constexpr size_t resolve_slots = 8; QueryCacheRuntime& runtime; + VideoCore::RasterizerInterface* rasterizer; const Device& device; Scheduler& scheduler; const MemoryAllocator& memory_allocator; @@ -470,6 +512,7 @@ private: size_t current_query_id; VideoCommon::HostQueryBase* current_query; bool has_started{}; + bool current_unset{}; std::mutex flush_guard; }; @@ -677,7 +720,6 @@ public: size_t offset_base = staging_ref.offset; for (auto q : pending_flush_queries) { auto* query = GetQuery(q); - query->flags |= VideoCommon::QueryFlagBits::IsQueuedForAsyncFlush; auto& bank = bank_pool.GetBank(query->start_bank_id); bank.Sync(staging_ref, offset_base, query->start_slot, 1); offset_base += TFBQueryBank::QUERY_SIZE; @@ -1047,8 +1089,8 @@ struct QueryCacheRuntimeImpl { buffer_cache{buffer_cache_}, device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, staging_pool{staging_pool_}, guest_streamer(0, runtime), - sample_streamer(static_cast(QueryType::ZPassPixelCount64), runtime, device, - scheduler, memory_allocator), + sample_streamer(static_cast(QueryType::ZPassPixelCount64), runtime, rasterizer, + device, scheduler, memory_allocator), tfb_streamer(static_cast(QueryType::StreamingByteCount), runtime, device, scheduler, memory_allocator, staging_pool), primitives_succeeded_streamer( @@ -1277,6 +1319,10 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku return true; } } + if (!is_in_bc[0] && !is_in_bc[1]) { + // Both queries are in query cache, it's best to just flush. + return false; + } HostConditionalRenderingCompareBCImpl(object_1.address, equal_check); return true; } -- cgit v1.2.3