From bdc01254a9b3ce8359f8f007c2102cb2d112418e Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 4 Aug 2023 03:31:52 +0200 Subject: Query Cache: Setup Base rework --- src/video_core/query_cache/query_cache.h | 543 +++++++++++++++++++++++++++++++ 1 file changed, 543 insertions(+) create mode 100644 src/video_core/query_cache/query_cache.h (limited to 'src/video_core/query_cache/query_cache.h') diff --git a/src/video_core/query_cache/query_cache.h b/src/video_core/query_cache/query_cache.h new file mode 100644 index 000000000..f6af48d14 --- /dev/null +++ b/src/video_core/query_cache/query_cache.h @@ -0,0 +1,543 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "common/assert.h" +#include "common/common_types.h" +#include "common/logging/log.h" +#include "common/scope_exit.h" +#include "common/settings.h" +#include "core/memory.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/gpu.h" +#include "video_core/memory_manager.h" +#include "video_core/query_cache/bank_base.h" +#include "video_core/query_cache/query_base.h" +#include "video_core/query_cache/query_cache_base.h" +#include "video_core/query_cache/query_stream.h" +#include "video_core/query_cache/types.h" + +namespace VideoCommon { + +using Maxwell = Tegra::Engines::Maxwell3D; + +struct SyncValuesStruct { + VAddr address; + u64 value; + u64 size; + + static constexpr bool GeneratesBaseBuffer = true; +}; + +template +class GuestStreamer : public SimpleStreamer { +public: + using RuntimeType = typename Traits::RuntimeType; + + GuestStreamer(size_t id_, RuntimeType& runtime_) + : SimpleStreamer(id_), runtime{runtime_} {} + + virtual ~GuestStreamer() = default; + + size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, + std::optional subreport = std::nullopt) override { + auto new_id = BuildQuery(has_timestamp, address, static_cast(value)); + pending_sync.push_back(new_id); + return new_id; + } + + bool HasPendingSync() override { + return !pending_sync.empty(); + } + + void SyncWrites() override { + if (pending_sync.empty()) { + return; + } + std::vector sync_values; + sync_values.reserve(pending_sync.size()); + for (size_t pending_id : pending_sync) { + auto& query = slot_queries[pending_id]; + if (True(query.flags & QueryFlagBits::IsRewritten) || + True(query.flags & QueryFlagBits::IsInvalidated)) { + continue; + } + query.flags |= QueryFlagBits::IsHostSynced; + sync_values.emplace_back(query.guest_address, query.value, + True(query.flags & QueryFlagBits::HasTimestamp) ? 8 : 4); + } + pending_sync.clear(); + if (sync_values.size() > 0) { + runtime.template SyncValues(sync_values); + } + } + +private: + RuntimeType& runtime; + std::deque pending_sync; +}; + +template +class StubStreamer : public GuestStreamer { +public: + using RuntimeType = typename Traits::RuntimeType; + + StubStreamer(size_t id_, RuntimeType& runtime_) : GuestStreamer(id_, runtime_) {} + + ~StubStreamer() override = default; + + size_t WriteCounter(VAddr address, bool has_timestamp, [[maybe_unused]] u32 value, + std::optional subreport = std::nullopt) override { + size_t new_id = GuestStreamer::WriteCounter(address, has_timestamp, 1U, subreport); + return new_id; + } +}; + +template +struct QueryCacheBase::QueryCacheBaseImpl { + using RuntimeType = typename Traits::RuntimeType; + + QueryCacheBaseImpl(QueryCacheBase* owner_, VideoCore::RasterizerInterface& rasterizer_, + Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_, Tegra::GPU& gpu_) + : owner{owner_}, rasterizer{rasterizer_}, + cpu_memory{cpu_memory_}, runtime{runtime_}, gpu{gpu_} { + streamer_mask = 0; + for (size_t i = 0; i < static_cast(QueryType::MaxQueryTypes); i++) { + streamers[i] = runtime.GetStreamerInterface(static_cast(i)); + if (streamers[i]) { + streamer_mask |= 1ULL << i; + } + } + } + + template + void ForEachStreamerIn(u64 mask, Func&& func) { + static constexpr bool RETURNS_BOOL = + std::is_same_v, bool>; + while (mask != 0) { + size_t position = std::countr_zero(mask); + mask &= ~(1ULL << position); + if constexpr (RETURNS_BOOL) { + if (func(streamers[position])) { + return; + } + } else { + func(streamers[position]); + } + } + } + + template + void ForEachStreamer(Func&& func) { + ForEachStreamerIn(streamer_mask, func); + } + + QueryBase* ObtainQuery(QueryCacheBase::QueryLocation location) { + size_t which_stream = location.stream_id.Value(); + auto* streamer = streamers[which_stream]; + if (!streamer) { + return nullptr; + } + return streamer->GetQuery(location.query_id.Value()); + } + + QueryCacheBase* owner; + VideoCore::RasterizerInterface& rasterizer; + Core::Memory::Memory& cpu_memory; + Traits::RuntimeType& runtime; + Tegra::GPU& gpu; + std::array(QueryType::MaxQueryTypes)> streamers; + u64 streamer_mask; + std::mutex flush_guard; + std::deque flushes_pending; + std::vector::QueryLocation> pending_unregister; +}; + +template +QueryCacheBase::QueryCacheBase(Tegra::GPU& gpu_, + VideoCore::RasterizerInterface& rasterizer_, + Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_) + : cached_queries{} { + impl = std::make_unique::QueryCacheBaseImpl>( + this, rasterizer_, cpu_memory_, runtime_, gpu_); +} + +template +QueryCacheBase::~QueryCacheBase() = default; + +template +void QueryCacheBase::CounterEnable(QueryType counter_type, bool is_enabled) { + size_t index = static_cast(counter_type); + StreamerInterface* streamer = impl->streamers[index]; + if (!streamer) [[unlikely]] { + UNREACHABLE(); + return; + } + if (is_enabled) { + streamer->StartCounter(); + } else { + streamer->PauseCounter(); + } +} + +template +void QueryCacheBase::CounterClose(QueryType counter_type) { + size_t index = static_cast(counter_type); + StreamerInterface* streamer = impl->streamers[index]; + if (!streamer) [[unlikely]] { + UNREACHABLE(); + return; + } + streamer->CloseCounter(); +} + +template +void QueryCacheBase::CounterReset(QueryType counter_type) { + size_t index = static_cast(counter_type); + StreamerInterface* streamer = impl->streamers[index]; + if (!streamer) [[unlikely]] { + UNIMPLEMENTED(); + return; + } + streamer->ResetCounter(); +} + +template +void QueryCacheBase::BindToChannel(s32 id) { + VideoCommon::ChannelSetupCaches::BindToChannel(id); + impl->runtime.Bind3DEngine(maxwell3d); +} + +template +void QueryCacheBase::CounterReport(GPUVAddr addr, QueryType counter_type, + QueryPropertiesFlags flags, u32 payload, u32 subreport) { + const bool has_timestamp = True(flags & QueryPropertiesFlags::HasTimeout); + const bool is_fence = True(flags & QueryPropertiesFlags::IsAFence); + size_t streamer_id = static_cast(counter_type); + auto* streamer = impl->streamers[streamer_id]; + if (!streamer) [[unlikely]] { + if (has_timestamp) { + u64 timestamp = impl->gpu.GetTicks(); + gpu_memory->Write(addr + 8, timestamp); + gpu_memory->Write(addr, 1ULL); + } else { + gpu_memory->Write(addr, 1U); + } + return; + } + auto cpu_addr_opt = gpu_memory->GpuToCpuAddress(addr); + if (!cpu_addr_opt) [[unlikely]] { + return; + } + VAddr cpu_addr = *cpu_addr_opt; + const size_t new_query_id = streamer->WriteCounter(cpu_addr, has_timestamp, payload, subreport); + auto* query = streamer->GetQuery(new_query_id); + if (is_fence) { + query->flags |= QueryFlagBits::IsFence; + } + QueryLocation query_location{}; + query_location.stream_id.Assign(static_cast(streamer_id)); + query_location.query_id.Assign(static_cast(new_query_id)); + const auto gen_caching_indexing = [](VAddr cur_addr) { + return std::make_pair(cur_addr >> Core::Memory::YUZU_PAGEBITS, + static_cast(cur_addr & Core::Memory::YUZU_PAGEMASK)); + }; + u8* pointer = impl->cpu_memory.GetPointer(cpu_addr); + u8* pointer_timestamp = impl->cpu_memory.GetPointer(cpu_addr + 8); + bool is_synced = !Settings::IsGPULevelHigh() && is_fence; + std::function operation( + [this, is_synced, query_base = query, query_location, pointer, pointer_timestamp] { + if (True(query_base->flags & QueryFlagBits::IsInvalidated)) { + if (!is_synced) [[likely]] { + impl->pending_unregister.push_back(query_location); + } + return; + } + if (False(query_base->flags & QueryFlagBits::IsFinalValueSynced)) [[unlikely]] { + UNREACHABLE(); + return; + } + if (True(query_base->flags & QueryFlagBits::HasTimestamp)) { + u64 timestamp = impl->gpu.GetTicks(); + std::memcpy(pointer_timestamp, ×tamp, sizeof(timestamp)); + std::memcpy(pointer, &query_base->value, sizeof(query_base->value)); + } else { + u32 value = static_cast(query_base->value); + std::memcpy(pointer, &value, sizeof(value)); + } + if (!is_synced) [[likely]] { + impl->pending_unregister.push_back(query_location); + } + }); + if (is_fence) { + impl->rasterizer.SignalFence(std::move(operation)); + } else { + impl->rasterizer.SyncOperation(std::move(operation)); + } + if (is_synced) { + streamer->Free(new_query_id); + return; + } + auto [cont_addr, base] = gen_caching_indexing(cpu_addr); + { + std::scoped_lock lock(cache_mutex); + auto it1 = cached_queries.try_emplace(cont_addr); + auto& sub_container = it1.first->second; + auto it_current = sub_container.find(base); + if (it_current == sub_container.end()) { + sub_container.insert_or_assign(base, query_location); + return; + } + auto* old_query = impl->ObtainQuery(it_current->second); + old_query->flags |= QueryFlagBits::IsRewritten; + sub_container.insert_or_assign(base, query_location); + } +} + +template +void QueryCacheBase::UnregisterPending() { + const auto gen_caching_indexing = [](VAddr cur_addr) { + return std::make_pair(cur_addr >> Core::Memory::YUZU_PAGEBITS, + static_cast(cur_addr & Core::Memory::YUZU_PAGEMASK)); + }; + std::scoped_lock lock(cache_mutex); + for (QueryLocation loc : impl->pending_unregister) { + const auto [streamer_id, query_id] = loc.unpack(); + auto* streamer = impl->streamers[streamer_id]; + if (!streamer) [[unlikely]] { + continue; + } + auto* query = streamer->GetQuery(query_id); + auto [cont_addr, base] = gen_caching_indexing(query->guest_address); + auto it1 = cached_queries.find(cont_addr); + if (it1 != cached_queries.end()) { + auto it2 = it1->second.find(base); + if (it2 != it1->second.end()) { + if (it2->second.raw == loc.raw) { + it1->second.erase(it2); + } + } + } + streamer->Free(query_id); + } + impl->pending_unregister.clear(); +} + +template +void QueryCacheBase::NotifyWFI() { + bool should_sync = false; + impl->ForEachStreamer( + [&should_sync](StreamerInterface* streamer) { should_sync |= streamer->HasPendingSync(); }); + if (!should_sync) { + return; + } + + impl->ForEachStreamer([](StreamerInterface* streamer) { streamer->PresyncWrites(); }); + impl->runtime.Barriers(true); + impl->ForEachStreamer([](StreamerInterface* streamer) { streamer->SyncWrites(); }); + impl->runtime.Barriers(false); +} + +template +void QueryCacheBase::NotifySegment(bool resume) { + if (resume) { + impl->runtime.ResumeHostConditionalRendering(); + } else { + impl->runtime.PauseHostConditionalRendering(); + CounterClose(VideoCommon::QueryType::ZPassPixelCount64); + CounterClose(VideoCommon::QueryType::StreamingByteCount); + } +} + +template +bool QueryCacheBase::AccelerateHostConditionalRendering() { + bool qc_dirty = false; + const auto gen_lookup = [this, &qc_dirty](GPUVAddr address) -> VideoCommon::LookupData { + auto cpu_addr_opt = gpu_memory->GpuToCpuAddress(address); + if (!cpu_addr_opt) [[unlikely]] { + return VideoCommon::LookupData{ + .address = 0, + .found_query = nullptr, + }; + } + VAddr cpu_addr = *cpu_addr_opt; + std::scoped_lock lock(cache_mutex); + auto it1 = cached_queries.find(cpu_addr >> Core::Memory::YUZU_PAGEBITS); + if (it1 == cached_queries.end()) { + return VideoCommon::LookupData{ + .address = cpu_addr, + .found_query = nullptr, + }; + } + auto& sub_container = it1->second; + auto it_current = sub_container.find(cpu_addr & Core::Memory::YUZU_PAGEMASK); + + if (it_current == sub_container.end()) { + auto it_current_2 = sub_container.find((cpu_addr & Core::Memory::YUZU_PAGEMASK) + 4); + if (it_current_2 == sub_container.end()) { + return VideoCommon::LookupData{ + .address = cpu_addr, + .found_query = nullptr, + }; + } + } + auto* query = impl->ObtainQuery(it_current->second); + qc_dirty |= True(query->flags & QueryFlagBits::IsHostManaged) && + False(query->flags & QueryFlagBits::IsGuestSynced); + return VideoCommon::LookupData{ + .address = cpu_addr, + .found_query = query, + }; + }; + + auto& regs = maxwell3d->regs; + if (regs.render_enable_override != Maxwell::Regs::RenderEnable::Override::UseRenderEnable) { + impl->runtime.EndHostConditionalRendering(); + return false; + } + /*if (!Settings::IsGPULevelHigh()) { + impl->runtime.EndHostConditionalRendering(); + return gpu_memory->IsMemoryDirty(regs.render_enable.Address(), 24, + VideoCommon::CacheType::BufferCache | + VideoCommon::CacheType::QueryCache); + }*/ + const ComparisonMode mode = static_cast(regs.render_enable.mode); + const GPUVAddr address = regs.render_enable.Address(); + switch (mode) { + case ComparisonMode::True: + impl->runtime.EndHostConditionalRendering(); + return false; + case ComparisonMode::False: + impl->runtime.EndHostConditionalRendering(); + return false; + case ComparisonMode::Conditional: { + VideoCommon::LookupData object_1{gen_lookup(address)}; + return impl->runtime.HostConditionalRenderingCompareValue(object_1, qc_dirty); + } + case ComparisonMode::IfEqual: { + VideoCommon::LookupData object_1{gen_lookup(address)}; + VideoCommon::LookupData object_2{gen_lookup(address + 16)}; + return impl->runtime.HostConditionalRenderingCompareValues(object_1, object_2, qc_dirty, + true); + } + case ComparisonMode::IfNotEqual: { + VideoCommon::LookupData object_1{gen_lookup(address)}; + VideoCommon::LookupData object_2{gen_lookup(address + 16)}; + return impl->runtime.HostConditionalRenderingCompareValues(object_1, object_2, qc_dirty, + false); + } + default: + return false; + } +} + +// Async downloads +template +void QueryCacheBase::CommitAsyncFlushes() { + u64 mask{}; + { + std::scoped_lock lk(impl->flush_guard); + impl->ForEachStreamer([&mask](StreamerInterface* streamer) { + bool local_result = streamer->HasUnsyncedQueries(); + if (local_result) { + mask |= 1ULL << streamer->GetId(); + } + }); + impl->flushes_pending.push_back(mask); + } + std::function func([this] { UnregisterPending(); }); + impl->rasterizer.SyncOperation(std::move(func)); + if (mask == 0) { + return; + } + impl->ForEachStreamerIn(mask, + [](StreamerInterface* streamer) { streamer->PushUnsyncedQueries(); }); +} + +template +bool QueryCacheBase::HasUncommittedFlushes() const { + bool result = false; + impl->ForEachStreamer([&result](StreamerInterface* streamer) { + result |= streamer->HasUnsyncedQueries(); + return result; + }); + return result; +} + +template +bool QueryCacheBase::ShouldWaitAsyncFlushes() { + std::scoped_lock lk(impl->flush_guard); + return !impl->flushes_pending.empty() && impl->flushes_pending.front() != 0ULL; +} + +template +void QueryCacheBase::PopAsyncFlushes() { + u64 mask; + { + std::scoped_lock lk(impl->flush_guard); + mask = impl->flushes_pending.front(); + impl->flushes_pending.pop_front(); + } + if (mask == 0) { + return; + } + impl->ForEachStreamerIn(mask, + [](StreamerInterface* streamer) { streamer->PopUnsyncedQueries(); }); +} + +// Invalidation + +template +void QueryCacheBase::InvalidateQuery(QueryCacheBase::QueryLocation location) { + auto* query_base = impl->ObtainQuery(location); + if (!query_base) { + return; + } + query_base->flags |= QueryFlagBits::IsInvalidated; +} + +template +bool QueryCacheBase::IsQueryDirty(QueryCacheBase::QueryLocation location) { + auto* query_base = impl->ObtainQuery(location); + if (!query_base) { + return false; + } + return True(query_base->flags & QueryFlagBits::IsHostManaged) && + False(query_base->flags & QueryFlagBits::IsGuestSynced); +} + +template +bool QueryCacheBase::SemiFlushQueryDirty(QueryCacheBase::QueryLocation location) { + auto* query_base = impl->ObtainQuery(location); + if (!query_base) { + return false; + } + if (True(query_base->flags & QueryFlagBits::IsFinalValueSynced) && + False(query_base->flags & QueryFlagBits::IsGuestSynced)) { + auto* ptr = impl->cpu_memory.GetPointer(query_base->guest_address); + if (True(query_base->flags & QueryFlagBits::HasTimestamp)) { + std::memcpy(ptr, &query_base->value, sizeof(query_base->value)); + return false; + } + u32 value_l = static_cast(query_base->value); + std::memcpy(ptr, &value_l, sizeof(value_l)); + return false; + } + return True(query_base->flags & QueryFlagBits::IsHostManaged) && + False(query_base->flags & QueryFlagBits::IsGuestSynced); +} + +template +void QueryCacheBase::RequestGuestHostSync() { + impl->rasterizer.ReleaseFences(); +} + +} // namespace VideoCommon -- cgit v1.2.3 From aa6587d854e4953876b02ca71278a665bcae8179 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 4 Aug 2023 13:38:49 +0200 Subject: QueryCache: Implement dependant queries. --- src/video_core/query_cache/query_cache.h | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'src/video_core/query_cache/query_cache.h') diff --git a/src/video_core/query_cache/query_cache.h b/src/video_core/query_cache/query_cache.h index f6af48d14..f1393d5c7 100644 --- a/src/video_core/query_cache/query_cache.h +++ b/src/video_core/query_cache/query_cache.h @@ -489,8 +489,22 @@ void QueryCacheBase::PopAsyncFlushes() { if (mask == 0) { return; } - impl->ForEachStreamerIn(mask, - [](StreamerInterface* streamer) { streamer->PopUnsyncedQueries(); }); + u64 ran_mask = 0; + u64 next_phase = 0; + while (mask) { + impl->ForEachStreamerIn(mask, [&mask, &ran_mask, &next_phase](StreamerInterface* streamer) { + u64 dep_mask = streamer->GetDependenceMask(); + if ((dep_mask & ~ran_mask) != 0) { + next_phase |= dep_mask; + return; + } + u64 index = streamer->GetId(); + ran_mask |= (1ULL << index); + mask &= ~(1ULL << index); + streamer->PopUnsyncedQueries(); + }); + ran_mask |= next_phase; + } } // Invalidation -- cgit v1.2.3 From 282ae8fa51e060e6d4ef026b734aa871b1b9331e Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 6 Aug 2023 09:38:16 +0200 Subject: Query Cache: address issues --- src/video_core/query_cache/query_cache.h | 66 ++++++++++++++++++-------------- 1 file changed, 37 insertions(+), 29 deletions(-) (limited to 'src/video_core/query_cache/query_cache.h') diff --git a/src/video_core/query_cache/query_cache.h b/src/video_core/query_cache/query_cache.h index f1393d5c7..042af053c 100644 --- a/src/video_core/query_cache/query_cache.h +++ b/src/video_core/query_cache/query_cache.h @@ -54,7 +54,7 @@ public: return new_id; } - bool HasPendingSync() override { + bool HasPendingSync() const override { return !pending_sync.empty(); } @@ -71,8 +71,10 @@ public: continue; } query.flags |= QueryFlagBits::IsHostSynced; - sync_values.emplace_back(query.guest_address, query.value, - True(query.flags & QueryFlagBits::HasTimestamp) ? 8 : 4); + sync_values.emplace_back(SyncValuesStruct{ + .address = query.guest_address, + .value = query.value, + .size = static_cast(True(query.flags & QueryFlagBits::HasTimestamp) ? 8 : 4)}); } pending_sync.clear(); if (sync_values.size() > 0) { @@ -90,15 +92,20 @@ class StubStreamer : public GuestStreamer { public: using RuntimeType = typename Traits::RuntimeType; - StubStreamer(size_t id_, RuntimeType& runtime_) : GuestStreamer(id_, runtime_) {} + StubStreamer(size_t id_, RuntimeType& runtime_, u32 stub_value_) + : GuestStreamer(id_, runtime_), stub_value{stub_value_} {} ~StubStreamer() override = default; size_t WriteCounter(VAddr address, bool has_timestamp, [[maybe_unused]] u32 value, std::optional subreport = std::nullopt) override { - size_t new_id = GuestStreamer::WriteCounter(address, has_timestamp, 1U, subreport); + size_t new_id = + GuestStreamer::WriteCounter(address, has_timestamp, stub_value, subreport); return new_id; } + +private: + u32 stub_value; }; template @@ -113,7 +120,7 @@ struct QueryCacheBase::QueryCacheBaseImpl { for (size_t i = 0; i < static_cast(QueryType::MaxQueryTypes); i++) { streamers[i] = runtime.GetStreamerInterface(static_cast(i)); if (streamers[i]) { - streamer_mask |= 1ULL << i; + streamer_mask |= 1ULL << streamers[i]->GetId(); } } } @@ -152,7 +159,7 @@ struct QueryCacheBase::QueryCacheBaseImpl { QueryCacheBase* owner; VideoCore::RasterizerInterface& rasterizer; Core::Memory::Memory& cpu_memory; - Traits::RuntimeType& runtime; + RuntimeType& runtime; Tegra::GPU& gpu; std::array(QueryType::MaxQueryTypes)> streamers; u64 streamer_mask; @@ -223,15 +230,11 @@ void QueryCacheBase::CounterReport(GPUVAddr addr, QueryType counter_type const bool is_fence = True(flags & QueryPropertiesFlags::IsAFence); size_t streamer_id = static_cast(counter_type); auto* streamer = impl->streamers[streamer_id]; - if (!streamer) [[unlikely]] { - if (has_timestamp) { - u64 timestamp = impl->gpu.GetTicks(); - gpu_memory->Write(addr + 8, timestamp); - gpu_memory->Write(addr, 1ULL); - } else { - gpu_memory->Write(addr, 1U); - } - return; + if (streamer == nullptr) [[unlikely]] { + counter_type = QueryType::Payload; + payload = 1U; + streamer_id = static_cast(counter_type); + streamer = impl->streamers[streamer_id]; } auto cpu_addr_opt = gpu_memory->GpuToCpuAddress(addr); if (!cpu_addr_opt) [[unlikely]] { @@ -403,12 +406,6 @@ bool QueryCacheBase::AccelerateHostConditionalRendering() { impl->runtime.EndHostConditionalRendering(); return false; } - /*if (!Settings::IsGPULevelHigh()) { - impl->runtime.EndHostConditionalRendering(); - return gpu_memory->IsMemoryDirty(regs.render_enable.Address(), 24, - VideoCommon::CacheType::BufferCache | - VideoCommon::CacheType::QueryCache); - }*/ const ComparisonMode mode = static_cast(regs.render_enable.mode); const GPUVAddr address = regs.render_enable.Address(); switch (mode) { @@ -442,6 +439,9 @@ bool QueryCacheBase::AccelerateHostConditionalRendering() { // Async downloads template void QueryCacheBase::CommitAsyncFlushes() { + // Make sure to have the results synced in Host. + NotifyWFI(); + u64 mask{}; { std::scoped_lock lk(impl->flush_guard); @@ -458,8 +458,19 @@ void QueryCacheBase::CommitAsyncFlushes() { if (mask == 0) { return; } - impl->ForEachStreamerIn(mask, - [](StreamerInterface* streamer) { streamer->PushUnsyncedQueries(); }); + u64 ran_mask = ~mask; + while (mask) { + impl->ForEachStreamerIn(mask, [&mask, &ran_mask](StreamerInterface* streamer) { + u64 dep_mask = streamer->GetDependentMask(); + if ((dep_mask & ~ran_mask) != 0) { + return; + } + u64 index = streamer->GetId(); + ran_mask |= (1ULL << index); + mask &= ~(1ULL << index); + streamer->PushUnsyncedQueries(); + }); + } } template @@ -489,13 +500,11 @@ void QueryCacheBase::PopAsyncFlushes() { if (mask == 0) { return; } - u64 ran_mask = 0; - u64 next_phase = 0; + u64 ran_mask = ~mask; while (mask) { - impl->ForEachStreamerIn(mask, [&mask, &ran_mask, &next_phase](StreamerInterface* streamer) { + impl->ForEachStreamerIn(mask, [&mask, &ran_mask](StreamerInterface* streamer) { u64 dep_mask = streamer->GetDependenceMask(); if ((dep_mask & ~ran_mask) != 0) { - next_phase |= dep_mask; return; } u64 index = streamer->GetId(); @@ -503,7 +512,6 @@ void QueryCacheBase::PopAsyncFlushes() { mask &= ~(1ULL << index); streamer->PopUnsyncedQueries(); }); - ran_mask |= next_phase; } } -- cgit v1.2.3 From 2fea1b8407b66dd0e9ed1776c34dad043e1becf4 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 19 Aug 2023 21:49:38 +0200 Subject: Query Cache: Fix guest side sample counting --- src/video_core/query_cache/query_cache.h | 46 +++++++++++++++++--------------- 1 file changed, 24 insertions(+), 22 deletions(-) (limited to 'src/video_core/query_cache/query_cache.h') diff --git a/src/video_core/query_cache/query_cache.h b/src/video_core/query_cache/query_cache.h index 042af053c..4b89b5bf6 100644 --- a/src/video_core/query_cache/query_cache.h +++ b/src/video_core/query_cache/query_cache.h @@ -256,30 +256,32 @@ void QueryCacheBase::CounterReport(GPUVAddr addr, QueryType counter_type u8* pointer = impl->cpu_memory.GetPointer(cpu_addr); u8* pointer_timestamp = impl->cpu_memory.GetPointer(cpu_addr + 8); bool is_synced = !Settings::IsGPULevelHigh() && is_fence; - std::function operation( - [this, is_synced, query_base = query, query_location, pointer, pointer_timestamp] { - if (True(query_base->flags & QueryFlagBits::IsInvalidated)) { - if (!is_synced) [[likely]] { - impl->pending_unregister.push_back(query_location); - } - return; - } - if (False(query_base->flags & QueryFlagBits::IsFinalValueSynced)) [[unlikely]] { - UNREACHABLE(); - return; - } - if (True(query_base->flags & QueryFlagBits::HasTimestamp)) { - u64 timestamp = impl->gpu.GetTicks(); - std::memcpy(pointer_timestamp, ×tamp, sizeof(timestamp)); - std::memcpy(pointer, &query_base->value, sizeof(query_base->value)); - } else { - u32 value = static_cast(query_base->value); - std::memcpy(pointer, &value, sizeof(value)); - } + std::function operation([this, is_synced, streamer, query_base = query, query_location, + pointer, pointer_timestamp] { + if (True(query_base->flags & QueryFlagBits::IsInvalidated)) { if (!is_synced) [[likely]] { impl->pending_unregister.push_back(query_location); } - }); + return; + } + if (False(query_base->flags & QueryFlagBits::IsFinalValueSynced)) [[unlikely]] { + UNREACHABLE(); + return; + } + query_base->value += streamer->GetAmmendValue(); + streamer->SetAccumulationValue(query_base->value); + if (True(query_base->flags & QueryFlagBits::HasTimestamp)) { + u64 timestamp = impl->gpu.GetTicks(); + std::memcpy(pointer_timestamp, ×tamp, sizeof(timestamp)); + std::memcpy(pointer, &query_base->value, sizeof(query_base->value)); + } else { + u32 value = static_cast(query_base->value); + std::memcpy(pointer, &value, sizeof(value)); + } + if (!is_synced) [[likely]] { + impl->pending_unregister.push_back(query_location); + } + }); if (is_fence) { impl->rasterizer.SignalFence(std::move(operation)); } else { @@ -354,9 +356,9 @@ void QueryCacheBase::NotifySegment(bool resume) { if (resume) { impl->runtime.ResumeHostConditionalRendering(); } else { - impl->runtime.PauseHostConditionalRendering(); CounterClose(VideoCommon::QueryType::ZPassPixelCount64); CounterClose(VideoCommon::QueryType::StreamingByteCount); + impl->runtime.PauseHostConditionalRendering(); } } -- cgit v1.2.3 From bf0d6b8806b7367a17bbeb2bb59f4bcba1fb1375 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 22 Aug 2023 17:44:03 +0200 Subject: Query Cache: Fix behavior in Normal Accuracy --- src/video_core/query_cache/query_cache.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'src/video_core/query_cache/query_cache.h') diff --git a/src/video_core/query_cache/query_cache.h b/src/video_core/query_cache/query_cache.h index 4b89b5bf6..78b42b518 100644 --- a/src/video_core/query_cache/query_cache.h +++ b/src/video_core/query_cache/query_cache.h @@ -256,6 +256,7 @@ void QueryCacheBase::CounterReport(GPUVAddr addr, QueryType counter_type u8* pointer = impl->cpu_memory.GetPointer(cpu_addr); u8* pointer_timestamp = impl->cpu_memory.GetPointer(cpu_addr + 8); bool is_synced = !Settings::IsGPULevelHigh() && is_fence; + std::function operation([this, is_synced, streamer, query_base = query, query_location, pointer, pointer_timestamp] { if (True(query_base->flags & QueryFlagBits::IsInvalidated)) { @@ -285,6 +286,18 @@ void QueryCacheBase::CounterReport(GPUVAddr addr, QueryType counter_type if (is_fence) { impl->rasterizer.SignalFence(std::move(operation)); } else { + if (!Settings::IsGPULevelHigh() && counter_type == QueryType::Payload) { + if (has_timestamp) { + u64 timestamp = impl->gpu.GetTicks(); + u64 value = static_cast(payload); + std::memcpy(pointer_timestamp, ×tamp, sizeof(timestamp)); + std::memcpy(pointer, &value, sizeof(value)); + } else { + std::memcpy(pointer, &payload, sizeof(payload)); + } + streamer->Free(new_query_id); + return; + } impl->rasterizer.SyncOperation(std::move(operation)); } if (is_synced) { -- cgit v1.2.3