From bcd348f2388cf944f2ac49364a8d13b47cc21456 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 11 Feb 2020 18:59:44 -0300 Subject: vk_query_cache: Implement generic query cache on Vulkan --- src/video_core/renderer_vulkan/vk_device.cpp | 10 +- src/video_core/renderer_vulkan/vk_query_cache.cpp | 122 ++++++++++++++++++++++ src/video_core/renderer_vulkan/vk_query_cache.h | 104 ++++++++++++++++++ src/video_core/renderer_vulkan/vk_rasterizer.cpp | 21 +++- src/video_core/renderer_vulkan/vk_rasterizer.h | 6 +- src/video_core/renderer_vulkan/vk_scheduler.cpp | 8 ++ src/video_core/renderer_vulkan/vk_scheduler.h | 15 +++ 7 files changed, 283 insertions(+), 3 deletions(-) create mode 100644 src/video_core/renderer_vulkan/vk_query_cache.cpp create mode 100644 src/video_core/renderer_vulkan/vk_query_cache.h (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 9840f26e5..588a6835f 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -104,6 +104,7 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan features.depthBiasClamp = true; features.geometryShader = true; features.tessellationShader = true; + features.occlusionQueryPrecise = true; features.fragmentStoresAndAtomics = true; features.shaderImageGatherExtended = true; features.shaderStorageImageWriteWithoutFormat = true; @@ -117,6 +118,10 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan bit8_storage.uniformAndStorageBuffer8BitAccess = true; SetNext(next, bit8_storage); + vk::PhysicalDeviceHostQueryResetFeaturesEXT host_query_reset; + host_query_reset.hostQueryReset = true; + SetNext(next, host_query_reset); + vk::PhysicalDeviceFloat16Int8FeaturesKHR float16_int8; if (is_float16_supported) { float16_int8.shaderFloat16 = true; @@ -273,6 +278,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, + VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, }; std::bitset available_extensions{}; @@ -340,6 +346,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev std::make_pair(features.depthBiasClamp, "depthBiasClamp"), std::make_pair(features.geometryShader, "geometryShader"), std::make_pair(features.tessellationShader, "tessellationShader"), + std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"), std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"), std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), std::make_pair(features.shaderStorageImageWriteWithoutFormat, @@ -376,7 +383,7 @@ std::vector VKDevice::LoadExtensions(const vk::DispatchLoaderDynami } }; - extensions.reserve(13); + extensions.reserve(14); extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME); extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME); @@ -384,6 +391,7 @@ std::vector VKDevice::LoadExtensions(const vk::DispatchLoaderDynami extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME); extensions.push_back(VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME); extensions.push_back(VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME); + extensions.push_back(VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME); [[maybe_unused]] const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp new file mode 100644 index 000000000..ffbf60dda --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -0,0 +1,122 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include +#include +#include + +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_query_cache.h" +#include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" + +namespace Vulkan { + +namespace { + +constexpr std::array QUERY_TARGETS = {vk::QueryType::eOcclusion}; + +constexpr vk::QueryType GetTarget(VideoCore::QueryType type) { + return QUERY_TARGETS[static_cast(type)]; +} + +} // Anonymous namespace + +QueryPool::QueryPool() : VKFencedPool{GROW_STEP} {} + +QueryPool::~QueryPool() = default; + +void QueryPool::Initialize(const VKDevice& device_, VideoCore::QueryType type_) { + device = &device_; + type = type_; +} + +std::pair QueryPool::Commit(VKFence& fence) { + std::size_t index; + do { + index = CommitResource(fence); + } while (usage[index]); + usage[index] = true; + + return {*pools[index / GROW_STEP], static_cast(index % GROW_STEP)}; +} + +void QueryPool::Allocate(std::size_t begin, std::size_t end) { + usage.resize(end); + + const auto dev = device->GetLogical(); + const u32 size = static_cast(end - begin); + const vk::QueryPoolCreateInfo query_pool_ci({}, GetTarget(type), size, {}); + pools.push_back(dev.createQueryPoolUnique(query_pool_ci, nullptr, device->GetDispatchLoader())); +} + +void QueryPool::Reserve(std::pair query) { + const auto it = + std::find_if(std::begin(pools), std::end(pools), + [query_pool = query.first](auto& pool) { return query_pool == *pool; }); + ASSERT(it != std::end(pools)); + + const std::ptrdiff_t pool_index = std::distance(std::begin(pools), it); + usage[pool_index * GROW_STEP + static_cast(query.second)] = false; +} + +VKQueryCache::VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, + const VKDevice& device, VKScheduler& scheduler) + : VideoCommon::QueryCacheBase{system, rasterizer}, + device{device}, scheduler{scheduler} { + for (std::size_t i = 0; i < static_cast(VideoCore::NumQueryTypes); ++i) { + query_pools[i].Initialize(device, static_cast(i)); + } +} + +VKQueryCache::~VKQueryCache() = default; + +std::pair VKQueryCache::AllocateQuery(VideoCore::QueryType type) { + return query_pools[static_cast(type)].Commit(scheduler.GetFence()); +} + +void VKQueryCache::Reserve(VideoCore::QueryType type, + std::pair query) { + query_pools[static_cast(type)].Reserve(query); +} + +HostCounter::HostCounter(VKQueryCache& cache, std::shared_ptr dependency, + VideoCore::QueryType type) + : VideoCommon::HostCounterBase{std::move(dependency)}, cache{cache}, + type{type}, query{cache.AllocateQuery(type)}, ticks{cache.Scheduler().Ticks()} { + const auto dev = cache.Device().GetLogical(); + cache.Scheduler().Record([dev, query = query](vk::CommandBuffer cmdbuf, auto& dld) { + dev.resetQueryPoolEXT(query.first, query.second, 1, dld); + cmdbuf.beginQuery(query.first, query.second, vk::QueryControlFlagBits::ePrecise, dld); + }); +} + +HostCounter::~HostCounter() { + cache.Reserve(type, query); +} + +void HostCounter::EndQuery() { + cache.Scheduler().Record([query = query](auto cmdbuf, auto& dld) { + cmdbuf.endQuery(query.first, query.second, dld); + }); +} + +u64 HostCounter::BlockingQuery() const { + if (ticks >= cache.Scheduler().Ticks()) { + cache.Scheduler().Flush(); + } + + const auto dev = cache.Device().GetLogical(); + const auto& dld = cache.Device().GetDispatchLoader(); + u64 value; + dev.getQueryPoolResults(query.first, query.second, 1, sizeof(value), &value, sizeof(value), + vk::QueryResultFlagBits::e64 | vk::QueryResultFlagBits::eWait, dld); + return value; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h new file mode 100644 index 000000000..c3092ee96 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_query_cache.h @@ -0,0 +1,104 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include +#include +#include + +#include "common/common_types.h" +#include "video_core/query_cache.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_resource_manager.h" + +namespace VideoCore { +class RasterizerInterface; +} + +namespace Vulkan { + +class CachedQuery; +class HostCounter; +class VKDevice; +class VKQueryCache; +class VKScheduler; + +using CounterStream = VideoCommon::CounterStreamBase; + +class QueryPool final : public VKFencedPool { +public: + explicit QueryPool(); + ~QueryPool() override; + + void Initialize(const VKDevice& device, VideoCore::QueryType type); + + std::pair Commit(VKFence& fence); + + void Reserve(std::pair query); + +protected: + void Allocate(std::size_t begin, std::size_t end) override; + +private: + static constexpr std::size_t GROW_STEP = 512; + + const VKDevice* device = nullptr; + VideoCore::QueryType type = {}; + + std::vector pools; + std::vector usage; +}; + +class VKQueryCache final + : public VideoCommon::QueryCacheBase { +public: + explicit VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, + const VKDevice& device, VKScheduler& scheduler); + ~VKQueryCache(); + + std::pair AllocateQuery(VideoCore::QueryType type); + + void Reserve(VideoCore::QueryType type, std::pair query); + + const VKDevice& Device() const noexcept { + return device; + } + + VKScheduler& Scheduler() const noexcept { + return scheduler; + } + +private: + const VKDevice& device; + VKScheduler& scheduler; +}; + +class HostCounter final : public VideoCommon::HostCounterBase { +public: + explicit HostCounter(VKQueryCache& cache, std::shared_ptr dependency, + VideoCore::QueryType type); + ~HostCounter(); + + void EndQuery(); + +private: + u64 BlockingQuery() const override; + + VKQueryCache& cache; + const VideoCore::QueryType type; + const std::pair query; + const u64 ticks; +}; + +class CachedQuery : public VideoCommon::CachedQueryBase { +public: + explicit CachedQuery(VKQueryCache&, VideoCore::QueryType, VAddr cpu_addr, u8* host_ptr) + : VideoCommon::CachedQueryBase{cpu_addr, host_ptr} {} +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index aada38702..79aa121ed 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -289,7 +289,9 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind staging_pool), pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue), buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), - sampler_cache(device) {} + sampler_cache(device), query_cache(system, *this, device, scheduler) { + scheduler.SetQueryCache(query_cache); +} RasterizerVulkan::~RasterizerVulkan() = default; @@ -308,6 +310,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { FlushWork(); + query_cache.UpdateCounters(); + const auto& gpu = system.GPU().Maxwell3D(); GraphicsPipelineCacheKey key{GetFixedPipelineState(gpu.regs)}; @@ -362,6 +366,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { void RasterizerVulkan::Clear() { MICROPROFILE_SCOPE(Vulkan_Clearing); + query_cache.UpdateCounters(); + const auto& gpu = system.GPU().Maxwell3D(); if (!system.GPU().Maxwell3D().ShouldExecute()) { return; @@ -429,6 +435,8 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { sampled_views.clear(); image_views.clear(); + query_cache.UpdateCounters(); + const auto& launch_desc = system.GPU().KeplerCompute().launch_description; const ComputePipelineCacheKey key{ code_addr, @@ -471,17 +479,28 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { }); } +void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { + query_cache.ResetCounter(type); +} + +void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, + std::optional timestamp) { + query_cache.Query(gpu_addr, type, timestamp); +} + void RasterizerVulkan::FlushAll() {} void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) { texture_cache.FlushRegion(addr, size); buffer_cache.FlushRegion(addr, size); + query_cache.FlushRegion(addr, size); } void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) { texture_cache.InvalidateRegion(addr, size); pipeline_cache.InvalidateRegion(addr, size); buffer_cache.InvalidateRegion(addr, size); + query_cache.InvalidateRegion(addr, size); } void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 7be71e734..add1ad88c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -24,6 +24,7 @@ #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_memory_manager.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" +#include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_sampler_cache.h" @@ -96,7 +97,7 @@ struct ImageView { vk::ImageLayout* layout = nullptr; }; -class RasterizerVulkan : public VideoCore::RasterizerAccelerated { +class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { public: explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window, VKScreenInfo& screen_info, const VKDevice& device, @@ -108,6 +109,8 @@ public: bool DrawMultiBatch(bool is_indexed) override; void Clear() override; void DispatchCompute(GPUVAddr code_addr) override; + void ResetCounter(VideoCore::QueryType type) override; + void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional timestamp) override; void FlushAll() override; void FlushRegion(CacheAddr addr, u64 size) override; void InvalidateRegion(CacheAddr addr, u64 size) override; @@ -247,6 +250,7 @@ private: VKPipelineCache pipeline_cache; VKBufferCache buffer_cache; VKSamplerCache sampler_cache; + VKQueryCache query_cache; std::array color_attachments; View zeta_attachment; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index d66133ad1..92bd6c344 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -6,6 +6,7 @@ #include "common/microprofile.h" #include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -139,6 +140,8 @@ void VKScheduler::SubmitExecution(vk::Semaphore semaphore) { } void VKScheduler::AllocateNewContext() { + ++ticks; + std::unique_lock lock{mutex}; current_fence = next_fence; next_fence = &resource_manager.CommitFence(); @@ -146,6 +149,10 @@ void VKScheduler::AllocateNewContext() { current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence); current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}, device.GetDispatchLoader()); + // Enable counters once again. These are disabled when a command buffer is finished. + if (query_cache) { + query_cache->UpdateCounters(); + } } void VKScheduler::InvalidateState() { @@ -159,6 +166,7 @@ void VKScheduler::InvalidateState() { } void VKScheduler::EndPendingOperations() { + query_cache->DisableStreams(); EndRenderPass(); } diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index bcdffbba0..62fd7858b 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -4,6 +4,7 @@ #pragma once +#include #include #include #include @@ -18,6 +19,7 @@ namespace Vulkan { class VKDevice; class VKFence; +class VKQueryCache; class VKResourceManager; class VKFenceView { @@ -67,6 +69,11 @@ public: /// Binds a pipeline to the current execution context. void BindGraphicsPipeline(vk::Pipeline pipeline); + /// Assigns the query cache. + void SetQueryCache(VKQueryCache& query_cache_) { + query_cache = &query_cache_; + } + /// Returns true when viewports have been set in the current command buffer. bool TouchViewports() { return std::exchange(state.viewports, true); @@ -112,6 +119,11 @@ public: return current_fence; } + /// Returns the current command buffer tick. + u64 Ticks() const { + return ticks; + } + private: class Command { public: @@ -205,6 +217,8 @@ private: const VKDevice& device; VKResourceManager& resource_manager; + VKQueryCache* query_cache = nullptr; + vk::CommandBuffer current_cmdbuf; VKFence* current_fence = nullptr; VKFence* next_fence = nullptr; @@ -227,6 +241,7 @@ private: Common::SPSCQueue> chunk_reserve; std::mutex mutex; std::condition_variable cv; + std::atomic ticks = 0; bool quit = false; }; -- cgit v1.2.3