summaryrefslogtreecommitdiffstats
path: root/src/video_core/renderer_opengl/gl_query_cache.cpp
diff options
context:
space:
mode:
authorReinUsesLisp <reinuseslisp@airmail.cc>2019-11-28 06:15:34 +0100
committerReinUsesLisp <reinuseslisp@airmail.cc>2020-02-14 21:38:27 +0100
commit73d2d3342dc8867d32f08f89b2ca36ff071598dc (patch)
tree3a032d4a36d0f07981eeb8b396472670bfd11e5a /src/video_core/renderer_opengl/gl_query_cache.cpp
parentgl_query_cache: Implement host queries using a deferred cache (diff)
downloadyuzu-73d2d3342dc8867d32f08f89b2ca36ff071598dc.tar
yuzu-73d2d3342dc8867d32f08f89b2ca36ff071598dc.tar.gz
yuzu-73d2d3342dc8867d32f08f89b2ca36ff071598dc.tar.bz2
yuzu-73d2d3342dc8867d32f08f89b2ca36ff071598dc.tar.lz
yuzu-73d2d3342dc8867d32f08f89b2ca36ff071598dc.tar.xz
yuzu-73d2d3342dc8867d32f08f89b2ca36ff071598dc.tar.zst
yuzu-73d2d3342dc8867d32f08f89b2ca36ff071598dc.zip
Diffstat (limited to '')
-rw-r--r--src/video_core/renderer_opengl/gl_query_cache.cpp214
1 files changed, 161 insertions, 53 deletions
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp
index 8f0e8241d..74cb73209 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_query_cache.cpp
@@ -2,8 +2,10 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <algorithm>
#include <cstring>
#include <memory>
+#include <unordered_map>
#include <utility>
#include <vector>
@@ -22,6 +24,13 @@ using VideoCore::QueryType;
namespace {
+constexpr std::uintptr_t PAGE_SIZE = 4096;
+constexpr int PAGE_SHIFT = 12;
+
+constexpr std::size_t SMALL_QUERY_SIZE = 8; // Query size without timestamp
+constexpr std::size_t LARGE_QUERY_SIZE = 16; // Query size with timestamp
+constexpr std::ptrdiff_t TIMESTAMP_OFFSET = 8;
+
constexpr std::array<GLenum, VideoCore::NumQueryTypes> QueryTargets = {GL_SAMPLES_PASSED};
constexpr GLenum GetTarget(QueryType type) {
@@ -37,23 +46,19 @@ CounterStream::~CounterStream() = default;
void CounterStream::Update(bool enabled, bool any_command_queued) {
if (enabled) {
- if (!current) {
- current = cache.GetHostCounter(last, type);
- }
- return;
- }
-
- if (current) {
- EndQuery(any_command_queued);
+ Enable();
+ } else {
+ Disable(any_command_queued);
}
- last = std::exchange(current, nullptr);
}
void CounterStream::Reset(bool any_command_queued) {
if (current) {
EndQuery(any_command_queued);
+
+ // Immediately start a new query to avoid disabling its state.
+ current = cache.GetHostCounter(nullptr, type);
}
- current = nullptr;
last = nullptr;
}
@@ -67,6 +72,20 @@ std::shared_ptr<HostCounter> CounterStream::GetCurrent(bool any_command_queued)
return last;
}
+void CounterStream::Enable() {
+ if (current) {
+ return;
+ }
+ current = cache.GetHostCounter(last, type);
+}
+
+void CounterStream::Disable(bool any_command_queued) {
+ if (current) {
+ EndQuery(any_command_queued);
+ }
+ last = std::exchange(current, nullptr);
+}
+
void CounterStream::EndQuery(bool any_command_queued) {
if (!any_command_queued) {
// There are chances a query waited on without commands (glDraw, glClear, glDispatch). Not
@@ -78,26 +97,57 @@ void CounterStream::EndQuery(bool any_command_queued) {
}
QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& rasterizer)
- : RasterizerCache{rasterizer}, system{system},
- rasterizer{rasterizer}, streams{{CounterStream{*this, QueryType::SamplesPassed}}} {}
+ : system{system}, rasterizer{rasterizer}, streams{{CounterStream{*this,
+ QueryType::SamplesPassed}}} {}
QueryCache::~QueryCache() = default;
-void QueryCache::Query(GPUVAddr gpu_addr, QueryType type) {
+void QueryCache::InvalidateRegion(CacheAddr addr, std::size_t size) {
+ const u64 addr_begin = static_cast<u64>(addr);
+ const u64 addr_end = addr_begin + static_cast<u64>(size);
+ const auto in_range = [addr_begin, addr_end](CachedQuery& query) {
+ const u64 cache_begin = query.GetCacheAddr();
+ const u64 cache_end = cache_begin + query.GetSizeInBytes();
+ return cache_begin < addr_end && addr_begin < cache_end;
+ };
+
+ const u64 page_end = addr_end >> PAGE_SHIFT;
+ for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) {
+ const auto& it = cached_queries.find(page);
+ if (it == std::end(cached_queries)) {
+ continue;
+ }
+ auto& contents = it->second;
+ for (auto& query : contents) {
+ if (!in_range(query)) {
+ continue;
+ }
+ rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.GetSizeInBytes(), -1);
+ Flush(query);
+ }
+ contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range),
+ std::end(contents));
+ }
+}
+
+void QueryCache::FlushRegion(CacheAddr addr, std::size_t size) {
+ // We can handle flushes in the same way as invalidations.
+ InvalidateRegion(addr, size);
+}
+
+void QueryCache::Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) {
auto& memory_manager = system.GPU().MemoryManager();
const auto host_ptr = memory_manager.GetPointer(gpu_addr);
- auto query = TryGet(host_ptr);
+ CachedQuery* query = TryGet(ToCacheAddr(host_ptr));
if (!query) {
const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
ASSERT_OR_EXECUTE(cpu_addr, return;);
- query = std::make_shared<CachedQuery>(type, *cpu_addr, host_ptr);
- Register(query);
+ query = &Register(CachedQuery(type, *cpu_addr, host_ptr));
}
- query->SetCounter(GetStream(type).GetCurrent(rasterizer.AnyCommandQueued()));
- query->MarkAsModified(true, *this);
+ query->SetCounter(GetStream(type).GetCurrent(rasterizer.AnyCommandQueued()), timestamp);
}
void QueryCache::UpdateCounters() {
@@ -117,34 +167,54 @@ void QueryCache::Reserve(QueryType type, OGLQuery&& query) {
std::shared_ptr<HostCounter> QueryCache::GetHostCounter(std::shared_ptr<HostCounter> dependency,
QueryType type) {
- const auto type_index = static_cast<std::size_t>(type);
- auto& reserve = reserved_queries[type_index];
-
+ auto& reserve = reserved_queries[static_cast<std::size_t>(type)];
+ OGLQuery query;
if (reserve.empty()) {
- return std::make_shared<HostCounter>(*this, std::move(dependency), type);
+ query.Create(GetTarget(type));
+ } else {
+ query = std::move(reserve.back());
+ reserve.pop_back();
}
- auto counter = std::make_shared<HostCounter>(*this, std::move(dependency), type,
- std::move(reserve.back()));
- reserve.pop_back();
- return counter;
+ return std::make_shared<HostCounter>(*this, std::move(dependency), type, std::move(query));
+}
+
+CachedQuery& QueryCache::Register(CachedQuery&& cached_query) {
+ const u64 page = static_cast<u64>(cached_query.GetCacheAddr()) >> PAGE_SHIFT;
+ auto& stored_ref = cached_queries[page].emplace_back(std::move(cached_query));
+ rasterizer.UpdatePagesCachedCount(stored_ref.GetCpuAddr(), stored_ref.GetSizeInBytes(), 1);
+ return stored_ref;
+}
+
+CachedQuery* QueryCache::TryGet(CacheAddr addr) {
+ const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT;
+ const auto it = cached_queries.find(page);
+ if (it == std::end(cached_queries)) {
+ return nullptr;
+ }
+ auto& contents = it->second;
+ const auto found =
+ std::find_if(std::begin(contents), std::end(contents),
+ [addr](const auto& query) { return query.GetCacheAddr() == addr; });
+ return found != std::end(contents) ? &*found : nullptr;
}
-void QueryCache::FlushObjectInner(const std::shared_ptr<CachedQuery>& counter_) {
- auto& counter = *counter_;
- auto& stream = GetStream(counter.GetType());
+void QueryCache::Flush(CachedQuery& cached_query) {
+ auto& stream = GetStream(cached_query.GetType());
// Waiting for a query while another query of the same target is enabled locks Nvidia's driver.
// To avoid this, disable and re-enable the stream, keeping its dependency.
- const bool is_enabled = stream.IsEnabled();
- if (is_enabled) {
- stream.Update(false, false);
+ // But we only have to do this if we have pending waits to be done.
+ const bool slice_counter = stream.IsEnabled() && cached_query.WaitPending();
+ const bool any_command_queued = rasterizer.AnyCommandQueued();
+ if (slice_counter) {
+ stream.Update(false, any_command_queued);
}
- counter.Flush();
+ cached_query.Flush();
- if (is_enabled) {
- stream.Update(true, false);
+ if (slice_counter) {
+ stream.Update(true, any_command_queued);
}
}
@@ -152,13 +222,6 @@ CounterStream& QueryCache::GetStream(QueryType type) {
return streams[static_cast<std::size_t>(type)];
}
-HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, QueryType type)
- : cache{cache}, type{type}, dependency{std::move(dependency)} {
- const GLenum target = GetTarget(type);
- query.Create(target);
- glBeginQuery(target, query.handle);
-}
-
HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, QueryType type,
OGLQuery&& query_)
: cache{cache}, type{type}, dependency{std::move(dependency)}, query{std::move(query_)} {
@@ -170,35 +233,80 @@ HostCounter::~HostCounter() {
}
u64 HostCounter::Query() {
- if (query.handle == 0) {
- return result;
+ if (result) {
+ return *result;
}
- glGetQueryObjectui64v(query.handle, GL_QUERY_RESULT, &result);
-
+ u64 value;
+ glGetQueryObjectui64v(query.handle, GL_QUERY_RESULT, &value);
if (dependency) {
- result += dependency->Query();
+ value += dependency->Query();
}
- return result;
+ return *(result = value);
+}
+
+bool HostCounter::WaitPending() const noexcept {
+ return result.has_value();
}
CachedQuery::CachedQuery(QueryType type, VAddr cpu_addr, u8* host_ptr)
- : RasterizerCacheObject{host_ptr}, type{type}, cpu_addr{cpu_addr}, host_ptr{host_ptr} {}
+ : type{type}, cpu_addr{cpu_addr}, host_ptr{host_ptr} {}
+
+CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept
+ : type{rhs.type}, cpu_addr{rhs.cpu_addr}, host_ptr{rhs.host_ptr},
+ counter{std::move(rhs.counter)}, timestamp{rhs.timestamp} {}
CachedQuery::~CachedQuery() = default;
+CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept {
+ type = rhs.type;
+ cpu_addr = rhs.cpu_addr;
+ host_ptr = rhs.host_ptr;
+ counter = std::move(rhs.counter);
+ timestamp = rhs.timestamp;
+ return *this;
+}
+
void CachedQuery::Flush() {
- const u64 value = counter->Query();
- std::memcpy(host_ptr, &value, sizeof(value));
+ // When counter is nullptr it means that it has just been reset. We are supposed to write a zero
+ // in these cases.
+ const u64 value = counter ? counter->Query() : 0;
+ std::memcpy(host_ptr, &value, sizeof(u64));
+
+ if (timestamp) {
+ std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64));
+ }
}
-void CachedQuery::SetCounter(std::shared_ptr<HostCounter> counter_) {
+void CachedQuery::SetCounter(std::shared_ptr<HostCounter> counter_, std::optional<u64> timestamp_) {
+ if (counter) {
+ // If there's an old counter set it means the query is being rewritten by the game.
+ // To avoid losing the data forever, flush here.
+ Flush();
+ }
counter = std::move(counter_);
+ timestamp = timestamp_;
+}
+
+bool CachedQuery::WaitPending() const noexcept {
+ return counter && counter->WaitPending();
}
-QueryType CachedQuery::GetType() const {
+QueryType CachedQuery::GetType() const noexcept {
return type;
}
+VAddr CachedQuery::GetCpuAddr() const noexcept {
+ return cpu_addr;
+}
+
+CacheAddr CachedQuery::GetCacheAddr() const noexcept {
+ return ToCacheAddr(host_ptr);
+}
+
+u64 CachedQuery::GetSizeInBytes() const noexcept {
+ return timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE;
+}
+
} // namespace OpenGL