summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFernando Sahmkow <fsahmkow27@gmail.com>2022-02-06 01:16:11 +0100
committerFernando Sahmkow <fsahmkow27@gmail.com>2022-10-06 21:00:52 +0200
commitbc8b3d225eda388f0603830cbff8357893abb0f9 (patch)
tree479b41b73913feceeeb0c9c6f3147d6491c0fa04
parentMemoryManager: initial multi paging system implementation. (diff)
downloadyuzu-bc8b3d225eda388f0603830cbff8357893abb0f9.tar
yuzu-bc8b3d225eda388f0603830cbff8357893abb0f9.tar.gz
yuzu-bc8b3d225eda388f0603830cbff8357893abb0f9.tar.bz2
yuzu-bc8b3d225eda388f0603830cbff8357893abb0f9.tar.lz
yuzu-bc8b3d225eda388f0603830cbff8357893abb0f9.tar.xz
yuzu-bc8b3d225eda388f0603830cbff8357893abb0f9.tar.zst
yuzu-bc8b3d225eda388f0603830cbff8357893abb0f9.zip
Diffstat (limited to '')
-rw-r--r--src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp5
-rw-r--r--src/core/hle/service/nvdrv/devices/nvdisp_disp0.h3
-rw-r--r--src/core/hle/service/nvflinger/nvflinger.cpp15
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h13
-rw-r--r--src/video_core/dma_pusher.cpp3
-rw-r--r--src/video_core/engines/maxwell_3d.cpp24
-rw-r--r--src/video_core/engines/puller.cpp39
-rw-r--r--src/video_core/fence_manager.h96
-rw-r--r--src/video_core/gpu.cpp17
-rw-r--r--src/video_core/gpu.h4
-rw-r--r--src/video_core/gpu_thread.cpp2
-rw-r--r--src/video_core/rasterizer_interface.h7
-rw-r--r--src/video_core/renderer_opengl/gl_fence_manager.cpp13
-rw-r--r--src/video_core/renderer_opengl/gl_fence_manager.h6
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp22
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h5
-rw-r--r--src/video_core/renderer_vulkan/vk_fence_manager.cpp15
-rw-r--r--src/video_core/renderer_vulkan/vk_fence_manager.h6
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp21
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h5
20 files changed, 154 insertions, 167 deletions
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
index e6a976714..18c5324a9 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -40,7 +40,8 @@ void nvdisp_disp0::OnClose(DeviceFD fd) {}
void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat format, u32 width,
u32 height, u32 stride, android::BufferTransformFlags transform,
- const Common::Rectangle<int>& crop_rect) {
+ const Common::Rectangle<int>& crop_rect,
+ std::array<Service::Nvidia::NvFence, 4>& fences, u32 num_fences) {
const VAddr addr = nvmap.GetHandleAddress(buffer_handle);
LOG_TRACE(Service,
"Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
@@ -50,7 +51,7 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat form
stride, format, transform, crop_rect};
system.GetPerfStats().EndSystemFrame();
- system.GPU().RequestSwapBuffers(&framebuffer, nullptr, 0);
+ system.GPU().RequestSwapBuffers(&framebuffer, fences, num_fences);
system.SpeedLimiter().DoSpeedLimiting(system.CoreTiming().GetGlobalTimeUs());
system.GetPerfStats().BeginSystemFrame();
}
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
index 1ca9b2e74..04217ab12 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
@@ -38,7 +38,8 @@ public:
/// Performs a screen flip, drawing the buffer pointed to by the handle.
void flip(u32 buffer_handle, u32 offset, android::PixelFormat format, u32 width, u32 height,
u32 stride, android::BufferTransformFlags transform,
- const Common::Rectangle<int>& crop_rect);
+ const Common::Rectangle<int>& crop_rect,
+ std::array<Service::Nvidia::NvFence, 4>& fences, u32 num_fences);
Kernel::KEvent* QueryEvent(u32 event_id) override;
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index aa112021d..4658f1e8b 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -269,17 +269,6 @@ void NVFlinger::Compose() {
return; // We are likely shutting down
}
- auto& syncpoint_manager = system.Host1x().GetSyncpointManager();
- const auto& multi_fence = buffer.fence;
- guard->unlock();
- for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) {
- const auto& fence = multi_fence.fences[fence_id];
- syncpoint_manager.WaitGuest(fence.id, fence.value);
- }
- guard->lock();
-
- MicroProfileFlip();
-
// Now send the buffer to the GPU for drawing.
// TODO(Subv): Support more than just disp0. The display device selection is probably based
// on which display we're drawing (Default, Internal, External, etc)
@@ -293,8 +282,10 @@ void NVFlinger::Compose() {
nvdisp->flip(igbp_buffer.BufferId(), igbp_buffer.Offset(), igbp_buffer.ExternalFormat(),
igbp_buffer.Width(), igbp_buffer.Height(), igbp_buffer.Stride(),
- static_cast<android::BufferTransformFlags>(buffer.transform), crop_rect);
+ static_cast<android::BufferTransformFlags>(buffer.transform), crop_rect,
+ buffer.fence.fences, buffer.fence.num_fences);
+ MicroProfileFlip();
guard->lock();
swap_interval = buffer.swap_interval;
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 6b6764d72..e55cac0d6 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -826,6 +826,19 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
const bool is_accuracy_normal =
Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::Normal;
+ auto it = committed_ranges.begin();
+ while (it != committed_ranges.end()) {
+ auto& current_intervals = *it;
+ auto next_it = std::next(it);
+ while (next_it != committed_ranges.end()) {
+ for (auto& interval : *next_it) {
+ current_intervals.subtract(interval);
+ }
+ next_it++;
+ }
+ it++;
+ }
+
boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads;
u64 total_size_bytes = 0;
u64 largest_copy = 0;
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index b01f04d0c..9835e3ac1 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -24,8 +24,6 @@ MICROPROFILE_DEFINE(DispatchCalls, "GPU", "Execute command buffer", MP_RGB(128,
void DmaPusher::DispatchCalls() {
MICROPROFILE_SCOPE(DispatchCalls);
- gpu.SyncGuestHost();
-
dma_pushbuffer_subindex = 0;
dma_state.is_last_call = true;
@@ -36,7 +34,6 @@ void DmaPusher::DispatchCalls() {
}
}
gpu.FlushCommands();
- gpu.SyncGuestHost();
gpu.OnCommandListEnd();
}
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 3a4646289..950c70dcd 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -242,6 +242,9 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
return;
case MAXWELL3D_REG_INDEX(fragment_barrier):
return rasterizer->FragmentBarrier();
+ case MAXWELL3D_REG_INDEX(invalidate_texture_data_cache):
+ rasterizer->InvalidateGPUCache();
+ return rasterizer->WaitForIdle();
case MAXWELL3D_REG_INDEX(tiled_cache_barrier):
return rasterizer->TiledCacheBarrier();
}
@@ -472,10 +475,25 @@ void Maxwell3D::ProcessQueryGet() {
switch (regs.query.query_get.operation) {
case Regs::QueryOperation::Release:
- if (regs.query.query_get.fence == 1) {
- rasterizer->SignalSemaphore(regs.query.QueryAddress(), regs.query.query_sequence);
+ if (regs.query.query_get.fence == 1 || regs.query.query_get.short_query != 0) {
+ const GPUVAddr sequence_address{regs.query.QueryAddress()};
+ const u32 payload = regs.query.query_sequence;
+ std::function<void()> operation([this, sequence_address, payload] {
+ memory_manager.Write<u32>(sequence_address, payload);
+ });
+ rasterizer->SignalFence(std::move(operation));
} else {
- StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0);
+ struct LongQueryResult {
+ u64_le value;
+ u64_le timestamp;
+ };
+ const GPUVAddr sequence_address{regs.query.QueryAddress()};
+ const u32 payload = regs.query.query_sequence;
+ std::function<void()> operation([this, sequence_address, payload] {
+ LongQueryResult query_result{payload, system.GPU().GetTicks()};
+ memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
+ });
+ rasterizer->SignalFence(std::move(operation));
}
break;
case Regs::QueryOperation::Acquire:
diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp
index 8c17639e4..dd9494efa 100644
--- a/src/video_core/engines/puller.cpp
+++ b/src/video_core/engines/puller.cpp
@@ -79,12 +79,15 @@ void Puller::ProcessSemaphoreTriggerMethod() {
u64 timestamp;
};
- Block block{};
- block.sequence = regs.semaphore_sequence;
- // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
- // CoreTiming
- block.timestamp = gpu.GetTicks();
- memory_manager.WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, sizeof(block));
+ const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()};
+ const u32 payload = regs.semaphore_sequence;
+ std::function<void()> operation([this, sequence_address, payload] {
+ Block block{};
+ block.sequence = payload;
+ block.timestamp = gpu.GetTicks();
+ memory_manager.WriteBlock(sequence_address, &block, sizeof(block));
+ });
+ rasterizer->SignalFence(std::move(operation));
} else {
do {
const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())};
@@ -94,6 +97,7 @@ void Puller::ProcessSemaphoreTriggerMethod() {
regs.acquire_active = true;
regs.acquire_mode = false;
if (word != regs.acquire_value) {
+ rasterizer->ReleaseFences();
std::this_thread::sleep_for(std::chrono::milliseconds(1));
continue;
}
@@ -101,11 +105,13 @@ void Puller::ProcessSemaphoreTriggerMethod() {
regs.acquire_active = true;
regs.acquire_mode = true;
if (word < regs.acquire_value) {
+ rasterizer->ReleaseFences();
std::this_thread::sleep_for(std::chrono::milliseconds(1));
continue;
}
} else if (op == GpuSemaphoreOperation::AcquireMask) {
- if (word & regs.semaphore_sequence == 0) {
+ if (word && regs.semaphore_sequence == 0) {
+ rasterizer->ReleaseFences();
std::this_thread::sleep_for(std::chrono::milliseconds(1));
continue;
}
@@ -117,16 +123,23 @@ void Puller::ProcessSemaphoreTriggerMethod() {
}
void Puller::ProcessSemaphoreRelease() {
- rasterizer->SignalSemaphore(regs.semaphore_address.SemaphoreAddress(), regs.semaphore_release);
+ const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()};
+ const u32 payload = regs.semaphore_release;
+ std::function<void()> operation([this, sequence_address, payload] {
+ memory_manager.Write<u32>(sequence_address, payload);
+ });
+ rasterizer->SignalFence(std::move(operation));
}
void Puller::ProcessSemaphoreAcquire() {
- const u32 word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress());
+ u32 word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress());
const auto value = regs.semaphore_acquire;
- std::this_thread::sleep_for(std::chrono::milliseconds(5));
- if (word != value) {
+ while (word != value) {
regs.acquire_active = true;
regs.acquire_value = value;
+ std::this_thread::sleep_for(std::chrono::milliseconds(1));
+ rasterizer->ReleaseFences();
+ word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress());
// TODO(kemathe73) figure out how to do the acquire_timeout
regs.acquire_mode = false;
regs.acquire_source = false;
@@ -147,9 +160,9 @@ void Puller::CallPullerMethod(const MethodCall& method_call) {
case BufferMethods::SemaphoreAddressHigh:
case BufferMethods::SemaphoreAddressLow:
case BufferMethods::SemaphoreSequencePayload:
- case BufferMethods::WrcacheFlush:
case BufferMethods::SyncpointPayload:
break;
+ case BufferMethods::WrcacheFlush:
case BufferMethods::RefCnt:
rasterizer->SignalReference();
break;
@@ -173,7 +186,7 @@ void Puller::CallPullerMethod(const MethodCall& method_call) {
}
case BufferMethods::MemOpB: {
// Implement this better.
- rasterizer->SyncGuestHost();
+ rasterizer->InvalidateGPUCache();
break;
}
case BufferMethods::MemOpC:
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index 03a70e5e0..c390ac91b 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -5,6 +5,8 @@
#include <algorithm>
#include <cstring>
+#include <deque>
+#include <functional>
#include <memory>
#include <queue>
@@ -19,28 +21,7 @@ namespace VideoCommon {
class FenceBase {
public:
- explicit FenceBase(u32 payload_, bool is_stubbed_)
- : address{}, payload{payload_}, is_semaphore{false}, is_stubbed{is_stubbed_} {}
-
- explicit FenceBase(u8* address_, u32 payload_, bool is_stubbed_)
- : address{address_}, payload{payload_}, is_semaphore{true}, is_stubbed{is_stubbed_} {}
-
- u8* GetAddress() const {
- return address;
- }
-
- u32 GetPayload() const {
- return payload;
- }
-
- bool IsSemaphore() const {
- return is_semaphore;
- }
-
-private:
- u8* address;
- u32 payload;
- bool is_semaphore;
+ explicit FenceBase(bool is_stubbed_) : is_stubbed{is_stubbed_} {}
protected:
bool is_stubbed;
@@ -60,31 +41,28 @@ public:
buffer_cache.AccumulateFlushes();
}
- void SignalSemaphore(u8* addr, u32 value) {
+ void SyncOperation(std::function<void()>&& func) {
+ uncommitted_operations.emplace_back(std::move(func));
+ }
+
+ void SignalFence(std::function<void()>&& func) {
TryReleasePendingFences();
const bool should_flush = ShouldFlush();
CommitAsyncFlushes();
- TFence new_fence = CreateFence(addr, value, !should_flush);
+ uncommitted_operations.emplace_back(std::move(func));
+ CommitOperations();
+ TFence new_fence = CreateFence(!should_flush);
fences.push(new_fence);
QueueFence(new_fence);
if (should_flush) {
rasterizer.FlushCommands();
}
- rasterizer.SyncGuestHost();
}
void SignalSyncPoint(u32 value) {
syncpoint_manager.IncrementGuest(value);
- TryReleasePendingFences();
- const bool should_flush = ShouldFlush();
- CommitAsyncFlushes();
- TFence new_fence = CreateFence(value, !should_flush);
- fences.push(new_fence);
- QueueFence(new_fence);
- if (should_flush) {
- rasterizer.FlushCommands();
- }
- rasterizer.SyncGuestHost();
+ std::function<void()> func([this, value] { syncpoint_manager.IncrementHost(value); });
+ SignalFence(std::move(func));
}
void WaitPendingFences() {
@@ -94,12 +72,10 @@ public:
WaitFence(current_fence);
}
PopAsyncFlushes();
- if (current_fence->IsSemaphore()) {
- char* address = reinterpret_cast<char*>(current_fence->GetAddress());
- auto payload = current_fence->GetPayload();
- std::memcpy(address, &payload, sizeof(payload));
- } else {
- syncpoint_manager.IncrementHost(current_fence->GetPayload());
+ auto operations = std::move(pending_operations.front());
+ pending_operations.pop_front();
+ for (auto& operation : operations) {
+ operation();
}
PopFence();
}
@@ -114,11 +90,9 @@ protected:
virtual ~FenceManager() = default;
- /// Creates a Sync Point Fence Interface, does not create a backend fence if 'is_stubbed' is
+ /// Creates a Fence Interface, does not create a backend fence if 'is_stubbed' is
/// true
- virtual TFence CreateFence(u32 value, bool is_stubbed) = 0;
- /// Creates a Semaphore Fence Interface, does not create a backend fence if 'is_stubbed' is true
- virtual TFence CreateFence(u8* addr, u32 value, bool is_stubbed) = 0;
+ virtual TFence CreateFence(bool is_stubbed) = 0;
/// Queues a fence into the backend if the fence isn't stubbed.
virtual void QueueFence(TFence& fence) = 0;
/// Notifies that the backend fence has been signaled/reached in host GPU.
@@ -141,12 +115,10 @@ private:
return;
}
PopAsyncFlushes();
- if (current_fence->IsSemaphore()) {
- char* address = reinterpret_cast<char*>(current_fence->GetAddress());
- const auto payload = current_fence->GetPayload();
- std::memcpy(address, &payload, sizeof(payload));
- } else {
- syncpoint_manager.IncrementHost(current_fence->GetPayload());
+ auto operations = std::move(pending_operations.front());
+ pending_operations.pop_front();
+ for (auto& operation : operations) {
+ operation();
}
PopFence();
}
@@ -165,16 +137,20 @@ private:
}
void PopAsyncFlushes() {
- std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
- texture_cache.PopAsyncFlushes();
- buffer_cache.PopAsyncFlushes();
+ {
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+ texture_cache.PopAsyncFlushes();
+ buffer_cache.PopAsyncFlushes();
+ }
query_cache.PopAsyncFlushes();
}
void CommitAsyncFlushes() {
- std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
- texture_cache.CommitAsyncFlushes();
- buffer_cache.CommitAsyncFlushes();
+ {
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+ texture_cache.CommitAsyncFlushes();
+ buffer_cache.CommitAsyncFlushes();
+ }
query_cache.CommitAsyncFlushes();
}
@@ -183,7 +159,13 @@ private:
fences.pop();
}
+ void CommitOperations() {
+ pending_operations.emplace_back(std::move(uncommitted_operations));
+ }
+
std::queue<TFence> fences;
+ std::deque<std::function<void()>> uncommitted_operations;
+ std::deque<std::deque<std::function<void()>>> pending_operations;
DelayedDestructionRing<TFence, 6> delayed_destruction_ring;
};
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index a1d19b1c8..d7a3dd96b 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -93,16 +93,13 @@ struct GPU::Impl {
}
/// Synchronizes CPU writes with Host GPU memory.
- void SyncGuestHost() {
- rasterizer->SyncGuestHost();
+ void InvalidateGPUCache() {
+ rasterizer->InvalidateGPUCache();
}
/// Signal the ending of command list.
void OnCommandListEnd() {
- if (is_async) {
- // This command only applies to asynchronous GPU mode
- gpu_thread.OnCommandListEnd();
- }
+ gpu_thread.OnCommandListEnd();
}
/// Request a host GPU memory flush from the CPU.
@@ -296,7 +293,7 @@ struct GPU::Impl {
}
void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer,
- Service::Nvidia::NvFence* fences, size_t num_fences) {
+ std::array<Service::Nvidia::NvFence, 4>& fences, size_t num_fences) {
size_t current_request_counter{};
{
std::unique_lock<std::mutex> lk(request_swap_mutex);
@@ -412,8 +409,8 @@ void GPU::FlushCommands() {
impl->FlushCommands();
}
-void GPU::SyncGuestHost() {
- impl->SyncGuestHost();
+void GPU::InvalidateGPUCache() {
+ impl->InvalidateGPUCache();
}
void GPU::OnCommandListEnd() {
@@ -488,7 +485,7 @@ const VideoCore::ShaderNotify& GPU::ShaderNotify() const {
}
void GPU::RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer,
- Service::Nvidia::NvFence* fences, size_t num_fences) {
+ std::array<Service::Nvidia::NvFence, 4>& fences, size_t num_fences) {
impl->RequestSwapBuffers(framebuffer, fences, num_fences);
}
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 655373b33..0a4a8b14f 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -110,7 +110,7 @@ public:
/// Flush all current written commands into the host GPU for execution.
void FlushCommands();
/// Synchronizes CPU writes with Host GPU memory.
- void SyncGuestHost();
+ void InvalidateGPUCache();
/// Signal the ending of command list.
void OnCommandListEnd();
@@ -180,7 +180,7 @@ public:
void RendererFrameEndNotify();
void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer,
- Service::Nvidia::NvFence* fences, size_t num_fences);
+ std::array<Service::Nvidia::NvFence, 4>& fences, size_t num_fences);
/// Performs any additional setup necessary in order to begin GPU emulation.
/// This can be used to launch any necessary threads and register any necessary
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 2c03545bf..1bd477011 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -98,7 +98,7 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) {
}
void ThreadManager::TickGPU() {
- PushCommand(GPUTickCommand(), true);
+ PushCommand(GPUTickCommand());
}
void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 5362aafb6..cb07f3d38 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -62,7 +62,10 @@ public:
virtual void DisableGraphicsUniformBuffer(size_t stage, u32 index) = 0;
/// Signal a GPU based semaphore as a fence
- virtual void SignalSemaphore(GPUVAddr addr, u32 value) = 0;
+ virtual void SignalFence(std::function<void()>&& func) = 0;
+
+ /// Send an operation to be done after a certain amount of flushes.
+ virtual void SyncOperation(std::function<void()>&& func) = 0;
/// Signal a GPU based syncpoint as a fence
virtual void SignalSyncPoint(u32 value) = 0;
@@ -89,7 +92,7 @@ public:
virtual void OnCPUWrite(VAddr addr, u64 size) = 0;
/// Sync memory between guest and host.
- virtual void SyncGuestHost() = 0;
+ virtual void InvalidateGPUCache() = 0;
/// Unmap memory range
virtual void UnmapMemory(VAddr addr, u64 size) = 0;
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp
index c76446b60..91463f854 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -10,10 +10,7 @@
namespace OpenGL {
-GLInnerFence::GLInnerFence(u32 payload_, bool is_stubbed_) : FenceBase{payload_, is_stubbed_} {}
-
-GLInnerFence::GLInnerFence(u8* address_, u32 payload_, bool is_stubbed_)
- : FenceBase{address_, payload_, is_stubbed_} {}
+GLInnerFence::GLInnerFence(bool is_stubbed_) : FenceBase{is_stubbed_} {}
GLInnerFence::~GLInnerFence() = default;
@@ -48,12 +45,8 @@ FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterize
BufferCache& buffer_cache_, QueryCache& query_cache_)
: GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {}
-Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) {
- return std::make_shared<GLInnerFence>(value, is_stubbed);
-}
-
-Fence FenceManagerOpenGL::CreateFence(u8* addr, u32 value, bool is_stubbed) {
- return std::make_shared<GLInnerFence>(addr, value, is_stubbed);
+Fence FenceManagerOpenGL::CreateFence(bool is_stubbed) {
+ return std::make_shared<GLInnerFence>(is_stubbed);
}
void FenceManagerOpenGL::QueueFence(Fence& fence) {
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h
index fced8d002..f1446e732 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.h
+++ b/src/video_core/renderer_opengl/gl_fence_manager.h
@@ -16,8 +16,7 @@ namespace OpenGL {
class GLInnerFence : public VideoCommon::FenceBase {
public:
- explicit GLInnerFence(u32 payload_, bool is_stubbed_);
- explicit GLInnerFence(u8* address_, u32 payload_, bool is_stubbed_);
+ explicit GLInnerFence(bool is_stubbed_);
~GLInnerFence();
void Queue();
@@ -40,8 +39,7 @@ public:
QueryCache& query_cache);
protected:
- Fence CreateFence(u32 value, bool is_stubbed) override;
- Fence CreateFence(u8* addr, u32 value, bool is_stubbed) override;
+ Fence CreateFence(bool is_stubbed) override;
void QueueFence(Fence& fence) override;
bool IsFenceSignaled(Fence& fence) const override;
void WaitFence(Fence& fence) override;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index b572950a6..6ebd6cff9 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -358,7 +358,7 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
}
}
-void RasterizerOpenGL::SyncGuestHost() {
+void RasterizerOpenGL::InvalidateGPUCache() {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
shader_cache.SyncGuestHost();
{
@@ -386,13 +386,12 @@ void RasterizerOpenGL::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {
}
}
-void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
- if (!gpu.IsAsync()) {
- gpu_memory->Write<u32>(addr, value);
- return;
- }
- auto paddr = gpu_memory->GetPointer(addr);
- fence_manager.SignalSemaphore(paddr, value);
+void RasterizerOpenGL::SignalFence(std::function<void()>&& func) {
+ fence_manager.SignalFence(std::move(func));
+}
+
+void RasterizerOpenGL::SyncOperation(std::function<void()>&& func) {
+ fence_manager.SyncOperation(std::move(func));
}
void RasterizerOpenGL::SignalSyncPoint(u32 value) {
@@ -400,16 +399,10 @@ void RasterizerOpenGL::SignalSyncPoint(u32 value) {
}
void RasterizerOpenGL::SignalReference() {
- if (!gpu.IsAsync()) {
- return;
- }
fence_manager.SignalOrdering();
}
void RasterizerOpenGL::ReleaseFences() {
- if (!gpu.IsAsync()) {
- return;
- }
fence_manager.WaitPendingFences();
}
@@ -426,6 +419,7 @@ void RasterizerOpenGL::WaitForIdle() {
}
void RasterizerOpenGL::FragmentBarrier() {
+ glTextureBarrier();
glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT);
}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index d469075a1..fe0ba979a 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -80,10 +80,11 @@ public:
bool MustFlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void OnCPUWrite(VAddr addr, u64 size) override;
- void SyncGuestHost() override;
+ void InvalidateGPUCache() override;
void UnmapMemory(VAddr addr, u64 size) override;
void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
- void SignalSemaphore(GPUVAddr addr, u32 value) override;
+ void SignalFence(std::function<void()>&& func) override;
+ void SyncOperation(std::function<void()>&& func) override;
void SignalSyncPoint(u32 value) override;
void SignalReference() override;
void ReleaseFences() override;
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
index 301cbbabe..0214b103a 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
@@ -11,11 +11,8 @@
namespace Vulkan {
-InnerFence::InnerFence(Scheduler& scheduler_, u32 payload_, bool is_stubbed_)
- : FenceBase{payload_, is_stubbed_}, scheduler{scheduler_} {}
-
-InnerFence::InnerFence(Scheduler& scheduler_, u8* address_, u32 payload_, bool is_stubbed_)
- : FenceBase{address_, payload_, is_stubbed_}, scheduler{scheduler_} {}
+InnerFence::InnerFence(Scheduler& scheduler_, bool is_stubbed_)
+ : FenceBase{is_stubbed_}, scheduler{scheduler_} {}
InnerFence::~InnerFence() = default;
@@ -48,12 +45,8 @@ FenceManager::FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::G
: GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_},
scheduler{scheduler_} {}
-Fence FenceManager::CreateFence(u32 value, bool is_stubbed) {
- return std::make_shared<InnerFence>(scheduler, value, is_stubbed);
-}
-
-Fence FenceManager::CreateFence(u8* addr, u32 value, bool is_stubbed) {
- return std::make_shared<InnerFence>(scheduler, addr, value, is_stubbed);
+Fence FenceManager::CreateFence(bool is_stubbed) {
+ return std::make_shared<InnerFence>(scheduler, is_stubbed);
}
void FenceManager::QueueFence(Fence& fence) {
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h
index ea9e88052..7fe2afcd9 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.h
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.h
@@ -25,8 +25,7 @@ class Scheduler;
class InnerFence : public VideoCommon::FenceBase {
public:
- explicit InnerFence(Scheduler& scheduler_, u32 payload_, bool is_stubbed_);
- explicit InnerFence(Scheduler& scheduler_, u8* address_, u32 payload_, bool is_stubbed_);
+ explicit InnerFence(Scheduler& scheduler_, bool is_stubbed_);
~InnerFence();
void Queue();
@@ -50,8 +49,7 @@ public:
QueryCache& query_cache, const Device& device, Scheduler& scheduler);
protected:
- Fence CreateFence(u32 value, bool is_stubbed) override;
- Fence CreateFence(u8* addr, u32 value, bool is_stubbed) override;
+ Fence CreateFence(bool is_stubbed) override;
void QueueFence(Fence& fence) override;
bool IsFenceSignaled(Fence& fence) const override;
void WaitFence(Fence& fence) override;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index d7b57e0f3..a35e41199 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -428,7 +428,7 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
}
}
-void RasterizerVulkan::SyncGuestHost() {
+void RasterizerVulkan::InvalidateGPUCache() {
pipeline_cache.SyncGuestHost();
{
std::scoped_lock lock{buffer_cache.mutex};
@@ -455,13 +455,12 @@ void RasterizerVulkan::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {
}
}
-void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
- if (!gpu.IsAsync()) {
- gpu_memory->Write<u32>(addr, value);
- return;
- }
- auto paddr = gpu_memory->GetPointer(addr);
- fence_manager.SignalSemaphore(paddr, value);
+void RasterizerVulkan::SignalFence(std::function<void()>&& func) {
+ fence_manager.SignalFence(std::move(func));
+}
+
+void RasterizerVulkan::SyncOperation(std::function<void()>&& func) {
+ fence_manager.SyncOperation(std::move(func));
}
void RasterizerVulkan::SignalSyncPoint(u32 value) {
@@ -469,16 +468,10 @@ void RasterizerVulkan::SignalSyncPoint(u32 value) {
}
void RasterizerVulkan::SignalReference() {
- if (!gpu.IsAsync()) {
- return;
- }
fence_manager.SignalOrdering();
}
void RasterizerVulkan::ReleaseFences() {
- if (!gpu.IsAsync()) {
- return;
- }
fence_manager.WaitPendingFences();
}
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index c836158b8..fb9e83e8f 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -76,10 +76,11 @@ public:
bool MustFlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void OnCPUWrite(VAddr addr, u64 size) override;
- void SyncGuestHost() override;
+ void InvalidateGPUCache() override;
void UnmapMemory(VAddr addr, u64 size) override;
void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
- void SignalSemaphore(GPUVAddr addr, u32 value) override;
+ void SignalFence(std::function<void()>&& func) override;
+ void SyncOperation(std::function<void()>&& func) override;
void SignalSyncPoint(u32 value) override;
void SignalReference() override;
void ReleaseFences() override;