summaryrefslogtreecommitdiffstats
path: root/external/optick/optick_gpu.vulkan.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'external/optick/optick_gpu.vulkan.cpp')
-rw-r--r--external/optick/optick_gpu.vulkan.cpp365
1 files changed, 365 insertions, 0 deletions
diff --git a/external/optick/optick_gpu.vulkan.cpp b/external/optick/optick_gpu.vulkan.cpp
new file mode 100644
index 0000000..6d6f29d
--- /dev/null
+++ b/external/optick/optick_gpu.vulkan.cpp
@@ -0,0 +1,365 @@
+#include "optick.config.h"
+
+#if USE_OPTICK
+#if OPTICK_ENABLE_GPU_VULKAN
+#include <vulkan/vulkan.h>
+
+#include "optick_core.h"
+#include "optick_gpu.h"
+
+// Evaluates a Vulkan call once and asserts that it returned VK_SUCCESS.
+// No trailing semicolon after `while(false)`: the caller supplies it, so the macro
+// behaves like a single statement (safe inside unbraced if/else).
+#define OPTICK_VK_CHECK(args) do { VkResult optickVkCheckResult = (args); OPTICK_ASSERT(optickVkCheckResult == VK_SUCCESS, "Failed check"); (void)optickVkCheckResult; } while(false)
+
+namespace Optick
+{
+ // Vulkan implementation of the GPUProfiler interface.
+ // Keeps one NodePayload per GPU node; each payload holds the Vulkan handles used to
+ // record and read back timestamp queries.
+ class GPUProfilerVulkan : public GPUProfiler
+ {
+ protected:
+     // Command buffer and fence used for one profiler submission slot.
+     struct Frame
+     {
+         VkCommandBuffer commandBuffer;
+         VkFence fence;
+         Frame() : commandBuffer(VK_NULL_HANDLE), fence(VK_NULL_HANDLE) {}
+     };
+
+     // Vulkan objects associated with a single node.
+     // The destructor destroys commandPool and queryPool, so copying is disabled to
+     // prevent two instances from destroying the same handles.
+     struct NodePayload
+     {
+         VkDevice device;
+         VkPhysicalDevice physicalDevice;
+         VkQueue queue;
+         VkQueryPool queryPool;
+         VkCommandPool commandPool;
+
+         array<Frame, NUM_FRAMES_DELAY> frames;
+
+         NodePayload() : device(VK_NULL_HANDLE), physicalDevice(VK_NULL_HANDLE), queue(VK_NULL_HANDLE), queryPool(VK_NULL_HANDLE), commandPool(VK_NULL_HANDLE) {}
+         ~NodePayload();
+
+         NodePayload(const NodePayload&) = delete;
+         NodePayload& operator=(const NodePayload&) = delete;
+     };
+     vector<NodePayload*> nodePayloads;
+
+     // Reads back `count` query results starting at `startIndex` and records a reset
+     // of that range into `commandBuffer`.
+     void ResolveTimestamps(VkCommandBuffer commandBuffer, uint32_t startIndex, uint32_t count);
+     // Blocks until the fence of the frame slot selected by `frameNumberToWait` is signaled.
+     void WaitForFrame(uint64_t frameNumberToWait);
+
+ public:
+     GPUProfilerVulkan();
+     ~GPUProfilerVulkan();
+
+     // Creates the per-node query pool, command pool, command buffers and fences.
+     // All arrays are indexed by node and must hold `nodeCount` entries.
+     void InitDevice(VkDevice* devices, VkPhysicalDevice* physicalDevices, VkQueue* cmdQueues, uint32_t* cmdQueuesFamily, uint32_t nodeCount);
+     // Records a timestamp write into `commandBuffer` while the profiler is running.
+     void QueryTimestamp(VkCommandBuffer commandBuffer, int64_t* outCpuTimestamp);
+
+
+     // Interface implementation
+     ClockSynchronization GetClockSynchronization(uint32_t nodeIndex) override;
+
+     // `context` is interpreted as a VkCommandBuffer handle.
+     void QueryTimestamp(void* context, int64_t* outCpuTimestamp) override
+     {
+         QueryTimestamp((VkCommandBuffer)context, outCpuTimestamp);
+     }
+
+     void Flip(void* swapChain) override;
+ };
+
+ // Creates the Vulkan GPU profiler, initialises it with the supplied per-node handle
+ // arrays and registers it with the Optick core.
+ void InitGpuVulkan(void* vkDevices, void* vkPhysicalDevices, void* vkQueues, uint32_t* cmdQueuesFamily, uint32_t numQueues)
+ {
+     // The opaque pointers are arrays of Vulkan handles.
+     VkDevice* devices = static_cast<VkDevice*>(vkDevices);
+     VkPhysicalDevice* physicalDevices = static_cast<VkPhysicalDevice*>(vkPhysicalDevices);
+     VkQueue* queues = static_cast<VkQueue*>(vkQueues);
+
+     GPUProfilerVulkan* profiler = Memory::New<GPUProfilerVulkan>();
+     profiler->InitDevice(devices, physicalDevices, queues, cmdQueuesFamily, numQueues);
+     Core::Get().InitGPUProfiler(profiler);
+ }
+
+ // Nothing to set up here; Vulkan objects are created later by InitDevice().
+ GPUProfilerVulkan::GPUProfilerVulkan() = default;
+
+ // Creates, for every node, a timestamp query pool, a command pool, and one command
+ // buffer + fence per delayed frame. The query pool is reset once through a blocking
+ // one-time submission on the first frame's command buffer.
+ //
+ // devices / physicalDevices / cmdQueues / cmdQueuesFamily are parallel arrays with
+ // `nodeCount` entries each.
+ void GPUProfilerVulkan::InitDevice(VkDevice* devices, VkPhysicalDevice* physicalDevices, VkQueue* cmdQueues, uint32_t* cmdQueuesFamily, uint32_t nodeCount)
+ {
+     // Value-initialise the create-info structs so no field is left indeterminate.
+     VkQueryPoolCreateInfo queryPoolCreateInfo = {};
+     queryPoolCreateInfo.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
+     queryPoolCreateInfo.pNext = 0;
+     queryPoolCreateInfo.flags = 0;
+     queryPoolCreateInfo.queryType = VK_QUERY_TYPE_TIMESTAMP;
+     queryPoolCreateInfo.queryCount = MAX_QUERIES_COUNT + 1;
+
+     VkCommandPoolCreateInfo commandPoolCreateInfo = {};
+     commandPoolCreateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
+     commandPoolCreateInfo.pNext = 0;
+     commandPoolCreateInfo.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
+
+     nodes.resize(nodeCount);
+     nodePayloads.resize(nodeCount);
+
+     VkResult r = VK_SUCCESS;
+     for (uint32_t i = 0; i < nodeCount; ++i)
+     {
+         VkPhysicalDeviceProperties properties = {};
+         vkGetPhysicalDeviceProperties(physicalDevices[i], &properties);
+         GPUProfiler::InitNode(properties.deviceName, i);
+
+         NodePayload* nodePayload = Memory::New<NodePayload>();
+         nodePayloads[i] = nodePayload;
+         nodePayload->device = devices[i];
+         nodePayload->physicalDevice = physicalDevices[i];
+         nodePayload->queue = cmdQueues[i];
+
+         r = vkCreateQueryPool(nodePayload->device, &queryPoolCreateInfo, 0, &nodePayload->queryPool);
+         OPTICK_ASSERT(r == VK_SUCCESS, "Failed");
+
+         commandPoolCreateInfo.queueFamilyIndex = cmdQueuesFamily[i];
+         r = vkCreateCommandPool(nodePayload->device, &commandPoolCreateInfo, 0, &nodePayload->commandPool);
+         OPTICK_ASSERT(r == VK_SUCCESS, "Failed");
+
+         for (uint32_t j = 0; j < nodePayload->frames.size(); ++j)
+         {
+             Frame& frame = nodePayload->frames[j];
+             const bool isFirstFrame = (j == 0);
+
+             VkCommandBufferAllocateInfo allocInfo = {};
+             allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
+             allocInfo.pNext = 0;
+             allocInfo.commandBufferCount = 1;
+             allocInfo.commandPool = nodePayload->commandPool;
+             allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
+             r = vkAllocateCommandBuffers(nodePayload->device, &allocInfo, &frame.commandBuffer);
+             OPTICK_ASSERT(r == VK_SUCCESS, "Failed");
+
+             // The first frame's fence starts unsignaled because it is submitted and
+             // waited on below; all other fences are created already signaled.
+             VkFenceCreateInfo fenceCreateInfo = {};
+             fenceCreateInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
+             fenceCreateInfo.pNext = 0;
+             fenceCreateInfo.flags = isFirstFrame ? 0 : VK_FENCE_CREATE_SIGNALED_BIT;
+             r = vkCreateFence(nodePayload->device, &fenceCreateInfo, 0, &frame.fence);
+             OPTICK_ASSERT(r == VK_SUCCESS, "Failed");
+
+             if (isFirstFrame)
+             {
+                 // One-time blocking submission that resets the query range before first use.
+                 VkCommandBufferBeginInfo commandBufferBeginInfo = {};
+                 commandBufferBeginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
+                 commandBufferBeginInfo.pNext = 0;
+                 commandBufferBeginInfo.pInheritanceInfo = 0;
+                 commandBufferBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
+                 r = vkBeginCommandBuffer(frame.commandBuffer, &commandBufferBeginInfo);
+                 OPTICK_ASSERT(r == VK_SUCCESS, "Failed");
+                 vkCmdResetQueryPool(frame.commandBuffer, nodePayload->queryPool, 0, MAX_QUERIES_COUNT);
+                 r = vkEndCommandBuffer(frame.commandBuffer);
+                 OPTICK_ASSERT(r == VK_SUCCESS, "Failed");
+
+                 VkSubmitInfo submitInfo = {};
+                 submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+                 submitInfo.pNext = nullptr;
+                 submitInfo.waitSemaphoreCount = 0;
+                 submitInfo.pWaitSemaphores = nullptr;
+                 submitInfo.commandBufferCount = 1;
+                 submitInfo.pCommandBuffers = &frame.commandBuffer;
+                 submitInfo.signalSemaphoreCount = 0;
+                 submitInfo.pSignalSemaphores = nullptr;
+                 r = vkQueueSubmit(nodePayload->queue, 1, &submitInfo, frame.fence);
+                 OPTICK_ASSERT(r == VK_SUCCESS, "Failed");
+                 r = vkWaitForFences(nodePayload->device, 1, &frame.fence, 1, (uint64_t)-1);
+                 OPTICK_ASSERT(r == VK_SUCCESS, "Failed");
+                 r = vkResetCommandBuffer(frame.commandBuffer, VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT);
+                 OPTICK_ASSERT(r == VK_SUCCESS, "Failed");
+             }
+         }
+     }
+
+     // Keeps `r` referenced in builds where OPTICK_ASSERT compiles to nothing.
+     (void)r;
+ }
+
+ // While the profiler is running, reserves a query slot on the current node (which
+ // receives `outCpuTimestamp`) and records a timestamp write for that slot into
+ // `commandBuffer`. Does nothing in any other state.
+ void GPUProfilerVulkan::QueryTimestamp(VkCommandBuffer commandBuffer, int64_t* outCpuTimestamp)
+ {
+     if (currentState != STATE_RUNNING)
+         return;
+
+     const uint32_t queryIndex = nodes[currentNode]->QueryTimestamp(outCpuTimestamp);
+     VkQueryPool pool = nodePayloads[currentNode]->queryPool;
+     vkCmdWriteTimestamp(commandBuffer, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, pool, queryIndex);
+ }
+
+ // Copies `count` 64-bit query results, starting at `startIndex`, from the current
+ // node's query pool into its queryGpuTimestamps array, records a reset of the same
+ // query range into `commandBuffer`, and stores the converted CPU timestamps through
+ // the node's queryCpuTimestamps pointers. A zero `count` is a no-op.
+ void GPUProfilerVulkan::ResolveTimestamps(VkCommandBuffer commandBuffer, uint32_t startIndex, uint32_t count)
+ {
+     if (count == 0)
+         return;
+
+     Node* node = nodes[currentNode];
+     NodePayload* payload = nodePayloads[currentNode];
+
+     // Results are tightly packed 8-byte values (VK_QUERY_RESULT_64_BIT).
+     OPTICK_VK_CHECK(vkGetQueryPoolResults(payload->device, payload->queryPool, startIndex, count, 8 * count, &node->queryGpuTimestamps[startIndex], 8, VK_QUERY_RESULT_64_BIT));
+     vkCmdResetQueryPool(commandBuffer, payload->queryPool, startIndex, count);
+
+     // Convert GPU timestamps => CPU timestamps
+     const uint32_t endIndex = startIndex + count;
+     for (uint32_t i = startIndex; i < endIndex; ++i)
+     {
+         *node->queryCpuTimestamps[i] = node->clock.GetCPUTimestamp(node->queryGpuTimestamps[i]);
+     }
+ }
+
+ // Blocks until the fence of the current node's frame slot
+ // (frameNumberToWait % number of slots) is signaled.
+ //
+ // Polls with a short timeout and retries only while the wait times out. Any other
+ // result (success or an error such as VK_ERROR_DEVICE_LOST) ends the wait, so a
+ // failed device cannot hang the caller forever. Returns immediately when no payload
+ // exists for the current node (e.g. InitDevice was given zero nodes).
+ void GPUProfilerVulkan::WaitForFrame(uint64_t frameNumberToWait)
+ {
+     OPTICK_EVENT();
+
+     if (static_cast<size_t>(currentNode) >= nodePayloads.size())
+         return;
+
+     VkResult r = VK_SUCCESS;
+     do
+     {
+         NodePayload& payload = *nodePayloads[currentNode];
+         r = vkWaitForFences(payload.device, 1, &payload.frames[frameNumberToWait % payload.frames.size()].fence, 1, 1000 * 30);
+     } while (r == VK_TIMEOUT);
+ }
+
+ // Per-frame tick of the GPU profiler.
+ //
+ // While running, it re-records the current frame slot's command buffer to:
+ //   * close the GPU frame event of the current slot,
+ //   * open the GPU frame event (and frame tag) for the next slot,
+ //   * read back and reset the query range previously assigned to the next slot,
+ // then submits it with the slot's fence. The frame counter advances on every call,
+ // whether or not the profiler is running.
+ //
+ // The read-back/reset step runs BEFORE vkEndCommandBuffer/vkQueueSubmit because
+ // ResolveTimestamps() records vkCmdResetQueryPool into `commandBuffer`, which is only
+ // valid while the command buffer is in the recording state.
+ void GPUProfilerVulkan::Flip(void* /*swapChain*/)
+ {
+     OPTICK_CATEGORY("GPUProfilerVulkan::Flip", Category::Debug);
+
+     std::lock_guard<std::recursive_mutex> lock(updateLock);
+
+     if (currentState == STATE_STARTING)
+         currentState = STATE_RUNNING;
+
+     if (currentState == STATE_RUNNING)
+     {
+         Node& node = *nodes[currentNode];
+         NodePayload& payload = *nodePayloads[currentNode];
+
+         uint32_t currentFrameIndex = frameNumber % NUM_FRAMES_DELAY;
+         uint32_t nextFrameIndex = (frameNumber + 1) % NUM_FRAMES_DELAY;
+
+         QueryFrame& currentFrame = node.queryGpuframes[currentFrameIndex];
+         QueryFrame& nextFrame = node.queryGpuframes[nextFrameIndex];
+
+         VkCommandBuffer commandBuffer = payload.frames[currentFrameIndex].commandBuffer;
+         VkFence fence = payload.frames[currentFrameIndex].fence;
+         VkDevice device = payload.device;
+         VkQueue queue = payload.queue;
+
+         // The previous submission of this slot must have finished before re-recording.
+         vkWaitForFences(device, 1, &fence, 1, (uint64_t)-1);
+
+         VkCommandBufferBeginInfo commandBufferBeginInfo;
+         commandBufferBeginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
+         commandBufferBeginInfo.pNext = 0;
+         commandBufferBeginInfo.pInheritanceInfo = 0;
+         commandBufferBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
+         OPTICK_VK_CHECK(vkBeginCommandBuffer(commandBuffer, &commandBufferBeginInfo));
+         vkResetFences(device, 1, &fence);
+
+         if (EventData* frameEvent = currentFrame.frameEvent)
+             QueryTimestamp(commandBuffer, &frameEvent->finish);
+
+         // Generate GPU Frame event for the next frame
+         EventData& event = AddFrameEvent();
+         QueryTimestamp(commandBuffer, &event.start);
+         QueryTimestamp(commandBuffer, &AddFrameTag().timestamp);
+         nextFrame.frameEvent = &event;
+
+         uint32_t queryBegin = currentFrame.queryIndexStart;
+         uint32_t queryEnd = node.queryIndex;
+
+         if (queryBegin != (uint32_t)-1)
+         {
+             currentFrame.queryIndexCount = queryEnd - queryBegin;
+         }
+
+         // Preparing Next Frame
+         // Try to resolve the timestamps previously recorded for the next frame slot.
+         // Must happen while `commandBuffer` is still recording (see function comment).
+         if (nextFrame.queryIndexStart != (uint32_t)-1)
+         {
+             uint32_t startIndex = nextFrame.queryIndexStart % MAX_QUERIES_COUNT;
+             uint32_t finishIndex = (startIndex + nextFrame.queryIndexCount) % MAX_QUERIES_COUNT;
+
+             if (startIndex < finishIndex)
+             {
+                 ResolveTimestamps(commandBuffer, startIndex, finishIndex - startIndex);
+             }
+             else if (startIndex > finishIndex)
+             {
+                 // The range wraps around the end of the query ring: resolve both pieces.
+                 ResolveTimestamps(commandBuffer, startIndex, MAX_QUERIES_COUNT - startIndex);
+                 ResolveTimestamps(commandBuffer, 0, finishIndex);
+             }
+         }
+
+         OPTICK_VK_CHECK(vkEndCommandBuffer(commandBuffer));
+         VkSubmitInfo submitInfo = {};
+         submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+         submitInfo.pNext = nullptr;
+         submitInfo.waitSemaphoreCount = 0;
+         submitInfo.pWaitSemaphores = nullptr;
+         submitInfo.commandBufferCount = 1;
+         submitInfo.pCommandBuffers = &commandBuffer;
+         submitInfo.signalSemaphoreCount = 0;
+         submitInfo.pSignalSemaphores = nullptr;
+         OPTICK_VK_CHECK(vkQueueSubmit(queue, 1, &submitInfo, fence));
+
+         nextFrame.queryIndexStart = queryEnd;
+         nextFrame.queryIndexCount = 0;
+     }
+
+     ++frameNumber;
+ }
+
+ // Samples a matching pair of GPU/CPU timestamps for node `nodeIndex`.
+ //
+ // A single timestamp is written to query slot 0 through a blocking submission on the
+ // current frame slot's command buffer; the CPU time is sampled right after the GPU
+ // value has been read back. The GPU frequency is derived from the physical device's
+ // timestampPeriod limit.
+ GPUProfiler::ClockSynchronization GPUProfilerVulkan::GetClockSynchronization(uint32_t nodeIndex)
+ {
+     GPUProfiler::ClockSynchronization clock;
+
+     NodePayload& payload = *nodePayloads[nodeIndex];
+     Frame& frame = payload.frames[frameNumber % NUM_FRAMES_DELAY];
+
+     VkDevice device = payload.device;
+     VkQueryPool queryPool = payload.queryPool;
+     VkCommandBuffer commandBuffer = frame.commandBuffer;
+     VkFence fence = frame.fence;
+
+     VkCommandBufferBeginInfo beginInfo;
+     beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
+     beginInfo.pNext = 0;
+     beginInfo.pInheritanceInfo = 0;
+     beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
+
+     // Wait for any earlier use of this slot, then re-record it.
+     vkWaitForFences(device, 1, &fence, 1, (uint64_t)-1);
+     vkResetFences(device, 1, &fence);
+     vkResetCommandBuffer(commandBuffer, VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT);
+     vkBeginCommandBuffer(commandBuffer, &beginInfo);
+     vkCmdResetQueryPool(commandBuffer, queryPool, 0, 1);
+     vkCmdWriteTimestamp(commandBuffer, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, queryPool, 0);
+     vkEndCommandBuffer(commandBuffer);
+
+     // Value-initialisation already zeroes the semaphore counts and pointers.
+     VkSubmitInfo submitInfo = {};
+     submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+     submitInfo.commandBufferCount = 1;
+     submitInfo.pCommandBuffers = &commandBuffer;
+     vkQueueSubmit(payload.queue, 1, &submitInfo, fence);
+     vkWaitForFences(device, 1, &fence, 1, (uint64_t)-1);
+
+     clock.timestampGPU = 0;
+     vkGetQueryPoolResults(device, queryPool, 0, 1, 8, &clock.timestampGPU, 8, VK_QUERY_RESULT_64_BIT);
+     clock.timestampCPU = GetHighPrecisionTime();
+     clock.frequencyCPU = GetHighPrecisionFrequency();
+
+     VkPhysicalDeviceProperties deviceProperties;
+     vkGetPhysicalDeviceProperties(payload.physicalDevice, &deviceProperties);
+     clock.frequencyGPU = (uint64_t)(1000000000ll / deviceProperties.limits.timestampPeriod);
+
+     return clock;
+ }
+
+ // Destroys the node's command pool first, then its query pool.
+ GPUProfilerVulkan::NodePayload::~NodePayload()
+ {
+     const VkAllocationCallbacks* noAllocator = nullptr;
+     vkDestroyCommandPool(device, commandPool, noAllocator);
+     vkDestroyQueryPool(device, queryPool, noAllocator);
+ }
+
+ // Waits for the most recently submitted frame slot, then destroys every frame's fence
+ // and command buffer before deleting each node payload (whose destructor destroys the
+ // pools).
+ GPUProfilerVulkan::~GPUProfilerVulkan()
+ {
+     WaitForFrame(frameNumber - 1);
+
+     for (size_t nodeIdx = 0; nodeIdx < nodePayloads.size(); ++nodeIdx)
+     {
+         NodePayload* nodePayload = nodePayloads[nodeIdx];
+
+         for (size_t frameIdx = 0; frameIdx < nodePayload->frames.size(); ++frameIdx)
+         {
+             Frame& slot = nodePayload->frames[frameIdx];
+             vkDestroyFence(nodePayload->device, slot.fence, nullptr);
+             vkFreeCommandBuffers(nodePayload->device, nodePayload->commandPool, 1, &slot.commandBuffer);
+         }
+
+         Memory::Delete(nodePayload);
+     }
+
+     nodePayloads.clear();
+ }
+}
+#else
+#include "optick_common.h"
+namespace Optick
+{
+ // Stub used when Vulkan GPU profiling is compiled out: all arguments are ignored and
+ // the failure is reported through OPTICK_FAILED.
+ void InitGpuVulkan(void* /*devices*/,
+                    void* /*physicalDevices*/,
+                    void* /*cmdQueues*/,
+                    uint32_t* /*cmdQueuesFamily*/,
+                    uint32_t /*numQueues*/)
+ {
+     OPTICK_FAILED("OPTICK_ENABLE_GPU_VULKAN is disabled! Can't initialize GPU Profiler!");
+ }
+}
+#endif //OPTICK_ENABLE_GPU_VULKAN
+#endif //USE_OPTICK \ No newline at end of file