diff options
author | LaG1924 <12997935+LaG1924@users.noreply.github.com> | 2019-04-30 13:12:35 +0200 |
---|---|---|
committer | LaG1924 <12997935+LaG1924@users.noreply.github.com> | 2019-04-30 13:12:35 +0200 |
commit | 868ba6279a20e4d1412c2d576c67400167de6694 (patch) | |
tree | a7090b2da96987c4c532c5bedf812df20f604964 /external | |
parent | Second iteration of changing to single-threaded model (diff) | |
download | AltCraft-868ba6279a20e4d1412c2d576c67400167de6694.tar AltCraft-868ba6279a20e4d1412c2d576c67400167de6694.tar.gz AltCraft-868ba6279a20e4d1412c2d576c67400167de6694.tar.bz2 AltCraft-868ba6279a20e4d1412c2d576c67400167de6694.tar.lz AltCraft-868ba6279a20e4d1412c2d576c67400167de6694.tar.xz AltCraft-868ba6279a20e4d1412c2d576c67400167de6694.tar.zst AltCraft-868ba6279a20e4d1412c2d576c67400167de6694.zip |
Diffstat (limited to '')
-rw-r--r-- | external/CMakeLists.txt | 5 | ||||
-rw-r--r-- | external/optick/optick.config.h | 51 | ||||
-rw-r--r-- | external/optick/optick.h | 872 | ||||
-rw-r--r-- | external/optick/optick_common.h | 142 | ||||
-rw-r--r-- | external/optick/optick_core.cpp | 1657 | ||||
-rw-r--r-- | external/optick/optick_core.h | 568 | ||||
-rw-r--r-- | external/optick/optick_core.linux.h | 410 | ||||
-rw-r--r-- | external/optick/optick_core.macos.h | 289 | ||||
-rw-r--r-- | external/optick/optick_core.platform.h | 92 | ||||
-rw-r--r-- | external/optick/optick_core.win.h | 1664 | ||||
-rw-r--r-- | external/optick/optick_gpu.cpp | 136 | ||||
-rw-r--r-- | external/optick/optick_gpu.d3d12.cpp | 382 | ||||
-rw-r--r-- | external/optick/optick_gpu.h | 129 | ||||
-rw-r--r-- | external/optick/optick_gpu.vulkan.cpp | 365 | ||||
-rw-r--r-- | external/optick/optick_memory.h | 419 | ||||
-rw-r--r-- | external/optick/optick_message.cpp | 172 | ||||
-rw-r--r-- | external/optick/optick_message.h | 130 | ||||
-rw-r--r-- | external/optick/optick_serialization.cpp | 178 | ||||
-rw-r--r-- | external/optick/optick_serialization.h | 120 | ||||
-rw-r--r-- | external/optick/optick_server.cpp | 338 | ||||
-rw-r--r-- | external/optick/optick_server.h | 42 |
21 files changed, 8160 insertions, 1 deletions
diff --git a/external/CMakeLists.txt b/external/CMakeLists.txt index d370f1a..b3cc884 100644 --- a/external/CMakeLists.txt +++ b/external/CMakeLists.txt @@ -7,4 +7,7 @@ file(GLOB_RECURSE HEADERS "./include/*") include_directories(./include/) -add_library(deps STATIC ${SOURCES} ${HEADERS})
\ No newline at end of file +add_library(deps STATIC ${SOURCES} ${HEADERS}) + +file(GLOB OPTICK_SRC "./optick/*.cpp") +add_library(optick STATIC ${OPTICK_SRC})
\ No newline at end of file diff --git a/external/optick/optick.config.h b/external/optick/optick.config.h new file mode 100644 index 0000000..dcc6e98 --- /dev/null +++ b/external/optick/optick.config.h @@ -0,0 +1,51 @@ +#pragma once + +#define OPTICK_ENABLE_GPU_D3D12 false +#define OPTICK_ENABLE_GPU_VULKAN false + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// GLOBAL SETTINGS +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// [x] USE_OPTICK - (Master Switch) +// [x] OPTICK_ENABLE_TRACING - (Enable Kernel-level tracing) +// [x] OPTICK_ENABLE_GPU_D3D12 - (GPU D3D12) +// [ ] OPTICK_ENABLE_GPU_VULKAN - (GPU VULKAN) +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// MASTER SWITCH - use it for disabling profiler in final builds // +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#if !defined(USE_OPTICK) +#define USE_OPTICK (1) +#endif +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Enable Low-level platform-specific tracing (Switch Contexts, Autosampling, etc.) +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#if !defined(OPTICK_ENABLE_TRACING) +#define OPTICK_ENABLE_TRACING (USE_OPTICK /*&& 0*/) +#endif //OPTICK_ENABLE_TRACING +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// GPU Counters +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#if !defined(OPTICK_ENABLE_GPU) +#define OPTICK_ENABLE_GPU (USE_OPTICK /*&& 0*/) +#endif //OPTICK_ENABLE_GPU + +// D3D12 +#if !defined(OPTICK_ENABLE_GPU_D3D12) +#if defined(_MSC_VER) +#define OPTICK_ENABLE_GPU_D3D12 (OPTICK_ENABLE_GPU /*&& 0*/) +#else +#define OPTICK_ENABLE_GPU_D3D12 (0) +#endif +#endif + +// VUKLAN +#if !defined(OPTICK_ENABLE_GPU_VULKAN) +#define OPTICK_ENABLE_GPU_VULKAN (OPTICK_ENABLE_GPU && 0) +#endif + diff --git a/external/optick/optick.h b/external/optick/optick.h new file mode 100644 index 0000000..e3eb512 --- /dev/null +++ b/external/optick/optick.h @@ -0,0 +1,872 @@ +#pragma once +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Config +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#include "optick.config.h" + +#if USE_OPTICK +#include <stdint.h> + +#if defined(__clang__) || defined(__GNUC__) +# define OPTICK_GCC (1) +# if defined(__APPLE_CC__) +# define OPTICK_OSX (1) +# elif defined(__linux__) +# define OPTICK_LINUX (1) +# elif defined(__ORBIS__) +# define OPTICK_PS4 (1) +# endif +#elif defined(_MSC_VER) +# define OPTICK_MSVC (1) +# if defined(_DURANGO) +# define OPTICK_XBOX (1) +# else +# define OPTICK_PC (1) +#endif +#else +#error Compiler not supported +#endif + +//////////////////////////////////////////////////////////////////////// +// Target Platform +//////////////////////////////////////////////////////////////////////// + +#if defined(OPTICK_GCC) +#define OPTICK_FUNC __PRETTY_FUNCTION__ +#elif defined(OPTICK_MSVC) +#define OPTICK_FUNC __FUNCSIG__ +#else +#error Compiler not supported +#endif + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// EXPORTS +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#ifdef OPTICK_EXPORTS +#define OPTICK_API __declspec(dllexport) +#else +#define OPTICK_API //__declspec(dllimport) +#endif +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#define OPTICK_CONCAT_IMPL(x, y) x##y +#define OPTICK_CONCAT(x, y) OPTICK_CONCAT_IMPL(x, y) + +#if defined(OPTICK_MSVC) +#define OPTICK_INLINE __forceinline +#elif defined(OPTICK_GCC) +#define OPTICK_INLINE __attribute__((always_inline)) inline +#else +#error Compiler is not supported +#endif + + +// Vulkan Forward Declarations +#define OPTICK_DEFINE_HANDLE(object) typedef struct object##_T* object; +OPTICK_DEFINE_HANDLE(VkDevice); +OPTICK_DEFINE_HANDLE(VkPhysicalDevice); +OPTICK_DEFINE_HANDLE(VkQueue); +OPTICK_DEFINE_HANDLE(VkCommandBuffer); + +// D3D12 Forward Declarations +struct ID3D12CommandList; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +namespace Optick +{ + // Source: http://msdn.microsoft.com/en-us/library/system.windows.media.colors(v=vs.110).aspx + // Image: http://i.msdn.microsoft.com/dynimg/IC24340.png + struct Color + { + enum + { + Null = 0x00000000, + AliceBlue = 0xFFF0F8FF, + AntiqueWhite = 0xFFFAEBD7, + Aqua = 0xFF00FFFF, + Aquamarine = 0xFF7FFFD4, + Azure = 0xFFF0FFFF, + Beige = 0xFFF5F5DC, + Bisque = 0xFFFFE4C4, + Black = 0xFF000000, + BlanchedAlmond = 0xFFFFEBCD, + Blue = 0xFF0000FF, + BlueViolet = 0xFF8A2BE2, + Brown = 0xFFA52A2A, + BurlyWood = 0xFFDEB887, + CadetBlue = 0xFF5F9EA0, + Chartreuse = 0xFF7FFF00, + Chocolate = 0xFFD2691E, + Coral = 0xFFFF7F50, + CornflowerBlue = 0xFF6495ED, + Cornsilk = 0xFFFFF8DC, + Crimson = 0xFFDC143C, + Cyan = 0xFF00FFFF, + DarkBlue = 0xFF00008B, + DarkCyan = 0xFF008B8B, + DarkGoldenRod = 0xFFB8860B, + DarkGray = 0xFFA9A9A9, + DarkGreen = 0xFF006400, + DarkKhaki = 0xFFBDB76B, + DarkMagenta = 0xFF8B008B, + DarkOliveGreen = 0xFF556B2F, + DarkOrange = 0xFFFF8C00, + DarkOrchid = 0xFF9932CC, + DarkRed = 0xFF8B0000, + DarkSalmon = 0xFFE9967A, + DarkSeaGreen = 0xFF8FBC8F, + DarkSlateBlue = 0xFF483D8B, + DarkSlateGray = 0xFF2F4F4F, + DarkTurquoise = 0xFF00CED1, + DarkViolet = 0xFF9400D3, + DeepPink = 0xFFFF1493, + DeepSkyBlue = 0xFF00BFFF, + DimGray = 0xFF696969, + DodgerBlue = 0xFF1E90FF, + FireBrick = 0xFFB22222, + FloralWhite = 0xFFFFFAF0, + ForestGreen = 0xFF228B22, + Fuchsia = 0xFFFF00FF, + Gainsboro = 0xFFDCDCDC, + GhostWhite = 0xFFF8F8FF, + Gold = 0xFFFFD700, + GoldenRod = 0xFFDAA520, + Gray = 0xFF808080, + Green = 0xFF008000, + GreenYellow = 0xFFADFF2F, + HoneyDew = 0xFFF0FFF0, + HotPink = 0xFFFF69B4, + IndianRed = 0xFFCD5C5C, + Indigo = 0xFF4B0082, + Ivory = 0xFFFFFFF0, + Khaki = 0xFFF0E68C, + Lavender = 0xFFE6E6FA, + LavenderBlush = 0xFFFFF0F5, + LawnGreen = 0xFF7CFC00, + LemonChiffon = 0xFFFFFACD, + LightBlue = 0xFFADD8E6, + LightCoral = 0xFFF08080, + LightCyan = 0xFFE0FFFF, + LightGoldenRodYellow = 0xFFFAFAD2, + LightGray = 0xFFD3D3D3, + LightGreen = 0xFF90EE90, + LightPink = 0xFFFFB6C1, + LightSalmon = 0xFFFFA07A, + LightSeaGreen = 0xFF20B2AA, + LightSkyBlue = 0xFF87CEFA, + LightSlateGray = 0xFF778899, + LightSteelBlue = 0xFFB0C4DE, + LightYellow = 0xFFFFFFE0, + Lime = 0xFF00FF00, + LimeGreen = 0xFF32CD32, + Linen = 0xFFFAF0E6, + Magenta = 0xFFFF00FF, + Maroon = 0xFF800000, + MediumAquaMarine = 0xFF66CDAA, + MediumBlue = 0xFF0000CD, + MediumOrchid = 0xFFBA55D3, + MediumPurple = 0xFF9370DB, + MediumSeaGreen = 0xFF3CB371, + MediumSlateBlue = 0xFF7B68EE, + MediumSpringGreen = 0xFF00FA9A, + MediumTurquoise = 0xFF48D1CC, + MediumVioletRed = 0xFFC71585, + MidnightBlue = 0xFF191970, + MintCream = 0xFFF5FFFA, + MistyRose = 0xFFFFE4E1, + Moccasin = 0xFFFFE4B5, + NavajoWhite = 0xFFFFDEAD, + Navy = 0xFF000080, + OldLace = 0xFFFDF5E6, + Olive = 0xFF808000, + OliveDrab = 0xFF6B8E23, + Orange = 0xFFFFA500, + OrangeRed = 0xFFFF4500, + Orchid = 0xFFDA70D6, + PaleGoldenRod = 0xFFEEE8AA, + PaleGreen = 0xFF98FB98, + PaleTurquoise = 0xFFAFEEEE, + PaleVioletRed = 0xFFDB7093, + PapayaWhip = 0xFFFFEFD5, + PeachPuff = 0xFFFFDAB9, + Peru = 0xFFCD853F, + Pink = 0xFFFFC0CB, + Plum = 0xFFDDA0DD, + PowderBlue = 0xFFB0E0E6, + Purple = 0xFF800080, + Red = 0xFFFF0000, + RosyBrown = 0xFFBC8F8F, + RoyalBlue = 0xFF4169E1, + SaddleBrown = 0xFF8B4513, + Salmon = 0xFFFA8072, + SandyBrown = 0xFFF4A460, + SeaGreen = 0xFF2E8B57, + SeaShell = 0xFFFFF5EE, + Sienna = 0xFFA0522D, + Silver = 0xFFC0C0C0, + SkyBlue = 0xFF87CEEB, + SlateBlue = 0xFF6A5ACD, + SlateGray = 0xFF708090, + Snow = 0xFFFFFAFA, + SpringGreen = 0xFF00FF7F, + SteelBlue = 0xFF4682B4, + Tan = 0xFFD2B48C, + Teal = 0xFF008080, + Thistle = 0xFFD8BFD8, + Tomato = 0xFFFF6347, + Turquoise = 0xFF40E0D0, + Violet = 0xFFEE82EE, + Wheat = 0xFFF5DEB3, + White = 0xFFFFFFFF, + WhiteSmoke = 0xFFF5F5F5, + Yellow = 0xFFFFFF00, + YellowGreen = 0xFF9ACD32, + }; + }; + + struct Filter + { + enum Type : uint32_t + { + None, + + // CPU + AI, + Animation, + Audio, + Debug, + Camera, + Cloth, + GameLogic, + Input, + Navigation, + Network, + Physics, + Rendering, + Scene, + Script, + Streaming, + UI, + VFX, + Visibility, + Wait, + + // IO + IO, + + // GPU + GPU_Cloth, + GPU_Lighting, + GPU_PostFX, + GPU_Reflections, + GPU_Scene, + GPU_Shadows, + GPU_UI, + GPU_VFX, + GPU_Water, + + }; + }; + + #define OPTICK_MAKE_CATEGORY(filter, color) (((uint64_t)(1ull) << (filter + 32)) | (uint64_t)color) + + struct Category + { + enum Type : uint64_t + { + // CPU + None = OPTICK_MAKE_CATEGORY(Filter::None, Color::Null), + AI = OPTICK_MAKE_CATEGORY(Filter::AI, Color::Purple), + Animation = OPTICK_MAKE_CATEGORY(Filter::Animation, Color::LightSkyBlue), + Audio = OPTICK_MAKE_CATEGORY(Filter::Audio, Color::HotPink), + Debug = OPTICK_MAKE_CATEGORY(Filter::Debug, Color::Black), + Camera = OPTICK_MAKE_CATEGORY(Filter::Camera, Color::Black), + Cloth = OPTICK_MAKE_CATEGORY(Filter::Cloth, Color::DarkGreen), + GameLogic = OPTICK_MAKE_CATEGORY(Filter::GameLogic, Color::RoyalBlue), + Input = OPTICK_MAKE_CATEGORY(Filter::Input, Color::Ivory), + Navigation = OPTICK_MAKE_CATEGORY(Filter::Navigation, Color::Magenta), + Network = OPTICK_MAKE_CATEGORY(Filter::Network, Color::Olive), + Physics = OPTICK_MAKE_CATEGORY(Filter::Physics, Color::LawnGreen), + Rendering = OPTICK_MAKE_CATEGORY(Filter::Rendering, Color::BurlyWood), + Scene = OPTICK_MAKE_CATEGORY(Filter::Scene, Color::RoyalBlue), + Script = OPTICK_MAKE_CATEGORY(Filter::Script, Color::Plum), + Streaming = OPTICK_MAKE_CATEGORY(Filter::Streaming, Color::Gold), + UI = OPTICK_MAKE_CATEGORY(Filter::UI, Color::PaleTurquoise), + VFX = OPTICK_MAKE_CATEGORY(Filter::VFX, Color::SaddleBrown), + Visibility = OPTICK_MAKE_CATEGORY(Filter::Visibility, Color::Snow), + Wait = OPTICK_MAKE_CATEGORY(Filter::Wait, Color::Tomato), + WaitEmpty = OPTICK_MAKE_CATEGORY(Filter::Wait, Color::White), + // IO + IO = OPTICK_MAKE_CATEGORY(Filter::IO, Color::Khaki), + // GPU + GPU_Cloth = OPTICK_MAKE_CATEGORY(Filter::GPU_Cloth, Color::DarkGreen), + GPU_Lighting = OPTICK_MAKE_CATEGORY(Filter::GPU_Lighting, Color::Khaki), + GPU_PostFX = OPTICK_MAKE_CATEGORY(Filter::GPU_PostFX, Color::Maroon), + GPU_Reflections = OPTICK_MAKE_CATEGORY(Filter::GPU_Reflections, Color::CadetBlue), + GPU_Scene = OPTICK_MAKE_CATEGORY(Filter::GPU_Scene, Color::RoyalBlue), + GPU_Shadows = OPTICK_MAKE_CATEGORY(Filter::GPU_Shadows, Color::LightSlateGray), + GPU_UI = OPTICK_MAKE_CATEGORY(Filter::GPU_UI, Color::PaleTurquoise), + GPU_VFX = OPTICK_MAKE_CATEGORY(Filter::GPU_VFX, Color::SaddleBrown), + GPU_Water = OPTICK_MAKE_CATEGORY(Filter::GPU_Water, Color::SteelBlue), + }; + + static uint32_t GetMask(Type t) { return (uint32_t)(t >> 32); } + static uint32_t GetColor(Type t) { return (uint32_t)(t); } + }; + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +} + + +namespace Optick +{ +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct Mode +{ + enum Type + { + OFF = 0x0, + INSTRUMENTATION_CATEGORIES = (1 << 0), + INSTRUMENTATION_EVENTS = (1 << 1), + INSTRUMENTATION = (INSTRUMENTATION_CATEGORIES | INSTRUMENTATION_EVENTS), + SAMPLING = (1 << 2), + TAGS = (1 << 3), + AUTOSAMPLING = (1 << 4), + SWITCH_CONTEXT = (1 << 5), + IO = (1 << 6), + GPU = (1 << 7), + END_SCREENSHOT = (1 << 8), + RESERVED_0 = (1 << 9), + RESERVED_1 = (1 << 10), + HW_COUNTERS = (1 << 11), + LIVE = (1 << 12), + RESERVED_2 = (1 << 13), + RESERVED_3 = (1 << 14), + RESERVED_4 = (1 << 15), + SYS_CALLS = (1 << 16), + OTHER_PROCESSES = (1 << 17), + + TRACER = AUTOSAMPLING | SWITCH_CONTEXT | SYS_CALLS, + DEFAULT = INSTRUMENTATION | TAGS | AUTOSAMPLING | SWITCH_CONTEXT | IO | GPU | SYS_CALLS | OTHER_PROCESSES, + }; +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API int64_t GetHighPrecisionTime(); +OPTICK_API int64_t GetHighPrecisionFrequency(); +OPTICK_API uint32_t NextFrame(); +OPTICK_API bool IsActive(Mode::Type mode = Mode::INSTRUMENTATION_EVENTS); +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct EventStorage; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API bool RegisterFiber(uint64_t fiberId, EventStorage** slot); +OPTICK_API bool RegisterThread(const char* name); +OPTICK_API bool RegisterThread(const wchar_t* name); +OPTICK_API bool UnRegisterThread(bool keepAlive); +OPTICK_API EventStorage** GetEventStorageSlotForCurrentThread(); +OPTICK_API bool IsFiberStorage(EventStorage* fiberStorage); +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct ThreadMask +{ + enum Type + { + None = 0, + Main = 1 << 0, + GPU = 1 << 1, + IO = 1 << 2, + Idle = 1 << 3, + }; +}; + +OPTICK_API EventStorage* RegisterStorage(const char* name, uint64_t threadID = uint64_t(-1), ThreadMask::Type type = ThreadMask::None); +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct State +{ + enum Type + { + // Starting a new capture + START_CAPTURE, + + // Stopping current capture + STOP_CAPTURE, + + // Dumping capture to the GUI + // Useful for attaching summary and screenshot to the capture + DUMP_CAPTURE, + + // Cancel current capture + CANCEL_CAPTURE, + }; +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Sets a state change callback +typedef bool (*StateCallback)(State::Type state); +OPTICK_API bool SetStateChangedCallback(StateCallback cb); + +// Attaches a key-value pair to the capture's summary +// Example: AttachSummary("Version", "v12.0.1"); +// AttachSummary("Platform", "Windows"); +// AttachSummary("Config", "Release_x64"); +// AttachSummary("Settings", "Ultra"); +// AttachSummary("Map", "Atlantida"); +// AttachSummary("Position", "123.0,120.0,41.1"); +// AttachSummary("CPU", "Intel(R) Xeon(R) CPU E5410@2.33GHz"); +// AttachSummary("GPU", "NVIDIA GeForce GTX 980 Ti"); +OPTICK_API bool AttachSummary(const char* key, const char* value); + +struct File +{ + enum Type + { + // Supported formats: PNG, JPEG, BMP, TIFF + OPTICK_IMAGE, + + // Text file + OPTICK_TEXT, + + // Any other type + OPTICK_OTHER, + }; +}; +// Attaches a file to the current capture +OPTICK_API bool AttachFile(File::Type type, const char* name, const uint8_t* data, uint32_t size); +OPTICK_API bool AttachFile(File::Type type, const char* name, const char* path); +OPTICK_API bool AttachFile(File::Type type, const char* name, const wchar_t* path); +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct EventDescription; +struct Frame; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct EventTime +{ + static const int64_t INVALID_TIMESTAMP = (int64_t)-1; + + int64_t start; + int64_t finish; + + OPTICK_INLINE void Start() { start = Optick::GetHighPrecisionTime(); } + OPTICK_INLINE void Stop() { finish = Optick::GetHighPrecisionTime(); } +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct EventData : public EventTime +{ + const EventDescription* description; + + bool operator<(const EventData& other) const + { + if (start != other.start) + return start < other.start; + + // Reversed order for finish intervals (parent first) + return finish > other.finish; + } +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct OPTICK_API SyncData : public EventTime +{ + uint64_t newThreadId; + uint64_t oldThreadId; + uint8_t core; + int8_t reason; +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct OPTICK_API FiberSyncData : public EventTime +{ + uint64_t threadId; + + static void AttachToThread(EventStorage* storage, uint64_t threadId); + static void DetachFromThread(EventStorage* storage); +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +template<class T> +struct TagData +{ + const EventDescription* description; + int64_t timestamp; + T data; + TagData() {} + TagData(const EventDescription& desc, T d) : description(&desc), timestamp(Optick::GetHighPrecisionTime()), data(d) {} +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct OPTICK_API EventDescription +{ + // HOT \\ + // Have to place "hot" variables at the beginning of the class (here will be some padding) + // COLD // + + const char* name; + const char* file; + uint32_t line; + uint32_t index; + uint32_t color; + uint32_t filter; + float budget; + + static EventDescription* Create(const char* eventName, const char* fileName, const unsigned long fileLine, const unsigned long eventColor = Color::Null, const unsigned long filter = 0); + static EventDescription* CreateShared(const char* eventName, const char* fileName = nullptr, const unsigned long fileLine = 0, const unsigned long eventColor = Color::Null, const unsigned long filter = 0); + + EventDescription(); +private: + friend class EventDescriptionBoard; + EventDescription& operator=(const EventDescription&); +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct OPTICK_API Event +{ + EventData* data; + + static EventData* Start(const EventDescription& description); + static void Stop(EventData& data); + + static void Push(const char* name); + static void Push(const EventDescription& description); + static void Pop(); + + static void Add(EventStorage* storage, const EventDescription* description, int64_t timestampStart, int64_t timestampFinish); + static void Push(EventStorage* storage, const EventDescription* description, int64_t timestampStart); + static void Pop(EventStorage* storage, int64_t timestampStart); + + + Event(const EventDescription& description) + { + data = Start(description); + } + + ~Event() + { + if (data) + Stop(*data); + } +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_INLINE Optick::EventDescription* CreateDescription(const char* functionName, const char* fileName, int fileLine, const char* eventName = nullptr, const ::Optick::Category::Type category = ::Optick::Category::None) +{ + return ::Optick::EventDescription::Create(eventName != nullptr ? eventName : functionName, fileName, (unsigned long)fileLine, ::Optick::Category::GetColor(category), ::Optick::Category::GetMask(category)); +} +OPTICK_INLINE Optick::EventDescription* CreateDescription(const char* functionName, const char* fileName, int fileLine, const ::Optick::Category::Type category) +{ + return ::Optick::EventDescription::Create(functionName, fileName, (unsigned long)fileLine, ::Optick::Category::GetColor(category), ::Optick::Category::GetMask(category)); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct OPTICK_API GPUEvent +{ + EventData* data; + + static EventData* Start(const EventDescription& description); + static void Stop(EventData& data); + + GPUEvent(const EventDescription& description) + { + data = Start(description); + } + + ~GPUEvent() + { + if (data) + Stop(*data); + } +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct OPTICK_API Tag +{ + static void Attach(const EventDescription& description, float val); + static void Attach(const EventDescription& description, int32_t val); + static void Attach(const EventDescription& description, uint32_t val); + static void Attach(const EventDescription& description, uint64_t val); + static void Attach(const EventDescription& description, float val[3]); + static void Attach(const EventDescription& description, const char* val); + + // Derived + static void Attach(const EventDescription& description, float x, float y, float z) + { + float p[3] = { x, y, z }; Attach(description, p); + } + +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct ThreadScope +{ + bool keepAlive; + + ThreadScope(const char* name, bool bKeepAlive = false) : keepAlive(bKeepAlive) + { + RegisterThread(name); + } + + ThreadScope(const wchar_t* name) + { + RegisterThread(name); + } + + ~ThreadScope() + { + UnRegisterThread(keepAlive); + } +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +enum OPTICK_API GPUQueueType +{ + GPU_QUEUE_GRAPHICS, + GPU_QUEUE_COMPUTE, + GPU_QUEUE_TRANSFER, + GPU_QUEUE_VSYNC, + + GPU_QUEUE_COUNT, +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct OPTICK_API GPUContext +{ + void* cmdBuffer; + GPUQueueType queue; + int node; + GPUContext(void* c = nullptr, GPUQueueType q = GPU_QUEUE_GRAPHICS, int n = 0) : cmdBuffer(c), queue(q), node(n) {} +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API void InitGpuD3D12(void* device, void** cmdQueues, uint32_t numQueues); +OPTICK_API void InitGpuVulkan(void* vkDevices, void* vkPhysicalDevices, void* vkQueues, uint32_t* cmdQueuesFamily, uint32_t numQueues); +OPTICK_API void GpuFlip(void* swapChain); +OPTICK_API GPUContext SetGpuContext(GPUContext context); +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct OPTICK_API GPUContextScope +{ + GPUContext prevContext; + + GPUContextScope(ID3D12CommandList* cmdList, GPUQueueType queue = GPU_QUEUE_GRAPHICS, int node = 0) + { + prevContext = SetGpuContext(GPUContext(cmdList, queue, node)); + } + + GPUContextScope(VkCommandBuffer cmdBuffer, GPUQueueType queue = GPU_QUEUE_GRAPHICS, int node = 0) + { + prevContext = SetGpuContext(GPUContext(cmdBuffer, queue, node)); + } + + ~GPUContextScope() + { + SetGpuContext(prevContext); + } +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct FrameType +{ + enum Type + { + CPU, + GPU, + Render, + COUNT, + }; +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API const EventDescription* GetFrameDescription(FrameType::Type frame); +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +} + +#define OPTICK_UNUSED(x) (void)(x) +// Workaround for gcc compiler +#define OPTICK_VA_ARGS(...) , ##__VA_ARGS__ + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Scoped profiling event which automatically grabs current function name. +// Use tis macro 95% of the time. +// Example A: +// void Function() +// { +// OPTICK_EVENT(); +// ... code ... +// } +// or +// void Function() +// { +// OPTICK_EVENT("CustomFunctionName"); +// ... code ... +// } +// Notes: +// Optick captures full name of the function including name space and arguments. +// Full name is usually shortened in the Optick GUI in order to highlight the most important bits. +#define OPTICK_EVENT(...) static ::Optick::EventDescription* OPTICK_CONCAT(autogen_description_, __LINE__) = nullptr; \ + if (OPTICK_CONCAT(autogen_description_, __LINE__) == nullptr) OPTICK_CONCAT(autogen_description_, __LINE__) = ::Optick::CreateDescription(OPTICK_FUNC, __FILE__, __LINE__ OPTICK_VA_ARGS(__VA_ARGS__)); \ + ::Optick::Event OPTICK_CONCAT(autogen_event_, __LINE__)( *(OPTICK_CONCAT(autogen_description_, __LINE__)) ); + +// Backward compatibility with previous versions of Optick +//#if !defined(PROFILE) +//#define PROFILE OPTICK_EVENT() +//#endif + +// Scoped profiling macro with predefined color. +// Use this macro for high-level function calls (e.g. AI, Physics, Audio, Render etc.). +// Example: +// void UpdateAI() +// { +// OPTICK_CATEGORY("UpdateAI", Optick::Category::AI); +// ... code ... +// } +// +// Macro could automatically capture current function name: +// void UpdateAI() +// { +// OPTICK_CATEGORY(OPTICK_FUNC, Optick::Category::AI); +// ... code ... +// } +#define OPTICK_CATEGORY(NAME, CATEGORY) OPTICK_EVENT(NAME, CATEGORY) + +// Profiling event for Main Loop update. +// You need to call this function in the beginning of the each new frame. +// Example: +// while (true) +// { +// OPTICK_FRAME("MainThread"); +// ... code ... +// } +#define OPTICK_FRAME(FRAME_NAME) static ::Optick::ThreadScope mainThreadScope(FRAME_NAME); \ + OPTICK_UNUSED(mainThreadScope); \ + uint32_t frameNumber = ::Optick::NextFrame(); \ + ::Optick::Event OPTICK_CONCAT(autogen_event_, __LINE__)(*::Optick::GetFrameDescription(::Optick::FrameType::CPU)); \ + OPTICK_TAG("Frame", frameNumber); + + +// Thread registration macro. +// Example: +// void WorkerThread(...) +// { +// OPTICK_THREAD("Worker"); +// while (isRunning) +// { +// ... +// } +// } +#define OPTICK_THREAD(THREAD_NAME) ::Optick::ThreadScope brofilerThreadScope(THREAD_NAME); \ + OPTICK_UNUSED(brofilerThreadScope); \ + + +// Thread registration macros. +// Useful for integration with custom job-managers. +#define OPTICK_START_THREAD(FRAME_NAME) ::Optick::RegisterThread(FRAME_NAME); +#define OPTICK_STOP_THREAD() ::Optick::UnRegisterThread(false); + +// Attaches a custom data-tag. +// Supported types: int32, uint32, uint64, vec3, string (cut to 32 characters) +// Example: +// OPTICK_TAG("PlayerName", name[index]); +// OPTICK_TAG("Health", 100); +// OPTICK_TAG("Score", 0x80000000u); +// OPTICK_TAG("Height(cm)", 176.3f); +// OPTICK_TAG("Address", (uint64)*this); +// OPTICK_TAG("Position", 123.0f, 456.0f, 789.0f); +#define OPTICK_TAG(NAME, ...) static ::Optick::EventDescription* OPTICK_CONCAT(autogen_tag_, __LINE__) = nullptr; \ + if (OPTICK_CONCAT(autogen_tag_, __LINE__) == nullptr) OPTICK_CONCAT(autogen_tag_, __LINE__) = ::Optick::EventDescription::Create( NAME, __FILE__, __LINE__ ); \ + ::Optick::Tag::Attach(*OPTICK_CONCAT(autogen_tag_, __LINE__), __VA_ARGS__); \ + +// Scoped macro with DYNAMIC name. +// Optick holds a copy of the provided name. +// Each scope does a search in hashmap for the name. +// Please use variations with STATIC names where it's possible. +// Use this macro for quick prototyping or intergratoin with other profiling systems (e.g. UE4) +// Example: +// const char* name = ... ; +// OPTICK_EVENT_DYNAMIC(name); +#define OPTICK_EVENT_DYNAMIC(NAME) OPTICK_CUSTOM_EVENT(::Optick::EventDescription::CreateShared(NAME, __FILE__, __LINE__)); +// Push\Pop profiling macro with DYNAMIC name. +#define OPTICK_PUSH_DYNAMIC(NAME) ::Optick::Event::Push(NAME); + +// Push\Pop profiling macro with STATIC name. +// Please avoid using Push\Pop approach in favor for scoped macros. +// For backward compatibility with some engines. +// Example: +// OPTICK_PUSH("ScopeName"); +// ... +// OPTICK_POP(); +#define OPTICK_PUSH(NAME) static ::Optick::EventDescription* OPTICK_CONCAT(autogen_description_, __LINE__) = nullptr; \ + if (OPTICK_CONCAT(autogen_description_, __LINE__) == nullptr) OPTICK_CONCAT(autogen_description_, __LINE__) = ::Optick::EventDescription::Create( NAME, __FILE__, __LINE__ ); \ + ::Optick::Event::Push(*OPTICK_CONCAT(autogen_description_, __LINE__)); +#define OPTICK_POP() ::Optick::Event::Pop(); + + +// Scoped macro with predefined Optick::EventDescription. +// Use these events instead of DYNAMIC macros to minimize overhead. +// Common use-case: integrating Optick with internal script languages (e.g. Lua, Actionscript(Scaleform), etc.). +// Example: +// Generating EventDescription once during initialization: +// Optick::EventDescription* description = Optick::EventDescription::CreateShared("FunctionName"); +// +// Then we could just use a pointer to cached description later for profiling: +// OPTICK_CUSTOM_EVENT(description); +#define OPTICK_CUSTOM_EVENT(DESCRIPTION) ::Optick::Event OPTICK_CONCAT(autogen_event_, __LINE__)( *DESCRIPTION ); \ + +// Registration of a custom EventStorage (e.g. GPU, IO, etc.) +// Use it to present any extra information on the timeline. +// Example: +// Optick::EventStorage* IOStorage = Optick::RegisterStorage("I/O"); +// Notes: +// Registration of a new storage is thread-safe. +#define OPTICK_STORAGE_REGISTER(STORAGE_NAME) ::Optick::RegisterStorage(STORAGE_NAME); + +// Adding events to the custom storage. +// Helps to integrate Optick into already existing profiling systems (e.g. GPU Profiler, I/O profiler, etc.). +// Example: +// //Registering a storage - should be done once during initialization +// static Optick::EventStorage* IOStorage = Optick::RegisterStorage("I/O"); +// +// int64_t cpuTimestampStart = Optick::GetHighPrecisionTime(); +// ... +// int64_t cpuTimestampFinish = Optick::GetHighPrecisionTime(); +// +// //Creating a shared event-description +// static Optick::EventDescription* IORead = Optick::EventDescription::CreateShared("IO Read"); +// +// OPTICK_STORAGE_EVENT(IOStorage, IORead, cpuTimestampStart, cpuTimestampFinish); +// Notes: +// It's not thread-safe to add events to the same storage from multiple threads. +// Please guarantee thread-safety on the higher level if access from multiple threads to the same storage is required. +#define OPTICK_STORAGE_EVENT(STORAGE, DESCRIPTION, CPU_TIMESTAMP_START, CPU_TIMESTAMP_FINISH) if (::Optick::IsActive()) { ::Optick::Event::Add(STORAGE, DESCRIPTION, CPU_TIMESTAMP_START, CPU_TIMESTAMP_FINISH); } +#define OPTICK_STORAGE_PUSH(STORAGE, DESCRIPTION, CPU_TIMESTAMP_START) if (::Optick::IsActive()) { ::Optick::Event::Push(STORAGE, DESCRIPTION, CPU_TIMESTAMP_START); } +#define OPTICK_STORAGE_POP(STORAGE, CPU_TIMESTAMP_FINISH) if (::Optick::IsActive()) { ::Optick::Event::Pop(STORAGE, CPU_TIMESTAMP_FINISH); } + + +// Registers state change callback +// If callback returns false - the call is repeated the next frame +#define OPTICK_SET_STATE_CHANGED_CALLBACK(CALLBACK) ::Optick::SetStateChangedCallback(CALLBACK); + + +// GPU events +#define OPTICK_GPU_INIT_D3D12(DEVICE, CMD_QUEUES, NUM_CMD_QUEUS) ::Optick::InitGpuD3D12(DEVICE, CMD_QUEUES, NUM_CMD_QUEUS); +#define OPTICK_GPU_INIT_VULKAN(DEVICES, PHYSICAL_DEVICES, CMD_QUEUES, CMD_QUEUES_FAMILY, NUM_CMD_QUEUS) ::Optick::InitGpuVulkan(DEVICES, PHYSICAL_DEVICES, CMD_QUEUES, CMD_QUEUES_FAMILY, NUM_CMD_QUEUS); + +// Setup GPU context: +// Params: +// (CommandBuffer\CommandList, [Optional] Optick::GPUQueue queue, [Optional] int NodeIndex) +// Examples: +// OPTICK_GPU_CONTEXT(cmdBuffer); - all OPTICK_GPU_EVENT will use the same command buffer within the scope +// OPTICK_GPU_CONTEXT(cmdBuffer, Optick::GPU_QUEUE_COMPUTE); - all events will use the same command buffer and queue for the scope +// OPTICK_GPU_CONTEXT(cmdBuffer, Optick::GPU_QUEUE_COMPUTE, gpuIndex); - all events will use the same command buffer and queue for the scope +#define OPTICK_GPU_CONTEXT(...) ::Optick::GPUContextScope OPTICK_CONCAT(gpu_autogen_context_, __LINE__)(__VA_ARGS__); \ + (void)OPTICK_CONCAT(gpu_autogen_context_, __LINE__); + +#define OPTICK_GPU_EVENT(NAME) OPTICK_EVENT(NAME); \ + static ::Optick::EventDescription* OPTICK_CONCAT(gpu_autogen_description_, __LINE__) = nullptr; \ + if (OPTICK_CONCAT(gpu_autogen_description_, __LINE__) == nullptr) OPTICK_CONCAT(gpu_autogen_description_, __LINE__) = ::Optick::EventDescription::Create( NAME, __FILE__, __LINE__ ); \ + ::Optick::GPUEvent OPTICK_CONCAT(gpu_autogen_event_, __LINE__)( *(OPTICK_CONCAT(gpu_autogen_description_, __LINE__)) ); \ + +#define OPTICK_GPU_FLIP(SWAP_CHAIN) ::Optick::GpuFlip(SWAP_CHAIN); + +#else +#define OPTICK_EVENT(...) +#define OPTICK_CATEGORY(NAME, COLOR) +#define OPTICK_FRAME(NAME) +#define OPTICK_THREAD(FRAME_NAME) +#define OPTICK_START_THREAD(FRAME_NAME) +#define OPTICK_STOP_THREAD() +#define OPTICK_TAG(NAME, DATA) +#define OPTICK_EVENT_DYNAMIC(NAME) +#define OPTICK_PUSH_DYNAMIC(NAME) +#define OPTICK_PUSH(NAME) +#define OPTICK_POP() +#define OPTICK_CUSTOM_EVENT(DESCRIPTION) +#define OPTICK_STORAGE_REGISTER(STORAGE_NAME) +#define OPTICK_STORAGE_EVENT(STORAGE, DESCRIPTION, CPU_TIMESTAMP_START, CPU_TIMESTAMP_FINISH) +#define OPTICK_STORAGE_PUSH(STORAGE, DESCRIPTION, CPU_TIMESTAMP_START) +#define OPTICK_STORAGE_POP(STORAGE, CPU_TIMESTAMP_FINISH) +#define OPTICK_SET_STATE_CHANGED_CALLBACK(CALLBACK) +#define OPTICK_GPU_INIT_D3D12(DEVICE, CMD_QUEUES, NUM_CMD_QUEUS) +#define OPTICK_GPU_INIT_VULKAN(DEVICES, PHYSICAL_DEVICES, CMD_QUEUES, CMD_QUEUES_FAMILY, NUM_CMD_QUEUS) +#define OPTICK_GPU_CONTEXT(...) +#define OPTICK_GPU_EVENT(NAME) +#define OPTICK_GPU_FLIP(SWAP_CHAIN) +#endif diff --git a/external/optick/optick_common.h b/external/optick/optick_common.h new file mode 100644 index 0000000..4468911 --- /dev/null +++ b/external/optick/optick_common.h @@ -0,0 +1,142 @@ +#pragma once + +#include "optick.config.h" + +#if USE_OPTICK + +#include "optick.h" + +#include <cstdio> +#include <stdarg.h> +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> + +#if defined(OPTICK_MSVC) +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif +#define NOMINMAX +#include <windows.h> +#endif + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Types +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +typedef signed char int8; +typedef unsigned char uint8; +typedef unsigned char byte; +typedef short int16; +typedef unsigned short uint16; +typedef int int32; +typedef unsigned int uint32; +#if defined(OPTICK_MSVC) +typedef __int64 int64; +typedef unsigned __int64 uint64; +#elif defined(OPTICK_GCC) +typedef int64_t int64; +typedef uint64_t uint64; +#else +#error Compiler is not supported +#endif +static_assert(sizeof(int8) == 1, "Invalid type size, int8"); +static_assert(sizeof(uint8) == 1, "Invalid type size, uint8"); +static_assert(sizeof(byte) == 1, "Invalid type size, byte"); +static_assert(sizeof(int16) == 2, "Invalid type size, int16"); +static_assert(sizeof(uint16) == 2, "Invalid type size, uint16"); +static_assert(sizeof(int32) == 4, "Invalid type size, int32"); +static_assert(sizeof(uint32) == 4, "Invalid type size, uint32"); +static_assert(sizeof(int64) == 8, "Invalid type size, int64"); +static_assert(sizeof(uint64) == 8, "Invalid type size, uint64"); +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +typedef uint64 ThreadID; +static const ThreadID INVALID_THREAD_ID = (ThreadID)-1; +typedef uint32 ProcessID; +static const ProcessID INVALID_PROCESS_ID = (ProcessID)-1; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Memory +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#if defined(OPTICK_MSVC) +#define OPTICK_ALIGN(N) __declspec( align( N ) ) +#elif defined(OPTICK_GCC) +#define OPTICK_ALIGN(N) __attribute__((aligned(N))) +#else +#error Can not define OPTICK_ALIGN. Unknown platform. +#endif +#define OPTICK_CACHE_LINE_SIZE 64 +#define OPTICK_ALIGN_CACHE OPTICK_ALIGN(OPTICK_CACHE_LINE_SIZE) +#define OPTICK_ARRAY_SIZE(ARR) (sizeof(ARR)/sizeof((ARR)[0])) +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#if defined(OPTICK_MSVC) +#define OPTICK_NOINLINE __declspec(noinline) +#elif defined(OPTICK_GCC) +#define OPTICK_NOINLINE __attribute__((__noinline__)) +#else +#error Compiler is not supported +#endif +//////////////////////////////////////////////////////////////////////// +// OPTICK_THREAD_LOCAL +//////////////////////////////////////////////////////////////////////// +#if defined(OPTICK_MSVC) +#define OPTICK_THREAD_LOCAL __declspec(thread) +#elif defined(OPTICK_GCC) +#define OPTICK_THREAD_LOCAL __thread +#else +#error Can not define OPTICK_THREAD_LOCAL. Unknown platform. +#endif + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Asserts +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#if defined(OPTICK_MSVC) +#define OPTICK_DEBUG_BREAK __debugbreak() +#elif defined(OPTICK_GCC) +#define OPTICK_DEBUG_BREAK __builtin_trap() +#else + #error Can not define OPTICK_DEBUG_BREAK. Unknown platform. +#endif +#define OPTICK_UNUSED(x) (void)(x) +#ifdef _DEBUG + #define OPTICK_ASSERT(arg, description) if (!(arg)) { OPTICK_DEBUG_BREAK; } + #define OPTICK_FAILED(description) { OPTICK_DEBUG_BREAK; } +#else + #define OPTICK_ASSERT(arg, description) + #define OPTICK_FAILED(description) +#endif +#define OPTICK_VERIFY(arg, description, operation) if (!(arg)) { OPTICK_DEBUG_BREAK; operation; } +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Safe functions +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#if defined(OPTICK_LINUX) || defined(OPTICK_OSX) +template<size_t sizeOfBuffer> +inline int sprintf_s(char(&buffer)[sizeOfBuffer], const char* format, ...) +{ + va_list ap; + va_start(ap, format); + int result = vsnprintf(buffer, sizeOfBuffer, format, ap); + va_end(ap); + return result; +} +#endif + +#if defined(OPTICK_GCC) +template<size_t sizeOfBuffer> +inline int wcstombs_s(char(&buffer)[sizeOfBuffer], const wchar_t* src, size_t maxCount) +{ + return wcstombs(buffer, src, maxCount); +} +#endif + +#if defined(OPTICK_MSVC) +template<size_t sizeOfBuffer> +inline int wcstombs_s(char(&buffer)[sizeOfBuffer], const wchar_t* src, size_t maxCount) +{ + size_t converted = 0; + return wcstombs_s(&converted, buffer, src, maxCount); +} +#endif +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#endif //USE_OPTICK
\ No newline at end of file diff --git a/external/optick/optick_core.cpp b/external/optick/optick_core.cpp new file mode 100644 index 0000000..1d533d0 --- /dev/null +++ b/external/optick/optick_core.cpp @@ -0,0 +1,1657 @@ +#include "optick.config.h" + +#if USE_OPTICK + +#include "optick_core.h" +#include "optick_server.h" + +#include <algorithm> +#include <fstream> + +////////////////////////////////////////////////////////////////////////// +// Start of the Platform-specific stuff +////////////////////////////////////////////////////////////////////////// +#if defined(OPTICK_MSVC) +#include "optick_core.win.h" +#endif +#if defined(OPTICK_LINUX) +#include "optick_core.linux.h" +#endif +#if defined(OPTICK_OSX) +#include "optick_core.macos.h" +#endif +#if defined(OPTICK_PS4) +#include "optick_core.ps4.h" +#endif +////////////////////////////////////////////////////////////////////////// +// End of the Platform-specific stuff +////////////////////////////////////////////////////////////////////////// + +extern "C" Optick::EventData* NextEvent() +{ + if (Optick::EventStorage* storage = Optick::Core::storage) + { + return &storage->NextEvent(); + } + + return nullptr; +} + +namespace Optick +{ +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void* (*Memory::allocate)(size_t) = operator new; +void (*Memory::deallocate)(void* p) = operator delete; +std::atomic<uint64_t> Memory::memAllocated; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +uint64_t MurmurHash64A(const void * key, int len, uint64_t seed) +{ + const uint64_t m = 0xc6a4a7935bd1e995; + const int r = 47; + + uint64_t h = seed ^ (len * m); + + const uint64_t * data = (const uint64_t *)key; + const uint64_t * end = data + (len / 8); + + while (data != end) + { + uint64_t k = *data++; + + k *= m; + k ^= k >> r; + k *= m; + + h ^= k; + h *= m; + } + + const unsigned char * data2 = (const unsigned char*)data; + + switch (len & 7) + { + case 7: h ^= uint64_t(data2[6]) << 48; + case 6: h ^= uint64_t(data2[5]) << 40; + case 5: h ^= uint64_t(data2[4]) << 32; + case 4: h ^= uint64_t(data2[3]) << 24; + case 3: h ^= uint64_t(data2[2]) << 16; + case 2: h ^= uint64_t(data2[1]) << 8; + case 1: h ^= uint64_t(data2[0]); + h *= m; + }; + + h ^= h >> r; + h *= m; + h ^= h >> r; + + return h; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +uint64_t StringHash::CalcHash(const char* str) +{ + return MurmurHash64A(str, (int)strlen(str), 0); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Base 64 +// https://renenyffenegger.ch/notes/development/Base64/Encoding-and-decoding-base-64-with-cpp +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +static inline bool is_base64(unsigned char c) { + return (isalnum(c) || (c == '+') || (c == '/')); +} +string base64_decode(string const& encoded_string) { + static string base64_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + int in_len = (int)encoded_string.size(); + int i = 0; + int j = 0; + int in_ = 0; + unsigned char char_array_4[4], char_array_3[3]; + string ret; + + while (in_len-- && (encoded_string[in_] != '=') && is_base64(encoded_string[in_])) { + char_array_4[i++] = encoded_string[in_]; in_++; + if (i == 4) { + for (i = 0; i < 4; i++) + char_array_4[i] = (unsigned char)base64_chars.find(char_array_4[i]); + + char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4); + char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2); + char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3]; + + for (i = 0; (i < 3); i++) + ret += char_array_3[i]; + i = 0; + } + } + + if (i) { + for (j = i; j < 4; j++) + char_array_4[j] = 0; + + for (j = 0; j < 4; j++) + char_array_4[j] = (unsigned char)base64_chars.find(char_array_4[j]); + + char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4); + char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2); + char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3]; + + for (j = 0; (j < i - 1); j++) ret += char_array_3[j]; + } + + return ret; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Get current time in milliseconds +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +int64 GetTimeMilliSeconds() +{ + return Platform::GetTime() * 1000 / Platform::GetFrequency(); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +int64 TicksToMs(int64 ticks) +{ + return ticks * 1000 / Platform::GetFrequency(); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +int64 TicksToUs(int64 ticks) +{ + return ticks * 1000000 / Platform::GetFrequency(); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +template<class T> +OutputDataStream& operator<<(OutputDataStream& stream, const TagData<T>& ob) +{ + return stream << ob.timestamp << ob.description->index << ob.data; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OutputDataStream& operator<<(OutputDataStream& os, const Symbol * const symbol) +{ + OPTICK_VERIFY(symbol, "Can't serialize NULL symbol!", return os); + return os << symbol->address << symbol->function << symbol->file << symbol->line; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OutputDataStream& operator<<(OutputDataStream& os, const Module& module) +{ + return os << module.path << (uint64)module.address << (uint64)module.size; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// VS TODO: Replace with random access iterator for MemoryPool +template<class T, uint32 SIZE> +void SortMemoryPool(MemoryPool<T, SIZE>& memoryPool) +{ + size_t count = memoryPool.Size(); + if (count == 0) + return; + + vector<T> memoryArray; + memoryArray.resize(count); + memoryPool.ToArray(&memoryArray[0]); + + std::sort(memoryArray.begin(), memoryArray.end()); + + memoryPool.Clear(true); + + for (const T& item : memoryArray) + memoryPool.Add(item); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +EventDescription* EventDescription::Create(const char* eventName, const char* fileName, const unsigned long fileLine, const unsigned long eventColor /*= Color::Null*/, const unsigned long filter /*= 0*/) +{ + return EventDescriptionBoard::Get().CreateDescription(eventName, fileName, fileLine, eventColor, filter); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +EventDescription* EventDescription::CreateShared(const char* eventName, const char* fileName, const unsigned long fileLine, const unsigned long eventColor /*= Color::Null*/, const unsigned long filter /*= 0*/) +{ + return EventDescriptionBoard::Get().CreateSharedDescription(eventName, fileName, fileLine, eventColor, filter); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +EventDescription::EventDescription() : name(""), file(""), line(0), color(0) +{ +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +EventDescription& EventDescription::operator=(const EventDescription&) +{ + OPTICK_FAILED("It is pointless to copy EventDescription. Please, check you logic!"); return *this; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +EventData* Event::Start(const EventDescription& description) +{ + EventData* result = nullptr; + + if (EventStorage* storage = Core::storage) + { + result = &storage->NextEvent(); + result->description = &description; + result->Start(); + } + return result; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Event::Stop(EventData& data) +{ + if (EventStorage* storage = Core::storage) + { + data.Stop(); + } +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void OPTICK_INLINE PushEvent(EventStorage* pStorage, const EventDescription* description, int64_t timestampStart) +{ + if (EventStorage* storage = pStorage) + { + EventData& result = storage->NextEvent(); + result.description = description; + result.start = timestampStart; + result.finish = EventTime::INVALID_TIMESTAMP; + storage->pushPopEventStack[storage->pushPopEventStackIndex++] = &result; + } +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void OPTICK_INLINE PopEvent(EventStorage* pStorage, int64_t timestampFinish) +{ + if (EventStorage* storage = pStorage) + if (storage->pushPopEventStackIndex > 0) + storage->pushPopEventStack[--storage->pushPopEventStackIndex]->finish = timestampFinish; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Event::Push(const char* name) +{ + if (EventStorage* storage = Core::storage) + { + EventDescription* desc = EventDescription::CreateShared(name); + Push(*desc); + } +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Event::Push(const EventDescription& description) +{ + PushEvent(Core::storage, &description, GetHighPrecisionTime()); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Event::Pop() +{ + PopEvent(Core::storage, GetHighPrecisionTime()); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Event::Add(EventStorage* storage, const EventDescription* description, int64_t timestampStart, int64_t timestampFinish) +{ + EventData& data = storage->eventBuffer.Add(); + data.description = description; + data.start = timestampStart; + data.finish = timestampFinish; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Event::Push(EventStorage* storage, const EventDescription* description, int64_t timestampStart) +{ + PushEvent(storage, description, timestampStart); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Event::Pop(EventStorage* storage, int64_t timestampFinish) +{ + PopEvent(storage, timestampFinish); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +EventData* GPUEvent::Start(const EventDescription& description) +{ + EventData* result = nullptr; + + if (EventStorage* storage = Core::storage) + result = storage->gpuStorage.Start(description); + + return result; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void GPUEvent::Stop(EventData& data) +{ + if (EventStorage* storage = Core::storage) + storage->gpuStorage.Stop(data); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void FiberSyncData::AttachToThread(EventStorage* storage, uint64_t threadId) +{ + if (storage) + { + FiberSyncData& data = storage->fiberSyncBuffer.Add(); + data.Start(); + data.finish = EventTime::INVALID_TIMESTAMP; + data.threadId = threadId; + } +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void FiberSyncData::DetachFromThread(EventStorage* storage) +{ + if (storage) + { + if (FiberSyncData* syncData = storage->fiberSyncBuffer.Back()) + { + syncData->Stop(); + } + } +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Tag::Attach(const EventDescription& description, float val) +{ + if (EventStorage* storage = Core::storage) + if (storage->currentMode & Mode::TAGS) + storage->tagFloatBuffer.Add(TagFloat(description, val)); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Tag::Attach(const EventDescription& description, int32_t val) +{ + if (EventStorage* storage = Core::storage) + if (storage->currentMode & Mode::TAGS) + storage->tagS32Buffer.Add(TagS32(description, val)); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Tag::Attach(const EventDescription& description, uint32_t val) +{ + if (EventStorage* storage = Core::storage) + if (storage->currentMode & Mode::TAGS) + storage->tagU32Buffer.Add(TagU32(description, val)); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Tag::Attach(const EventDescription& description, uint64_t val) +{ + if (EventStorage* storage = Core::storage) + if (storage->currentMode & Mode::TAGS) + storage->tagU64Buffer.Add(TagU64(description, val)); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Tag::Attach(const EventDescription& description, float val[3]) +{ + if (EventStorage* storage = Core::storage) + if (storage->currentMode & Mode::TAGS) + storage->tagPointBuffer.Add(TagPoint(description, val)); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Tag::Attach(const EventDescription& description, const char* val) +{ + if (EventStorage* storage = Core::storage) + if (storage->currentMode & Mode::TAGS) + storage->tagStringBuffer.Add(TagString(description, val)); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OutputDataStream & operator<<(OutputDataStream &stream, const EventDescription &ob) +{ + byte flags = 0; + return stream << ob.name << ob.file << ob.line << ob.filter << ob.color << (float)0.0f << flags; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OutputDataStream& operator<<(OutputDataStream& stream, const EventTime& ob) +{ + return stream << ob.start << ob.finish; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OutputDataStream& operator<<(OutputDataStream& stream, const EventData& ob) +{ + return stream << (EventTime)(ob) << (ob.description ? ob.description->index : (uint32)-1); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OutputDataStream& operator<<(OutputDataStream& stream, const SyncData& ob) +{ + return stream << (EventTime)(ob) << ob.core << ob.reason << ob.newThreadId; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OutputDataStream& operator<<(OutputDataStream& stream, const FiberSyncData& ob) +{ + return stream << (EventTime)(ob) << ob.threadId; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +static std::mutex& GetBoardLock() +{ + // Initialize as static local variable to prevent problems with static initialization order + static std::mutex lock; + return lock; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +EventDescriptionBoard& EventDescriptionBoard::Get() +{ + static EventDescriptionBoard instance; + return instance; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +const EventDescriptionList& EventDescriptionBoard::GetEvents() const +{ + return boardDescriptions; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +EventDescription* EventDescriptionBoard::CreateDescription(const char* name, const char* file /*= nullptr*/, uint32_t line /*= 0*/, uint32_t color /*= Color::Null*/, uint32_t filter /*= 0*/) +{ + std::lock_guard<std::mutex> lock(GetBoardLock()); + + size_t index = boardDescriptions.Size(); + + EventDescription& desc = boardDescriptions.Add(); + desc.index = (uint32)index; + desc.name = name; + desc.file = file; + desc.line = line; + desc.color = color; + desc.filter = filter; + + return &desc; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +EventDescription* EventDescriptionBoard::CreateSharedDescription(const char* name, const char* file /*= nullptr*/, uint32_t line /*= 0*/, uint32_t color /*= Color::Null*/, uint32_t filter /*= 0*/) +{ + StringHash nameHash(name); + + std::lock_guard<std::mutex> lock(sharedLock); + + std::pair<DescriptionMap::iterator, bool> cached = sharedDescriptions.insert({ nameHash, nullptr }); + + if (cached.second) + { + const char* nameCopy = sharedNames.Add(name, strlen(name) + 1, false); + cached.first->second = CreateDescription(nameCopy, file, line, color, filter); + } + + return cached.first->second; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OutputDataStream& operator << (OutputDataStream& stream, const EventDescriptionBoard& ob) +{ + std::lock_guard<std::mutex> lock(GetBoardLock()); + stream << ob.GetEvents(); + return stream; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +ProcessDescription::ProcessDescription(const char* processName, ProcessID pid, uint64 key) : name(processName), processID(pid), uniqueKey(key) +{ +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +ThreadDescription::ThreadDescription(const char* threadName, ThreadID tid, ProcessID pid, int32 _maxDepth /*= 1*/, int32 _priority /*= 0*/, uint32 _mask /*= 0*/) + : name(threadName), threadID(tid), processID(pid), maxDepth(_maxDepth), priority(_priority), mask(_mask) +{ +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +int64_t GetHighPrecisionTime() +{ + return Platform::GetTime(); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +int64_t GetHighPrecisionFrequency() +{ + return Platform::GetFrequency(); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OutputDataStream & operator<<(OutputDataStream &stream, const SysCallData &ob) +{ + return stream << (const EventData&)ob << ob.threadID << ob.id; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +SysCallData& SysCallCollector::Add() +{ + return syscallPool.Add(); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void SysCallCollector::Clear() +{ + syscallPool.Clear(false); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool SysCallCollector::Serialize(OutputDataStream& stream) +{ + stream << syscallPool; + + if (!syscallPool.IsEmpty()) + { + syscallPool.Clear(false); + return true; + } + + return false; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void CallstackCollector::Add(const CallstackDesc& desc) +{ + if (uint64* storage = callstacksPool.TryAdd(desc.count + 3)) + { + storage[0] = desc.threadID; + storage[1] = desc.timestamp; + storage[2] = desc.count; + + for (uint64 i = 0; i < desc.count; ++i) + { + storage[3 + i] = desc.callstack[desc.count - i - 1]; + } + } + else + { + uint64& item0 = callstacksPool.Add(); + uint64& item1 = callstacksPool.Add(); + uint64& item2 = callstacksPool.Add(); + + item0 = desc.threadID; + item1 = desc.timestamp; + item2 = desc.count; + + for (uint64 i = 0; i < desc.count; ++i) + { + callstacksPool.Add() = desc.callstack[desc.count - i - 1]; + } + } +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void CallstackCollector::Clear() +{ + callstacksPool.Clear(false); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool CallstackCollector::SerializeModules(OutputDataStream& stream) +{ + if (SymbolEngine* symEngine = Core::Get().symbolEngine) + { + stream << symEngine->GetModules(); + return true; + } + else + { + stream << (int)0; + } + return false; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool CallstackCollector::SerializeSymbols(OutputDataStream& stream) +{ + typedef unordered_set<uint64> SymbolSet; + SymbolSet symbolSet; + + for (CallstacksPool::const_iterator it = callstacksPool.begin(); it != callstacksPool.end();) + { + CallstacksPool::const_iterator startIt = it; + OPTICK_UNUSED(startIt); + + uint64 threadID = *it; + OPTICK_UNUSED(threadID); + ++it; //Skip ThreadID + uint64 timestamp = *it; + OPTICK_UNUSED + (timestamp); + ++it; //Skip Timestamp + uint64 count = *it; + count = (count & 0xFF); + ++it; //Skip Count + + bool isBadAddrFound = false; + + for (uint64 i = 0; i < count; ++i) + { + uint64 address = *it; + ++it; + + if (address == 0) + { + isBadAddrFound = true; + } + + if (!isBadAddrFound) + { + symbolSet.insert(address); + } + } + } + + SymbolEngine* symEngine = Core::Get().symbolEngine; + + vector<const Symbol*> symbols; + symbols.reserve(symbolSet.size()); + + size_t callstackIndex = 0; + + Core::Get().DumpProgress("Resolving addresses... "); + + if (symEngine) + { + for (auto it = symbolSet.begin(); it != symbolSet.end(); ++it) + { + callstackIndex++; + + uint64 address = *it; + if (const Symbol* symbol = symEngine->GetSymbol(address)) + { + symbols.push_back(symbol); + } + } + } + + stream << symbols; + return true; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool CallstackCollector::SerializeCallstacks(OutputDataStream& stream) +{ + stream << callstacksPool; + + if (!callstacksPool.IsEmpty()) + { + callstacksPool.Clear(false); + return true; + } + + return false; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool CallstackCollector::IsEmpty() const +{ + return callstacksPool.IsEmpty(); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OutputDataStream & operator<<(OutputDataStream &stream, const SwitchContextDesc &ob) +{ + return stream << ob.timestamp << ob.oldThreadId << ob.newThreadId << ob.cpuId << ob.reason; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void SwitchContextCollector::Add(const SwitchContextDesc& desc) +{ + switchContextPool.Add() = desc; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void SwitchContextCollector::Clear() +{ + switchContextPool.Clear(false); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool SwitchContextCollector::Serialize(OutputDataStream& stream) +{ + stream << switchContextPool; + + if (!switchContextPool.IsEmpty()) + { + switchContextPool.Clear(false); + return true; + } + + return false; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#if defined(OPTICK_MSVC) +#define CPUID(INFO, ID) __cpuid(INFO, ID) +#include <intrin.h> +#elif defined(OPTICK_GCC) +#include <cpuid.h> +#define CPUID(INFO, ID) __cpuid(ID, INFO[0], INFO[1], INFO[2], INFO[3]) +#else +#error Platform is not supported! +#endif +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +string GetCPUName() +{ + int cpuInfo[4] = { -1 }; + char cpuBrandString[0x40] = { 0 }; + CPUID(cpuInfo, 0x80000000); + unsigned nExIds = cpuInfo[0]; + for (unsigned i = 0x80000000; i <= nExIds; ++i) + { + CPUID(cpuInfo, i); + if (i == 0x80000002) + memcpy(cpuBrandString, cpuInfo, sizeof(cpuInfo)); + else if (i == 0x80000003) + memcpy(cpuBrandString + 16, cpuInfo, sizeof(cpuInfo)); + else if (i == 0x80000004) + memcpy(cpuBrandString + 32, cpuInfo, sizeof(cpuInfo)); + } + return string(cpuBrandString); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +Core& Core::Get() +{ + static Core instance; + return instance; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Core::StartCapture() +{ + pendingState = State::START_CAPTURE; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Core::StopCapture() +{ + pendingState = State::STOP_CAPTURE; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Core::CancelCapture() +{ + pendingState = State::CANCEL_CAPTURE; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Core::DumpCapture() +{ + pendingState = State::DUMP_CAPTURE; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Core::DumpProgress(const char* message) +{ + progressReportedLastTimestampMS = GetTimeMilliSeconds(); + + OutputDataStream stream; + stream << message; + + Server::Get().Send(DataResponse::ReportProgress, stream); +} + + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Core::DumpEvents(EventStorage& entry, const EventTime& timeSlice, ScopeData& scope) +{ + if (!entry.eventBuffer.IsEmpty()) + { + const EventData* rootEvent = nullptr; + + entry.eventBuffer.ForEach([&](const EventData& data) + { + if (data.finish >= data.start && data.start >= timeSlice.start && timeSlice.finish >= data.finish) + { + if (!rootEvent) + { + rootEvent = &data; + scope.InitRootEvent(*rootEvent); + } + else if (rootEvent->finish < data.finish) + { + scope.Send(); + + rootEvent = &data; + scope.InitRootEvent(*rootEvent); + } + else + { + scope.AddEvent(data); + } + } + }); + + scope.Send(); + + entry.eventBuffer.Clear(false); + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Core::DumpTags(EventStorage& entry, ScopeData& scope) +{ + if (!entry.tagFloatBuffer.IsEmpty() || + !entry.tagS32Buffer.IsEmpty() || + !entry.tagU32Buffer.IsEmpty() || + !entry.tagU64Buffer.IsEmpty() || + !entry.tagPointBuffer.IsEmpty() || + !entry.tagStringBuffer.IsEmpty()) + { + OutputDataStream tagStream; + tagStream << scope.header.boardNumber << scope.header.threadNumber; + tagStream + << (uint32)0 + << entry.tagFloatBuffer + << entry.tagU32Buffer + << entry.tagS32Buffer + << entry.tagU64Buffer + << entry.tagPointBuffer + << (uint32)0 + << (uint32)0 + << entry.tagStringBuffer; + Server::Get().Send(DataResponse::TagsPack, tagStream); + + entry.ClearTags(false); + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Core::DumpThread(ThreadEntry& entry, const EventTime& timeSlice, ScopeData& scope) +{ + // We need to sort events for all the custom thread storages + if (entry.description.threadID == INVALID_THREAD_ID) + entry.Sort(); + + // Events + DumpEvents(entry.storage, timeSlice, scope); + DumpTags(entry.storage, scope); + OPTICK_ASSERT(entry.storage.fiberSyncBuffer.IsEmpty(), "Fiber switch events in native threads?"); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Core::DumpFiber(FiberEntry& entry, const EventTime& timeSlice, ScopeData& scope) +{ + // Events + DumpEvents(entry.storage, timeSlice, scope); + + if (!entry.storage.fiberSyncBuffer.IsEmpty()) + { + OutputDataStream fiberSynchronizationStream; + fiberSynchronizationStream << scope.header.boardNumber; + fiberSynchronizationStream << scope.header.fiberNumber; + fiberSynchronizationStream << entry.storage.fiberSyncBuffer; + Server::Get().Send(DataResponse::FiberSynchronizationData, fiberSynchronizationStream); + } +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +EventTime CalculateRange(const ThreadEntry& entry, const EventDescription* rootDescription) +{ + EventTime timeSlice = { INT64_MAX, INT64_MIN }; + entry.storage.eventBuffer.ForEach([&](const EventData& data) + { + if (data.description == rootDescription) + { + timeSlice.start = std::min(timeSlice.start, data.start); + timeSlice.finish = std::max(timeSlice.finish, data.finish); + } + }); + return timeSlice; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Core::DumpFrames(uint32 mode) +{ + std::lock_guard<std::recursive_mutex> lock(threadsLock); + + if (frames.empty() || threads.empty()) + return; + + ++boardNumber; + + DumpProgress("Generating summary..."); + + GenerateCommonSummary(); + DumpSummary(); + + DumpProgress("Collecting Frame Events..."); + + + ThreadID mainThreadID = Platform::GetThreadID(); + uint32 mainThreadIndex = 0; + for (size_t i = 0; i < threads.size(); ++i) + if (threads[i]->description.threadID == mainThreadID) + mainThreadIndex = (uint32)i; + + EventTime timeSlice = CalculateRange(*threads[mainThreadIndex], GetFrameDescription(FrameType::CPU)); + if (timeSlice.start >= timeSlice.finish) + { + timeSlice.start = frames.front().start; + timeSlice.finish = frames.back().finish; + } + + DumpBoard(mode, timeSlice, mainThreadIndex); + + ScopeData threadScope; + threadScope.header.boardNumber = boardNumber; + threadScope.header.fiberNumber = -1; + + if (gpuProfiler) + gpuProfiler->Dump(mode); + + for (size_t i = 0; i < threads.size(); ++i) + { + threadScope.header.threadNumber = (uint32)i; + DumpThread(*threads[i], timeSlice, threadScope); + } + + ScopeData fiberScope; + fiberScope.header.boardNumber = (uint32)boardNumber; + fiberScope.header.threadNumber = -1; + for (size_t i = 0; i < fibers.size(); ++i) + { + fiberScope.header.fiberNumber = (uint32)i; + DumpFiber(*fibers[i], timeSlice, fiberScope); + } + + frames.clear(); + CleanupThreadsAndFibers(); + + { + DumpProgress("Serializing SwitchContexts"); + OutputDataStream switchContextsStream; + switchContextsStream << boardNumber; + switchContextCollector.Serialize(switchContextsStream); + Server::Get().Send(DataResponse::SynchronizationData, switchContextsStream); + } + + { + DumpProgress("Serializing SysCalls"); + OutputDataStream callstacksStream; + callstacksStream << boardNumber; + syscallCollector.Serialize(callstacksStream); + Server::Get().Send(DataResponse::SyscallPack, callstacksStream); + } + + if (!callstackCollector.IsEmpty()) + { + DumpProgress("Resolving callstacks"); + OutputDataStream symbolsStream; + symbolsStream << boardNumber; + callstackCollector.SerializeModules(symbolsStream); + callstackCollector.SerializeSymbols(symbolsStream); + Server::Get().Send(DataResponse::CallstackDescriptionBoard, symbolsStream); + + DumpProgress("Serializing callstacks"); + OutputDataStream callstacksStream; + callstacksStream << boardNumber; + callstackCollector.SerializeCallstacks(callstacksStream); + Server::Get().Send(DataResponse::CallstackPack, callstacksStream); + } + + Server::Get().Send(DataResponse::NullFrame, OutputDataStream::Empty); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Core::DumpSummary() +{ + OutputDataStream stream; + + // Board Number + stream << boardNumber; + + // Frames + double frequency = (double)Platform::GetFrequency(); + stream << (uint32_t)frames.size(); + for (const EventTime& frame : frames) + { + double frameTimeMs = 1000.0 * (frame.finish - frame.start) / frequency; + stream << (float)frameTimeMs; + } + + // Summary + stream << (uint32_t)summary.size(); + for (size_t i = 0; i < summary.size(); ++i) + stream << summary[i].first << summary[i].second; + summary.clear(); + + // Attachments + stream << (uint32_t)attachments.size(); + for (const Attachment& att : attachments) + stream << (uint32_t)att.type << att.name << att.data; + attachments.clear(); + + // Send + Server::Get().Send(DataResponse::SummaryPack, stream); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Core::CleanupThreadsAndFibers() +{ + std::lock_guard<std::recursive_mutex> lock(threadsLock); + + for (ThreadList::iterator it = threads.begin(); it != threads.end();) + { + if (!(*it)->isAlive) + { + Memory::Delete(*it); + it = threads.erase(it); + } + else + { + ++it; + } + } +} + +void Core::DumpBoard(uint32 mode, EventTime timeSlice, uint32 mainThreadIndex) +{ + OutputDataStream boardStream; + + boardStream << boardNumber; + boardStream << Platform::GetFrequency(); + boardStream << (uint64)0; // Origin + boardStream << (uint32)0; // Precision + boardStream << timeSlice; + boardStream << threads; + boardStream << fibers; + boardStream << mainThreadIndex; + boardStream << EventDescriptionBoard::Get(); + boardStream << (uint32)0; // Tags + boardStream << (uint32)0; // Run + boardStream << (uint32)0; // Filters + boardStream << (uint32)0; // ThreadDescs + boardStream << mode; // Mode + boardStream << processDescs; + boardStream << threadDescs; + boardStream << (uint32)Platform::GetProcessID(); + boardStream << (uint32)std::thread::hardware_concurrency(); + Server::Get().Send(DataResponse::FrameDescriptionBoard, boardStream); + + // Cleanup + processDescs.clear(); + threadDescs.clear(); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Core::GenerateCommonSummary() +{ + AttachSummary("Platform", Platform::GetName()); + AttachSummary("CPU", GetCPUName().c_str()); + if (gpuProfiler) + AttachSummary("GPU", gpuProfiler->GetName().c_str()); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +Core::Core() + : progressReportedLastTimestampMS(0) + , boardNumber(0) + , frameNumber(0) + , stateCallback(nullptr) + , currentState(State::DUMP_CAPTURE) + , pendingState(State::DUMP_CAPTURE) + , currentMode(Mode::OFF) + , symbolEngine(nullptr) + , tracer(nullptr) + , gpuProfiler(nullptr) +{ +#if OPTICK_ENABLE_TRACING + tracer = Platform::GetTrace(); + symbolEngine = Platform::GetSymbolEngine(); +#endif + + frameDescriptions[FrameType::CPU] = EventDescription::Create("CPU Frame", __FILE__, __LINE__); + frameDescriptions[FrameType::GPU] = EventDescription::Create("GPU Frame", __FILE__, __LINE__); + frameDescriptions[FrameType::Render] = EventDescription::Create("Render Frame", __FILE__, __LINE__); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool Core::UpdateState() +{ + if (currentState != pendingState) + { + State::Type nextState = pendingState; + if (pendingState == State::DUMP_CAPTURE && currentState == State::START_CAPTURE) + nextState = State::STOP_CAPTURE; + + if ((stateCallback != nullptr) && !stateCallback(nextState)) + return false; + + switch (nextState) + { + case State::START_CAPTURE: + Activate((Mode::Type)settings.mode); + break; + + case State::STOP_CAPTURE: + case State::CANCEL_CAPTURE: + Activate(Mode::OFF); + break; + + case State::DUMP_CAPTURE: + DumpFrames(); + break; + } + currentState = nextState; + return true; + } + return false; +} + + +uint32_t Core::Update() +{ + std::lock_guard<std::recursive_mutex> lock(coreLock); + + if (currentMode != Mode::OFF) + { + if (!frames.empty()) + frames.back().Stop(); + + if (settings.frameLimit > 0 && frames.size() >= settings.frameLimit) + DumpCapture(); + + if (settings.timeLimitUs > 0) + { + if (TicksToUs(frames.back().finish - frames.front().start) >= settings.timeLimitUs) + DumpCapture(); + } + + if (settings.spikeLimitUs > 0) + { + if (TicksToUs(frames.back().finish - frames.back().start) >= settings.spikeLimitUs) + DumpCapture(); + } + + if (IsTimeToReportProgress()) + DumpCapturingProgress(); + } + + UpdateEvents(); + + while (UpdateState()) {} + + if (currentMode != Mode::OFF) + { + frames.push_back(EventTime()); + frames.back().Start(); + } + + return ++frameNumber; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Core::UpdateEvents() +{ + Server::Get().Update(); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool Core::ReportSwitchContext(const SwitchContextDesc& desc) +{ + switchContextCollector.Add(desc); + return true; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool Core::ReportStackWalk(const CallstackDesc& desc) +{ + callstackCollector.Add(desc); + return true; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Core::Activate(Mode::Type mode) +{ + if (mode != currentMode) + { + Mode::Type prevMode = currentMode; + currentMode = mode; + + { + std::lock_guard<std::recursive_mutex> lock(threadsLock); + for(auto it = threads.begin(); it != threads.end(); ++it) + { + ThreadEntry* entry = *it; + entry->Activate(mode); + } + } + + + if (mode != Mode::OFF) + { + CaptureStatus::Type status = CaptureStatus::ERR_TRACER_FAILED; + + if (tracer && (mode & Mode::TRACER)) + { + std::lock_guard<std::recursive_mutex> lock(threadsLock); + tracer->SetPassword(settings.password.c_str()); + status = tracer->Start(mode, settings.samplingFrequency, threads); + // Let's retry with more narrow setup + if (status != CaptureStatus::OK && (mode & Mode::AUTOSAMPLING)) + status = tracer->Start((Mode::Type)(mode & ~Mode::AUTOSAMPLING), settings.samplingFrequency, threads); + } + + if (gpuProfiler && (mode & Mode::GPU)) + gpuProfiler->Start(mode); + + SendHandshakeResponse(status); + } + else + { + if (tracer) + tracer->Stop(); + + if (gpuProfiler) + gpuProfiler->Stop(prevMode); + } + } +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Core::DumpCapturingProgress() +{ + stringstream stream; + + if (currentMode != Mode::OFF) + { + size_t memUsedKb = Memory::GetAllocatedSize() >> 10; + float memUsedMb = memUsedKb / 1024.0f; + // VS TODO: Format to 3 digits + stream << "Capturing Frame " << (uint32)frames.size() << "..." << std::endl << "Memory Used: " << memUsedMb << " Mb"; + } + + DumpProgress(stream.str().c_str()); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool Core::IsTimeToReportProgress() const +{ + return GetTimeMilliSeconds() > progressReportedLastTimestampMS + 200; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Core::SendHandshakeResponse(CaptureStatus::Type status) +{ + OutputDataStream stream; + stream << (uint32)status; + stream << Platform::GetName(); + stream << Server::Get().GetHostName(); + Server::Get().Send(DataResponse::Handshake, stream); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool Core::IsRegistredThread(ThreadID id) +{ + std::lock_guard<std::recursive_mutex> lock(threadsLock); + + for (ThreadList::iterator it = threads.begin(); it != threads.end(); ++it) + { + ThreadEntry* entry = *it; + if (entry->description.threadID == id) + { + return true; + } + } + return false; +} + + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +ThreadEntry* Core::RegisterThread(const ThreadDescription& description, EventStorage** slot) +{ + std::lock_guard<std::recursive_mutex> lock(threadsLock); + + ThreadEntry* entry = Memory::New<ThreadEntry>(description, slot); + threads.push_back(entry); + + if ((currentMode != Mode::OFF) && slot != nullptr) + *slot = &entry->storage; + + return entry; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool Core::UnRegisterThread(ThreadID threadID, bool keepAlive) +{ + std::lock_guard<std::recursive_mutex> lock(threadsLock); + + for (ThreadList::iterator it = threads.begin(); it != threads.end(); ++it) + { + ThreadEntry* entry = *it; + if (entry->description.threadID == threadID && entry->isAlive) + { + if ((currentMode == Mode::OFF) && !keepAlive) + { + Memory::Delete(entry); + threads.erase(it); + return true; + } + else + { + entry->isAlive = false; + return true; + } + } + } + + return false; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool Core::RegisterFiber(const FiberDescription& description, EventStorage** slot) +{ + std::lock_guard<std::recursive_mutex> lock(coreLock); + FiberEntry* entry = Memory::New<FiberEntry>(description); + fibers.push_back(entry); + entry->storage.isFiberStorage = true; + *slot = &entry->storage; + return true; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool Core::RegisterProcessDescription(const ProcessDescription& description) +{ + processDescs.push_back(description); + return false; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool Core::RegisterThreadDescription(const ThreadDescription& description) +{ + threadDescs.push_back(description); + return false; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool Core::SetStateChangedCallback(StateCallback cb) +{ + stateCallback = cb; + return stateCallback != nullptr; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool Core::AttachSummary(const char* key, const char* value) +{ + summary.push_back(make_pair(string(key), string(value))); + return true; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool Core::AttachFile(File::Type type, const char* name, const uint8_t* data, uint32_t size) +{ + if (size > 0) + { + attachments.push_back(Attachment(type, name)); + Attachment& attachment = attachments.back(); + attachment.data.resize(size); + memcpy(&attachment.data[0], data, size); + return true; + } + return false; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool Core::AttachFile(File::Type type, const char* name, std::istream& stream) +{ + std::streampos beg = stream.tellg(); + stream.seekg(0, std::ios::end); + std::streampos end = stream.tellg(); + stream.seekg(beg, std::ios::beg); + + size_t size =(size_t)(end - beg); + void* buffer = Memory::Alloc(size); + + stream.read((char*)buffer, size); + bool result = AttachFile(type, name, (uint8*)buffer, (uint32_t)size); + + Memory::Free(buffer); + return result; + +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool Core::AttachFile(File::Type type, const char* name, const char* path) +{ + std::ifstream stream(path, std::ios::binary); + return AttachFile(type, name, stream); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool Core::AttachFile(File::Type type, const char* name, const wchar_t* path) +{ +#if defined(OPTICK_MSVC) + std::ifstream stream(path, std::ios::binary); + return AttachFile(type, name, stream); +#else + char p[256] = { 0 }; + wcstombs(p, path, sizeof(p)); + std::ifstream stream(p, std::ios::binary); + return AttachFile(type, name, stream); +#endif +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Core::InitGPUProfiler(GPUProfiler* profiler) +{ + OPTICK_ASSERT(gpuProfiler == nullptr, "Can't reinitialize GPU profiler! Not supported yet!"); + Memory::Delete<GPUProfiler>(gpuProfiler); + gpuProfiler = profiler; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool Core::SetSettings(const CaptureSettings& captureSettings) +{ + settings = captureSettings; + + //if (tracer) + //{ + // string decoded = base64_decode(encodedPassword); + // tracer->SetPassword(decoded.c_str()); + // return true; + //} + return false; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +const EventDescription* Core::GetFrameDescription(FrameType::Type frame) const +{ + return frameDescriptions[frame]; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +Core::~Core() +{ + std::lock_guard<std::recursive_mutex> lock(threadsLock); + + for (ThreadList::iterator it = threads.begin(); it != threads.end(); ++it) + { + Memory::Delete(*it); + } + threads.clear(); + + for (FiberList::iterator it = fibers.begin(); it != fibers.end(); ++it) + { + Memory::Delete(*it); + } + fibers.clear(); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +const vector<ThreadEntry*>& Core::GetThreads() const +{ + return threads; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_THREAD_LOCAL EventStorage* Core::storage = nullptr; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +ScopeHeader::ScopeHeader() : boardNumber(0), threadNumber(0) +{ + +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OutputDataStream& operator<<(OutputDataStream& stream, const ScopeHeader& header) +{ + return stream << header.boardNumber << header.threadNumber << header.fiberNumber << header.event; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OutputDataStream& operator<<(OutputDataStream& stream, const ScopeData& ob) +{ + return stream << ob.header << ob.categories << ob.events; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OutputDataStream& operator<<(OutputDataStream& stream, const ThreadDescription& description) +{ + return stream << description.threadID << description.processID << description.name << description.maxDepth << description.priority << description.mask; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OutputDataStream& operator<<(OutputDataStream& stream, const ThreadEntry* entry) +{ + return stream << entry->description; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OutputDataStream& operator<<(OutputDataStream& stream, const FiberDescription& description) +{ + return stream << description.id; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OutputDataStream& operator<<(OutputDataStream& stream, const FiberEntry* entry) +{ + return stream << entry->description; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OutputDataStream& operator<<(OutputDataStream& stream, const ProcessDescription& description) +{ + return stream << description.processID << description.name << description.uniqueKey; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API bool SetStateChangedCallback(StateCallback cb) +{ + return Core::Get().SetStateChangedCallback(cb); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API bool AttachSummary(const char* key, const char* value) +{ + return Core::Get().AttachSummary(key, value); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API bool AttachFile(File::Type type, const char* name, const uint8_t* data, uint32_t size) +{ + return Core::Get().AttachFile(type, name, data, size); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API bool AttachFile(File::Type type, const char* name, const char* path) +{ + return Core::Get().AttachFile(type, name, path); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API bool AttachFile(File::Type type, const char* name, const wchar_t* path) +{ + return Core::Get().AttachFile(type, name, path); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OutputDataStream& operator<<(OutputDataStream& stream, const Point& ob) +{ + return stream << ob.x << ob.y << ob.z; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API uint32_t NextFrame() +{ + return Core::NextFrame(); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API bool IsActive(Mode::Type mode /*= Mode::INSTRUMENTATION_EVENTS*/) +{ + return (Core::Get().currentMode & mode) != 0; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API EventStorage** GetEventStorageSlotForCurrentThread() +{ + return &Core::Get().storage; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API bool IsFiberStorage(EventStorage* fiberStorage) +{ + return fiberStorage->isFiberStorage; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API bool RegisterThread(const char* name) +{ + return Core::Get().RegisterThread(ThreadDescription(name, Platform::GetThreadID(), Platform::GetProcessID()), &Core::storage) != nullptr; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API bool RegisterThread(const wchar_t* name) +{ + const int THREAD_NAME_LENGTH = 128; + char mbName[THREAD_NAME_LENGTH]; + wcstombs_s(mbName, name, THREAD_NAME_LENGTH); + + return Core::Get().RegisterThread(ThreadDescription(mbName, Platform::GetThreadID(), Platform::GetProcessID()), &Core::storage) != nullptr; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API bool UnRegisterThread(bool keepAlive) +{ + return Core::Get().UnRegisterThread(Platform::GetThreadID(), keepAlive); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API bool RegisterFiber(uint64 fiberId, EventStorage** slot) +{ + return Core::Get().RegisterFiber(FiberDescription(fiberId), slot); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API EventStorage* RegisterStorage(const char* name, uint64_t threadID, ThreadMask::Type type) +{ + ThreadEntry* entry = Core::Get().RegisterThread(ThreadDescription(name, threadID, Platform::GetProcessID(), 1, 0, type), nullptr); + return entry ? &entry->storage : nullptr; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API void GpuFlip(void* swapChain) +{ + if (GPUProfiler* gpuProfiler = Core::Get().gpuProfiler) + gpuProfiler->Flip(swapChain); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API GPUContext SetGpuContext(GPUContext context) +{ + if (EventStorage* storage = Core::storage) + { + GPUContext prevContext = storage->gpuStorage.context; + storage->gpuStorage.context = context; + return prevContext; + } + return GPUContext(); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API const EventDescription* GetFrameDescription(FrameType::Type frame) +{ + return Core::Get().GetFrameDescription(frame); + +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +EventStorage::EventStorage(): currentMode(Mode::OFF), pushPopEventStackIndex(0), isFiberStorage(false) +{ + +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void ThreadEntry::Activate(Mode::Type mode) +{ + if (!isAlive) + return; + + if (mode != Mode::OFF) + storage.Clear(true); + + if (threadTLS != nullptr) + { + storage.currentMode = mode; + *threadTLS = mode != Mode::OFF ? &storage : nullptr; + } +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void ThreadEntry::Sort() +{ + SortMemoryPool(storage.eventBuffer); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool IsSleepOnlyScope(const ScopeData& scope) +{ + //if (!scope.categories.empty()) + // return false; + + const vector<EventData>& events = scope.events; + for(auto it = events.begin(); it != events.end(); ++it) + { + const EventData& data = *it; + + if (data.description->color != Color::White) + { + return false; + } + } + + return true; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void ScopeData::Send() +{ + if (!events.empty() || !categories.empty()) + { + if (!IsSleepOnlyScope(*this)) + { + OutputDataStream frameStream; + frameStream << *this; + Server::Get().Send(DataResponse::EventFrame, frameStream); + } + } + + Clear(); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void ScopeData::Clear() +{ + events.clear(); + categories.clear(); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void EventStorage::GPUStorage::Clear(bool preserveMemory) +{ + for (size_t i = 0; i < gpuBuffer.size(); ++i) + for (int j = 0; j < GPU_QUEUE_COUNT; ++j) + gpuBuffer[i][j].Clear(preserveMemory); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +EventData* EventStorage::GPUStorage::Start(const EventDescription &desc) +{ + if (GPUProfiler* gpuProfiler = Core::Get().gpuProfiler) + { + EventData& result = gpuBuffer[context.node][context.queue].Add(); + result.description = &desc; + result.start = EventTime::INVALID_TIMESTAMP; + result.finish = EventTime::INVALID_TIMESTAMP; + gpuProfiler->QueryTimestamp(context.cmdBuffer, &result.start); + return &result; + } + return nullptr; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void EventStorage::GPUStorage::Stop(EventData& data) +{ + if (GPUProfiler* gpuProfiler = Core::Get().gpuProfiler) + { + gpuProfiler->QueryTimestamp(context.cmdBuffer, &data.finish); + } +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +} + +#endif //USE_OPTICK
\ No newline at end of file diff --git a/external/optick/optick_core.h b/external/optick/optick_core.h new file mode 100644 index 0000000..9ddf46b --- /dev/null +++ b/external/optick/optick_core.h @@ -0,0 +1,568 @@ +#pragma once +#include "optick.config.h" + +#if USE_OPTICK + +#include <mutex> +#include <thread> + +#include "optick_common.h" + +#include "optick_memory.h" +#include "optick_message.h" +#include "optick_serialization.h" + +#include "optick_gpu.h" + +#include <atomic> + +// We expect to have 1k unique strings going through Optick at once +// The chances to hit a collision are 1 in 10 trillion (odds of a meteor landing on your house) +// We should be quite safe here :) +// https://preshing.com/20110504/hash-collision-probabilities/ +// Feel free to add a seed and wait for another strike if armageddon starts +namespace Optick +{ + struct StringHash + { + uint64 hash; + + StringHash(size_t h) : hash(h) {} + StringHash(const char* str) : hash(CalcHash(str)) {} + + bool operator==(const StringHash& other) const { return hash == other.hash; } + bool operator<(const StringHash& other) const { return hash < other.hash; } + + static uint64 CalcHash(const char* str); + }; +} + +// Overriding default hash function to return hash value directly +namespace std +{ + template<> + struct hash<Optick::StringHash> + { + size_t operator()(const Optick::StringHash& x) const + { + return (size_t)x.hash; + } + }; +} + +namespace Optick +{ +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct Trace; +struct SymbolEngine; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct ScopeHeader +{ + EventTime event; + uint32 boardNumber; + int32 threadNumber; + int32 fiberNumber; + + ScopeHeader(); +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OutputDataStream& operator << ( OutputDataStream& stream, const ScopeHeader& ob); +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct ScopeData +{ + ScopeHeader header; + vector<EventData> categories; + vector<EventData> events; + + void AddEvent(const EventData& data) + { + events.push_back(data); + if (data.description->color != Color::Null) + { + categories.push_back(data); + } + } + + void InitRootEvent(const EventData& data) + { + header.event = data; + AddEvent(data); + } + + void Send(); + void Clear(); +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#if defined(OPTICK_MSVC) +#pragma warning( push ) +#pragma warning( disable : 4996 ) +#endif //OPTICK_MSVC +template<int N> +struct OptickString +{ + char data[N]; + OptickString() {} + OptickString<N>& operator=(const char* text) { strncpy(data, text ? text : "null", N - 1); data[N - 1] = 0; return *this; } + OptickString(const char* text) { *this = text; } +}; +#if defined(OPTICK_MSVC) +#pragma warning( pop ) +#endif +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct Point +{ + float x, y, z; + Point() {} + Point(float _x, float _y, float _z) : x(_x), y(_y), z(_z) {} + Point(float pos[3]) : x(pos[0]), y(pos[1]), z(pos[2]) {} +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +template<int N> +OutputDataStream& operator<<(OutputDataStream &stream, const OptickString<N>& ob) +{ + size_t length = strnlen(ob.data, N); + stream << (uint32)length; + return stream.Write(ob.data, length); +} +OutputDataStream& operator<<(OutputDataStream& stream, const Point& ob); +OutputDataStream& operator<<(OutputDataStream& stream, const ScopeData& ob); +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +typedef MemoryPool<EventData, 1024> EventBuffer; +typedef MemoryPool<const EventData*, 32> CategoryBuffer; +typedef MemoryPool<SyncData, 1024> SynchronizationBuffer; +typedef MemoryPool<FiberSyncData, 1024> FiberSyncBuffer; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +typedef OptickString<32> ShortString; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +typedef TagData<float> TagFloat; +typedef TagData<int32> TagS32; +typedef TagData<uint32> TagU32; +typedef TagData<uint64> TagU64; +typedef TagData<Point> TagPoint; +typedef TagData<ShortString> TagString; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +typedef MemoryPool<TagFloat, 1024> TagFloatBuffer; +typedef MemoryPool<TagS32, 1024> TagS32Buffer; +typedef MemoryPool<TagU32, 1024> TagU32Buffer; +typedef MemoryPool<TagU64, 1024> TagU64Buffer; +typedef MemoryPool<TagPoint, 64> TagPointBuffer; +typedef MemoryPool<TagString, 1024> TagStringBuffer; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Base64 +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +string base64_decode(string const& encoded_string); +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Board +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +typedef MemoryPool<EventDescription, 4096> EventDescriptionList; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +class EventDescriptionBoard +{ + // List of stored Event Descriptions + EventDescriptionList boardDescriptions; + + // Shared Descriptions + typedef unordered_map<StringHash, EventDescription*> DescriptionMap; + DescriptionMap sharedDescriptions; + MemoryBuffer<64 * 1024> sharedNames; + std::mutex sharedLock; + +public: + EventDescription* CreateDescription(const char* name, const char* file = nullptr, uint32_t line = 0, uint32_t color = Color::Null, uint32_t filter = 0); + EventDescription* CreateSharedDescription(const char* name, const char* file = nullptr, uint32_t line = 0, uint32_t color = Color::Null, uint32_t filter = 0); + + static EventDescriptionBoard& Get(); + + const EventDescriptionList& GetEvents() const; + + friend OutputDataStream& operator << (OutputDataStream& stream, const EventDescriptionBoard& ob); +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct EventStorage +{ + Mode::Type currentMode; + EventBuffer eventBuffer; + FiberSyncBuffer fiberSyncBuffer; + + TagFloatBuffer tagFloatBuffer; + TagS32Buffer tagS32Buffer; + TagU32Buffer tagU32Buffer; + TagU64Buffer tagU64Buffer; + TagPointBuffer tagPointBuffer; + TagStringBuffer tagStringBuffer; + + struct GPUStorage + { + static const int MAX_GPU_NODES = 2; + array<array<EventBuffer, GPU_QUEUE_COUNT>, MAX_GPU_NODES> gpuBuffer; + GPUContext context; + + void Clear(bool preserveMemory); + + EventData* Start(const EventDescription& desc); + void Stop(EventData& data); + }; + GPUStorage gpuStorage; + + uint32 pushPopEventStackIndex; + array<EventData*, 32> pushPopEventStack; + + bool isFiberStorage; + + EventStorage(); + + OPTICK_INLINE EventData& NextEvent() + { + return eventBuffer.Add(); + } + + // Free all temporary memory + void Clear(bool preserveContent) + { + currentMode = Mode::OFF; + eventBuffer.Clear(preserveContent); + fiberSyncBuffer.Clear(preserveContent); + gpuStorage.Clear(preserveContent); + ClearTags(preserveContent); + + while (pushPopEventStackIndex) + { + pushPopEventStack[--pushPopEventStackIndex] = nullptr; + } + } + + void ClearTags(bool preserveContent) + { + tagFloatBuffer.Clear(preserveContent); + tagS32Buffer.Clear(preserveContent); + tagU32Buffer.Clear(preserveContent); + tagU64Buffer.Clear(preserveContent); + tagPointBuffer.Clear(preserveContent); + tagStringBuffer.Clear(preserveContent); + } + + void Reset() + { + Clear(true); + } +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct ProcessDescription +{ + string name; + ProcessID processID; + uint64 uniqueKey; + ProcessDescription(const char* processName, ProcessID pid, uint64 key); +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct ThreadDescription +{ + string name; + ThreadID threadID; + ProcessID processID; + int32 maxDepth; + int32 priority; + uint32 mask; + + ThreadDescription(const char* threadName, ThreadID tid, ProcessID pid, int32 maxDepth = 1, int32 priority = 0, uint32 mask = 0); +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct FiberDescription +{ + uint64 id; + + FiberDescription(uint64 _id) + : id(_id) + {} +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct ThreadEntry +{ + ThreadDescription description; + EventStorage storage; + EventStorage** threadTLS; + + bool isAlive; + + ThreadEntry(const ThreadDescription& desc, EventStorage** tls) : description(desc), threadTLS(tls), isAlive(true) {} + void Activate(Mode::Type mode); + void Sort(); +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct FiberEntry +{ + FiberDescription description; + EventStorage storage; + + FiberEntry(const FiberDescription& desc) : description(desc) {} +}; + + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +typedef vector<ThreadEntry*> ThreadList; +typedef vector<FiberEntry*> FiberList; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct SysCallData : EventData +{ + uint64 id; + uint64 threadID; +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OutputDataStream &operator << (OutputDataStream &stream, const SysCallData &ob); +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +class SysCallCollector +{ + typedef MemoryPool<SysCallData, 1024 * 32> SysCallPool; +public: + SysCallPool syscallPool; + + SysCallData& Add(); + void Clear(); + + bool Serialize(OutputDataStream& stream); +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct CallstackDesc +{ + uint64 threadID; + uint64 timestamp; + uint64* callstack; + uint8 count; +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +class CallstackCollector +{ + // Packed callstack list: {ThreadID, Timestamp, Count, Callstack[Count]} + typedef MemoryPool<uint64, 1024 * 32> CallstacksPool; + CallstacksPool callstacksPool; +public: + void Add(const CallstackDesc& desc); + void Clear(); + + bool SerializeModules(OutputDataStream& stream); + bool SerializeSymbols(OutputDataStream& stream); + bool SerializeCallstacks(OutputDataStream& stream); + + bool IsEmpty() const; +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct SwitchContextDesc +{ + int64_t timestamp; + uint64 oldThreadId; + uint64 newThreadId; + uint8 cpuId; + uint8 reason; +}; +////////////////////////////////////////////////////////////////////////// +OutputDataStream &operator << (OutputDataStream &stream, const SwitchContextDesc &ob); +////////////////////////////////////////////////////////////////////////// +class SwitchContextCollector +{ + typedef MemoryPool<SwitchContextDesc, 1024 * 32> SwitchContextPool; + SwitchContextPool switchContextPool; +public: + void Add(const SwitchContextDesc& desc); + void Clear(); + bool Serialize(OutputDataStream& stream); +}; +////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct CaptureStatus +{ + enum Type + { + OK = 0, + ERR_TRACER_ALREADY_EXISTS = 1, + ERR_TRACER_ACCESS_DENIED = 2, + ERR_TRACER_FAILED = 3, + ERR_TRACER_INVALID_PASSWORD = 4, + }; +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +class Core +{ + std::recursive_mutex coreLock; + std::recursive_mutex threadsLock; + + ThreadList threads; + FiberList fibers; + + int64 progressReportedLastTimestampMS; + + vector<EventTime> frames; + uint32 boardNumber; + + CallstackCollector callstackCollector; + SwitchContextCollector switchContextCollector; + + vector<std::pair<string, string>> summary; + + std::atomic<uint32_t> frameNumber; + + struct Attachment + { + string name; + vector<uint8_t> data; + File::Type type; + Attachment(File::Type t, const char* n) : name(n), type(t) {} + }; + list<Attachment> attachments; + + StateCallback stateCallback; + + vector<ProcessDescription> processDescs; + vector<ThreadDescription> threadDescs; + + array<const EventDescription*, FrameType::COUNT> frameDescriptions; + + State::Type currentState; + State::Type pendingState; + + CaptureSettings settings; + + void UpdateEvents(); + uint32_t Update(); + bool UpdateState(); + + Core(); + ~Core(); + + static Core notThreadSafeInstance; + + void DumpCapturingProgress(); + void SendHandshakeResponse(CaptureStatus::Type status); + + + void DumpEvents(EventStorage& entry, const EventTime& timeSlice, ScopeData& scope); + void DumpTags(EventStorage& entry, ScopeData& scope); + void DumpThread(ThreadEntry& entry, const EventTime& timeSlice, ScopeData& scope); + void DumpFiber(FiberEntry& entry, const EventTime& timeSlice, ScopeData& scope); + + void CleanupThreadsAndFibers(); + + void DumpBoard(uint32 mode, EventTime timeSlice, uint32 mainThreadIndex); + + void GenerateCommonSummary(); +public: + void Activate(Mode::Type mode); + volatile Mode::Type currentMode; + + // Active Frame (is used as buffer) + static OPTICK_THREAD_LOCAL EventStorage* storage; + + // Resolves symbols + SymbolEngine* symbolEngine; + + // Controls GPU activity + // Graphics graphics; + + // System scheduler trace + Trace* tracer; + + // SysCall Collector + SysCallCollector syscallCollector; + + // GPU Profiler + GPUProfiler* gpuProfiler; + + // Returns thread collection + const vector<ThreadEntry*>& GetThreads() const; + + // Request to start a new capture + void StartCapture(); + + // Request to stop an active capture + void StopCapture(); + + // Request to stop an active capture + void CancelCapture(); + + // Requests to dump current capture + void DumpCapture(); + + // Report switch context event + bool ReportSwitchContext(const SwitchContextDesc& desc); + + // Report switch context event + bool ReportStackWalk(const CallstackDesc& desc); + + // Serialize and send current profiling progress + void DumpProgress(const char* message = ""); + + // Too much time from last report + bool IsTimeToReportProgress() const; + + // Serialize and send frames + void DumpFrames(uint32 mode = Mode::DEFAULT); + + // Serialize and send frames + void DumpSummary(); + + // Registers thread and create EventStorage + ThreadEntry* RegisterThread(const ThreadDescription& description, EventStorage** slot); + + // UnRegisters thread + bool UnRegisterThread(ThreadID threadId, bool keepAlive = false); + + // Check is registered thread + bool IsRegistredThread(ThreadID id); + + // Registers finer and create EventStorage + bool RegisterFiber(const FiberDescription& description, EventStorage** slot); + + // Registers ProcessDescription + bool RegisterProcessDescription(const ProcessDescription& description); + + // Registers ThreaDescription (used for threads from other processes) + bool RegisterThreadDescription(const ThreadDescription& description); + + // Sets state change callback + bool SetStateChangedCallback(StateCallback cb); + + // Attaches a key-value pair to the next capture + bool AttachSummary(const char* key, const char* value); + + // Attaches a screenshot to the current capture + bool AttachFile(File::Type type, const char* name, const uint8_t* data, uint32_t size); + bool AttachFile(File::Type type, const char* name, std::istream& stream); + bool AttachFile(File::Type type, const char* name, const char* path); + bool AttachFile(File::Type type, const char* name, const wchar_t* path); + + // Initalizes GPU profiler + void InitGPUProfiler(GPUProfiler* profiler); + + // Initializes root password for the device + bool SetSettings(const CaptureSettings& settings); + + // Current Frame Number (since the game started) + uint32_t GetCurrentFrame() const { return frameNumber; } + + // Returns Frame Description + const EventDescription* GetFrameDescription(FrameType::Type frame) const; + + // NOT Thread Safe singleton (performance) + static Core& Get(); + + // Main Update Function + static uint32_t NextFrame() { return Get().Update(); } +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +} + +#endif //USE_OPTICK
\ No newline at end of file diff --git a/external/optick/optick_core.linux.h b/external/optick/optick_core.linux.h new file mode 100644 index 0000000..e0f4b49 --- /dev/null +++ b/external/optick/optick_core.linux.h @@ -0,0 +1,410 @@ +#pragma once +#if defined(__linux__) + +#include "optick.config.h" +#if USE_OPTICK + +#include "optick_core.platform.h" + +#include <sys/syscall.h> +#include <sys/time.h> +#include <sys/types.h> +#include <pthread.h> +#include <unistd.h> + +namespace Optick +{ + const char* Platform::GetName() + { + return "Linux"; + } + + ThreadID Platform::GetThreadID() + { + return syscall(SYS_gettid); + } + + ProcessID Platform::GetProcessID() + { + return (ProcessID)getpid(); + } + + int64 Platform::GetFrequency() + { + return 1000000000; + } + + int64 Platform::GetTime() + { + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return ts.tv_sec * 1000000000LL + ts.tv_nsec; + } +} + +#if OPTICK_ENABLE_TRACING + +#include "optick_memory.h" + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +namespace ft +{ + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + struct base_event + { + int64_t timestamp; + short common_type; + uint8_t cpu_id; + base_event(short type) : timestamp(-1), common_type(type), cpu_id(uint8_t(-1)) {} +}; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + template<short TYPE> + struct event : public base_event + { + static const short type = TYPE; + event() : base_event(TYPE) {} + }; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + struct process_state + { + enum type + { + Unknown, + //D Uninterruptible sleep(usually IO) + UninterruptibleSleep, + //R Running or runnable(on run queue) + Running, + //S Interruptible sleep(waiting for an event to complete) + InterruptibleSleep, + //T Stopped, either by a job control signal or because it is being traced. + Stopped, + //X dead(should never be seen) + Dead, + //Z Defunct(“zombie”) process, terminated but not reaped by its parent. + Zombie, + }; + }; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + struct sched_switch : public event<305> + { + char prev_comm[16]; + pid_t prev_pid; + int prev_prio; + process_state::type prev_state; + char next_comm[16]; + pid_t next_pid; + int next_prio; + }; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +} // namespace ft +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +namespace Optick +{ +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +static const char* KERNEL_TRACING_PATH = "/sys/kernel/debug/tracing"; +static const char* FTRACE_TRACE = "trace"; +static const char* FTRACE_TRACING_ON = "tracing_on"; +static const char* FTRACE_TRACE_CLOCK = "trace_clock"; +static const char* FTRACE_OPTIONS_IRQ_INFO = "options/irq-info"; +static const char* FTRACE_SCHED_SWITCH = "events/sched/sched_switch/enable"; +static const uint8_t PROCESS_STATE_REASON_START = 38; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +class FTrace : public Trace +{ + bool isActive; + string password; + + bool Parse(const char* line); + bool ProcessEvent(const ft::base_event& ev); + + void Set(const char* name, bool value); + void Set(const char* name, const char* value); + void Exec(const char* cmd); +public: + + FTrace(); + ~FTrace(); + + virtual void SetPassword(const char* pwd) override { password = pwd; } + virtual CaptureStatus::Type Start(Mode::Type mode, int frequency, const ThreadList& threads) override; + virtual bool Stop() override; +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +FTrace g_FTrace; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct Parser +{ + const char* cursor; + const char* finish; + size_t length; + + Parser(const char* b) : cursor(b), finish(b + strlen(b)) {} + + bool Skip(size_t count) + { + if ((size_t)(finish - cursor) > count) + { + cursor += count; + return true; + } + return false; + } + + bool Skip(const char* text, char* output = nullptr, size_t size = 0) + { + if (const char* ptr = strstr(cursor, text)) + { + if (output != nullptr) + { + size_t count = std::min(size - 1, (size_t)(ptr - cursor)); + strncpy(output, cursor, count); + output[count] = '\0'; + } + cursor = ptr + strlen(text); + return true; + } + return false; + } + + void SkipSpaces() + { + while (cursor != finish && (*cursor == ' ' || *cursor == '\t' || *cursor == '\n')) + ++cursor; + } + + bool Starts(const char* text) const + { + return strncmp(cursor, text, strlen(text)) == 0; + } + + int GetInt() const + { + return atoi(cursor); + } + + char GetChar() const + { + return *cursor; + } +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +CaptureStatus::Type FTrace::Start(Mode::Type mode, int /*frequency*/, const ThreadList& /*threads*/) +{ + if (!isActive) + { + // Disable tracing + Set(FTRACE_TRACING_ON, false); + // Cleanup old data + Set(FTRACE_TRACE, ""); + // Set clock type + Set(FTRACE_TRACE_CLOCK, "mono"); + // Disable irq info + Set(FTRACE_OPTIONS_IRQ_INFO, false); + // Enable switch events + Set(FTRACE_SCHED_SWITCH, (mode & Mode::SWITCH_CONTEXT) != 0); + + // Enable tracing + Set(FTRACE_TRACING_ON, true); + + isActive = true; + } + + return CaptureStatus::OK; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool FTrace::Stop() +{ + if (!isActive) + { + return false; + } + + // Reset variables + Set(FTRACE_TRACING_ON, false); + Set(FTRACE_SCHED_SWITCH, false); + + // Parsing the output + char buffer[256] = { 0 }; + sprintf_s(buffer, "echo \'%s\' | sudo -S sh -c \'cat %s/%s\'", password.c_str(), KERNEL_TRACING_PATH, FTRACE_TRACE); + if (FILE* pipe = popen(buffer, "r")) + { + char* line = NULL; + size_t len = 0; + while ((getline(&line, &len, pipe)) != -1) + Parse(line); + fclose(pipe); + } + + // Cleanup data + Set(FTRACE_TRACE, ""); + + isActive = false; + + return true; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool FTrace::Parse(const char * line) +{ + // sched_switch: + // ConsoleApp-8687 [000] 181944.352057: sched_switch: prev_comm=ConsoleApp prev_pid=8687 prev_prio=120 prev_state=S ==> next_comm=ConsoleApp next_pid=8686 next_prio=120 + + Parser p(line); + if (p.Starts("#")) + return true; + + if (!p.Skip(16)) + return false; + + if (!p.Skip("[")) + return false; + + int cpu = p.GetInt(); + if (!p.Skip("]")) + return false; + + int64 timestampInt = p.GetInt(); + if (!p.Skip(".")) + return false; + + int64 timestampFraq = p.GetInt(); + if (!p.Skip(": ")) + return false; + + int64 timestamp = ((timestampInt * 1000000) + timestampFraq) * 1000; + + if (p.Starts("sched_switch:")) + { + ft::sched_switch ev; + ev.cpu_id = cpu; + ev.timestamp = timestamp; + + if (!p.Skip("prev_comm=")) + return false; + + if (!p.Skip(" prev_pid=", ev.prev_comm, OPTICK_ARRAY_SIZE(ev.prev_comm))) + return false; + + ev.prev_pid = p.GetInt(); + + if (!p.Skip(" prev_prio=")) + return false; + + ev.prev_prio = p.GetInt(); + + if (!p.Skip(" prev_state=")) + return false; + + switch (p.GetChar()) + { + case 'D': + ev.prev_state = ft::process_state::UninterruptibleSleep; + break; + + case 'R': + ev.prev_state = ft::process_state::Running; + break; + + case 'S': + ev.prev_state = ft::process_state::InterruptibleSleep; + break; + + case 'T': + ev.prev_state = ft::process_state::Stopped; + break; + + case 'X': + ev.prev_state = ft::process_state::Dead; + break; + + case 'Z': + ev.prev_state = ft::process_state::Zombie; + break; + + default: + ev.prev_state = ft::process_state::Unknown; + break; + } + + if (!p.Skip("==> next_comm=")) + return false; + + if (!p.Skip(" next_pid=", ev.next_comm, OPTICK_ARRAY_SIZE(ev.prev_comm))) + return false; + + ev.next_pid = p.GetInt(); + + if (!p.Skip(" next_prio=")) + return false; + + ev.next_prio = p.GetInt(); + + return ProcessEvent(ev); + } + return true; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool FTrace::ProcessEvent(const ft::base_event& ev) +{ + switch (ev.common_type) + { + case ft::sched_switch::type: + { + const ft::sched_switch& switchEv = (const ft::sched_switch&)ev; + SwitchContextDesc desc; + desc.reason = switchEv.prev_state + PROCESS_STATE_REASON_START; + desc.cpuId = switchEv.cpu_id; + desc.oldThreadId = (uint64)switchEv.prev_pid; + desc.newThreadId = (uint64)switchEv.next_pid; + desc.timestamp = switchEv.timestamp; + Core::Get().ReportSwitchContext(desc); + return true; + } + break; + } + + return false; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void FTrace::Set(const char * name, bool value) +{ + Set(name, value ? "1" : "0"); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void FTrace::Set(const char* name, const char* value) +{ + char buffer[256] = { 0 }; + sprintf_s(buffer, "echo %s > %s/%s", value, KERNEL_TRACING_PATH, name); + Exec(buffer); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void FTrace::Exec(const char* cmd) +{ + char buffer[256] = { 0 }; + sprintf_s(buffer, "echo \'%s\' | sudo -S sh -c \'%s\'", password.c_str(), cmd); + std::system(buffer); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +FTrace::FTrace() : isActive(false) +{ +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +FTrace::~FTrace() +{ + Stop(); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +Trace* Platform::GetTrace() +{ + return &g_FTrace; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +SymbolEngine* Platform::GetSymbolEngine() +{ + return nullptr; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +} +#endif //OPTICK_ENABLE_TRACING +#endif //USE_OPTICK +#endif //__linux__
\ No newline at end of file diff --git a/external/optick/optick_core.macos.h b/external/optick/optick_core.macos.h new file mode 100644 index 0000000..3d19bfd --- /dev/null +++ b/external/optick/optick_core.macos.h @@ -0,0 +1,289 @@ +#pragma once +#if defined(__APPLE_CC__) + +#include "optick.config.h" +#if USE_OPTICK + +#include "optick_core.platform.h" + +#include <mach/mach_time.h> +#include <sys/time.h> +#include <sys/types.h> +#include <pthread.h> +#include <unistd.h> + +namespace Optick +{ + const char* Platform::GetName() + { + return "MacOS"; + } + + ThreadID Platform::GetThreadID() + { + uint64_t tid; + pthread_threadid_np(pthread_self(), &tid); + return tid; + } + + ProcessID Platform::GetProcessID() + { + return (ProcessID)getpid(); + } + + int64 Platform::GetFrequency() + { + return 1000000000; + } + + int64 Platform::GetTime() + { + struct timespec ts; + clock_gettime(CLOCK_REALTIME, &ts); + return ts.tv_sec * 1000000000LL + ts.tv_nsec; + } +} + +#if OPTICK_ENABLE_TRACING + +#include "optick_core.h" + +namespace Optick +{ +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +class DTrace : public Trace +{ + static const bool isSilent = true; + + std::thread processThread; + string password; + + enum State + { + STATE_IDLE, + STATE_RUNNING, + STATE_ABORT, + }; + + volatile State state; + volatile int64 timeout; + + struct CoreState + { + ProcessID pid; + ThreadID tid; + int prio; + bool IsValid() const { return tid != INVALID_THREAD_ID; } + CoreState() : pid(INVALID_PROCESS_ID), tid(INVALID_THREAD_ID), prio(0) {} + }; + static const int MAX_CPU_CORES = 256; + array<CoreState, MAX_CPU_CORES> cores; + + static void AsyncProcess(DTrace* trace); + void Process(); + + bool CheckRootAccess(); + + enum ParseResult + { + PARSE_OK, + PARSE_TIMEOUT, + PARSE_FAILED, + }; + ParseResult Parse(const char* line); +public: + + DTrace(); + + virtual void SetPassword(const char* pwd) override { password = pwd; } + virtual CaptureStatus::Type Start(Mode::Type mode, int frequency, const ThreadList& threads) override; + virtual bool Stop() override; +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +DTrace g_DTrace; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +DTrace::DTrace() : state(STATE_IDLE), timeout(0) +{ +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool DTrace::CheckRootAccess() +{ + char cmd[256] = { 0 }; + sprintf_s(cmd, "echo \'%s\' | sudo -S echo %s", password.c_str(), isSilent ? "2> /dev/null" : ""); + return system(cmd) == 0; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +CaptureStatus::Type DTrace::Start(Mode::Type mode, int /*frequency*/, const ThreadList& /*threads*/) +{ + if (state == STATE_IDLE && (mode & Mode::SWITCH_CONTEXT) != 0) + { + if (!CheckRootAccess()) + return CaptureStatus::ERR_TRACER_INVALID_PASSWORD; + + state = STATE_RUNNING; + timeout = INT64_MAX; + cores.fill(CoreState()); + processThread = std::thread(AsyncProcess, this); + } + + return CaptureStatus::OK; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool DTrace::Stop() +{ + if (state != STATE_RUNNING) + { + return false; + } + + timeout = Platform::GetTime(); + processThread.join(); + state = STATE_IDLE; + + return true; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +FILE* popen2(const char *program, const char *type, pid_t* outPid) +{ + FILE *iop; + int pdes[2]; + pid_t pid; + if ((*type != 'r' && *type != 'w') || type[1] != '\0') { + errno = EINVAL; + return (NULL); + } + + if (pipe(pdes) < 0) { + return (NULL); + } + + switch (pid = fork()) { + case -1: /* Error. */ + (void)close(pdes[0]); + (void)close(pdes[1]); + return (NULL); + /* NOTREACHED */ + case 0: /* Child. */ + { + if (*type == 'r') { + (void)close(pdes[0]); + if (pdes[1] != STDOUT_FILENO) { + (void)dup2(pdes[1], STDOUT_FILENO); + (void)close(pdes[1]); + } + } + else { + (void)close(pdes[1]); + if (pdes[0] != STDIN_FILENO) { + (void)dup2(pdes[0], STDIN_FILENO); + (void)close(pdes[0]); + } + } + execl("/bin/sh", "sh", "-c", program, NULL); + perror("execl"); + exit(1); + /* NOTREACHED */ + } + } + /* Parent; assume fdopen can't fail. */ + if (*type == 'r') { + iop = fdopen(pdes[0], type); + (void)close(pdes[1]); + } + else { + iop = fdopen(pdes[1], type); + (void)close(pdes[0]); + } + + if (outPid) + *outPid = pid; + + return (iop); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void DTrace::Process() +{ + const char* command = "dtrace -n fbt::thread_dispatch:return'\\''{printf(\"@%d %d %d %d\", pid, tid, curthread->sched_pri, walltimestamp)}'\\''"; + + char buffer[256] = { 0 }; + sprintf_s(buffer, "echo \'%s\' | sudo -S sh -c \'%s\' %s", password.c_str(), command, isSilent ? "2> /dev/null" : ""); + pid_t pid; + if (FILE* pipe = popen2(buffer, "r", &pid)) + { + char* line = NULL; + size_t len = 0; + while (state == STATE_RUNNING && (getline(&line, &len, pipe)) != -1) + { + if (Parse(line) == PARSE_TIMEOUT) + break; + } + fclose(pipe); + + int internal_stat; + waitpid(pid, &internal_stat, 0); + } + else + { + OPTICK_FAILED("Failed to open communication pipe!"); + } +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +DTrace::ParseResult DTrace::Parse(const char* line) +{ + if (const char* cmd = strchr(line, '@')) + { + int cpu = atoi(line); + + CoreState currState; + + currState.pid = atoi(cmd + 1); + cmd = strchr(cmd, ' ') + 1; + + currState.tid = atoi(cmd); + cmd = strchr(cmd, ' ') + 1; + + currState.prio = atoi(cmd); + cmd = strchr(cmd, ' ') + 1; + + int64_t timestamp = (int64_t)atoll(cmd); + + if (timestamp > timeout) + return PARSE_TIMEOUT; + + const CoreState& prevState = cores[cpu]; + + if (prevState.IsValid()) + { + SwitchContextDesc desc; + desc.reason = 0; + desc.cpuId = cpu; + desc.oldThreadId = prevState.tid; + desc.newThreadId = currState.tid; + desc.timestamp = timestamp; + Core::Get().ReportSwitchContext(desc); + } + + cores[cpu] = currState; + } + return PARSE_FAILED; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void DTrace::AsyncProcess(DTrace *trace) { + trace->Process(); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +Trace* Platform::GetTrace() +{ + return &g_DTrace; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +SymbolEngine* Platform::GetSymbolEngine() +{ + return nullptr; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +} +#endif //OPTICK_ENABLE_TRACING +#endif //USE_OPTICK +#endif //__APPLE_CC__
\ No newline at end of file diff --git a/external/optick/optick_core.platform.h b/external/optick/optick_core.platform.h new file mode 100644 index 0000000..683376d --- /dev/null +++ b/external/optick/optick_core.platform.h @@ -0,0 +1,92 @@ +#pragma once +#include "optick.config.h" + +#if USE_OPTICK + +#include "optick_common.h" +#include "optick_memory.h" + +////////////////////////////////////////////////////////////////////////// +// Platform-specific stuff +////////////////////////////////////////////////////////////////////////// +namespace Optick +{ + struct Trace; + struct Module; + struct Symbol; + struct SymbolEngine; + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // Platform API + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + struct Platform + { + // Platform Name + static OPTICK_INLINE const char* GetName(); + // Thread ID (system thread id) + static OPTICK_INLINE ThreadID GetThreadID(); + // Process ID + static OPTICK_INLINE ProcessID GetProcessID(); + // CPU Frequency + static OPTICK_INLINE int64 GetFrequency(); + // CPU Time (Ticks) + static OPTICK_INLINE int64 GetTime(); + // System Tracer + static OPTICK_INLINE Trace* GetTrace(); + // Symbol Resolver + static OPTICK_INLINE SymbolEngine* GetSymbolEngine(); + }; + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // Tracing API + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + struct Trace + { + virtual void SetPassword(const char* /*pwd*/) {}; + virtual CaptureStatus::Type Start(Mode::Type mode, int frequency, const ThreadList& threads) = 0; + virtual bool Stop() = 0; + virtual ~Trace() {}; + }; + + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // Symbol API + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + struct Module + { + string path; + void* address; + size_t size; + Module(const char* p, void* a, size_t s) : path(p), address(a), size(s) {} + }; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + struct Symbol + { + uint64 address; + uint64 offset; + wstring file; + wstring function; + uint32 line; + Symbol() + : address(0) + , offset(0) + , line(0) + {} + }; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + struct SymbolEngine + { + // Get list of loaded modules + virtual const vector<Module>& GetModules() = 0; + + // Get Symbol from address + virtual const Symbol* GetSymbol(uint64 dwAddress) = 0; + + virtual ~SymbolEngine() {}; + }; +} +////////////////////////////////////////////////////////////////////////// + +#endif //USE_OPTICK
\ No newline at end of file diff --git a/external/optick/optick_core.win.h b/external/optick/optick_core.win.h new file mode 100644 index 0000000..0d8a11a --- /dev/null +++ b/external/optick/optick_core.win.h @@ -0,0 +1,1664 @@ +#pragma once +#if defined(_MSC_VER) + +#include "optick.config.h" + +#if USE_OPTICK + +#include "optick_core.platform.h" + +namespace Optick +{ + const char* Platform::GetName() + { + #if defined(OPTICK_PC) + return "Windows"; + #else + return "XBox"; + #endif + } + + ThreadID Platform::GetThreadID() + { + return GetCurrentThreadId(); + } + + ProcessID Platform::GetProcessID() + { + return GetCurrentProcessId(); + } + + int64 Platform::GetFrequency() + { + LARGE_INTEGER frequency; + QueryPerformanceFrequency(&frequency); + return frequency.QuadPart; + } + + int64 Platform::GetTime() + { + LARGE_INTEGER largeInteger; + QueryPerformanceCounter(&largeInteger); + return largeInteger.QuadPart; + } +} + +#if OPTICK_ENABLE_TRACING +#include <psapi.h> +#include "optick_core.h" + +/* +Event Tracing Functions - API +https://msdn.microsoft.com/en-us/library/windows/desktop/aa363795(v=vs.85).aspx +*/ + +#define DECLARE_ETW (!OPTICK_PC) + +#if DECLARE_ETW +// Copied from Windows SDK +#ifndef WMIAPI +#ifndef MIDL_PASS +#ifdef _WMI_SOURCE_ +#define WMIAPI __stdcall +#else +#define WMIAPI DECLSPEC_IMPORT __stdcall +#endif // _WMI_SOURCE +#endif // MIDL_PASS +#endif // WMIAPI +#define INITGUID +#include <guiddef.h> +#if defined(_NTDDK_) || defined(_NTIFS_) || defined(_WMIKM_) +#define _EVNTRACE_KERNEL_MODE +#endif +#if !defined(_EVNTRACE_KERNEL_MODE) +#include <wmistr.h> +#endif + +#if _MSC_VER <= 1600 +#define EVENT_DESCRIPTOR_DEF +#define EVENT_HEADER_DEF +#define EVENT_HEADER_EXTENDED_DATA_ITEM_DEF +#define EVENT_RECORD_DEF +#endif + +#ifndef _TRACEHANDLE_DEFINED +#define _TRACEHANDLE_DEFINED +typedef ULONG64 TRACEHANDLE, *PTRACEHANDLE; +#endif + +// +// EventTraceGuid is used to identify a event tracing session +// +DEFINE_GUID( /* 68fdd900-4a3e-11d1-84f4-0000f80464e3 */ + EventTraceGuid, + 0x68fdd900, + 0x4a3e, + 0x11d1, + 0x84, 0xf4, 0x00, 0x00, 0xf8, 0x04, 0x64, 0xe3 +); + +// +// SystemTraceControlGuid. Used to specify event tracing for kernel +// +DEFINE_GUID( /* 9e814aad-3204-11d2-9a82-006008a86939 */ + SystemTraceControlGuid, + 0x9e814aad, + 0x3204, + 0x11d2, + 0x9a, 0x82, 0x00, 0x60, 0x08, 0xa8, 0x69, 0x39 +); + +// +// EventTraceConfigGuid. Used to report system configuration records +// +DEFINE_GUID( /* 01853a65-418f-4f36-aefc-dc0f1d2fd235 */ + EventTraceConfigGuid, + 0x01853a65, + 0x418f, + 0x4f36, + 0xae, 0xfc, 0xdc, 0x0f, 0x1d, 0x2f, 0xd2, 0x35 +); + +// +// DefaultTraceSecurityGuid. Specifies the default event tracing security +// +DEFINE_GUID( /* 0811c1af-7a07-4a06-82ed-869455cdf713 */ + DefaultTraceSecurityGuid, + 0x0811c1af, + 0x7a07, + 0x4a06, + 0x82, 0xed, 0x86, 0x94, 0x55, 0xcd, 0xf7, 0x13 +); + + +/////////////////////////////////////////////////////////////////////////////// +#define PROCESS_TRACE_MODE_REAL_TIME 0x00000100 +#define PROCESS_TRACE_MODE_RAW_TIMESTAMP 0x00001000 +#define PROCESS_TRACE_MODE_EVENT_RECORD 0x10000000 +/////////////////////////////////////////////////////////////////////////////// +#define EVENT_HEADER_FLAG_EXTENDED_INFO 0x0001 +#define EVENT_HEADER_FLAG_PRIVATE_SESSION 0x0002 +#define EVENT_HEADER_FLAG_STRING_ONLY 0x0004 +#define EVENT_HEADER_FLAG_TRACE_MESSAGE 0x0008 +#define EVENT_HEADER_FLAG_NO_CPUTIME 0x0010 +#define EVENT_HEADER_FLAG_32_BIT_HEADER 0x0020 +#define EVENT_HEADER_FLAG_64_BIT_HEADER 0x0040 +#define EVENT_HEADER_FLAG_CLASSIC_HEADER 0x0100 +#define EVENT_HEADER_FLAG_PROCESSOR_INDEX 0x0200 +/////////////////////////////////////////////////////////////////////////////// +#define KERNEL_LOGGER_NAMEW L"NT Kernel Logger" +/////////////////////////////////////////////////////////////////////////////// +#define EVENT_TRACE_REAL_TIME_MODE 0x00000100 // Real time mode on +/////////////////////////////////////////////////////////////////////////////// +#define EVENT_TRACE_CONTROL_STOP 1 +/////////////////////////////////////////////////////////////////////////////// + +// +// Enable flags for Kernel Events +// +#define EVENT_TRACE_FLAG_PROCESS 0x00000001 // process start & end +#define EVENT_TRACE_FLAG_THREAD 0x00000002 // thread start & end +#define EVENT_TRACE_FLAG_IMAGE_LOAD 0x00000004 // image load + +#define EVENT_TRACE_FLAG_DISK_IO 0x00000100 // physical disk IO +#define EVENT_TRACE_FLAG_DISK_FILE_IO 0x00000200 // requires disk IO + +#define EVENT_TRACE_FLAG_MEMORY_PAGE_FAULTS 0x00001000 // all page faults +#define EVENT_TRACE_FLAG_MEMORY_HARD_FAULTS 0x00002000 // hard faults only + +#define EVENT_TRACE_FLAG_NETWORK_TCPIP 0x00010000 // tcpip send & receive + +#define EVENT_TRACE_FLAG_REGISTRY 0x00020000 // registry calls +#define EVENT_TRACE_FLAG_DBGPRINT 0x00040000 // DbgPrint(ex) Calls + +// +// Enable flags for Kernel Events on Vista and above +// +#define EVENT_TRACE_FLAG_PROCESS_COUNTERS 0x00000008 // process perf counters +#define EVENT_TRACE_FLAG_CSWITCH 0x00000010 // context switches +#define EVENT_TRACE_FLAG_DPC 0x00000020 // deffered procedure calls +#define EVENT_TRACE_FLAG_INTERRUPT 0x00000040 // interrupts +#define EVENT_TRACE_FLAG_SYSTEMCALL 0x00000080 // system calls + +#define EVENT_TRACE_FLAG_DISK_IO_INIT 0x00000400 // physical disk IO initiation +#define EVENT_TRACE_FLAG_ALPC 0x00100000 // ALPC traces +#define EVENT_TRACE_FLAG_SPLIT_IO 0x00200000 // split io traces (VolumeManager) + +#define EVENT_TRACE_FLAG_DRIVER 0x00800000 // driver delays +#define EVENT_TRACE_FLAG_PROFILE 0x01000000 // sample based profiling +#define EVENT_TRACE_FLAG_FILE_IO 0x02000000 // file IO +#define EVENT_TRACE_FLAG_FILE_IO_INIT 0x04000000 // file IO initiation + +#define EVENT_TRACE_FLAG_PMC_PROFILE 0x80000000 // sample based profiling (PMC) - NOT CONFIRMED! + +// +// Enable flags for Kernel Events on Win7 and above +// +#define EVENT_TRACE_FLAG_DISPATCHER 0x00000800 // scheduler (ReadyThread) +#define EVENT_TRACE_FLAG_VIRTUAL_ALLOC 0x00004000 // VM operations + +// +// Enable flags for Kernel Events on Win8 and above +// +#define EVENT_TRACE_FLAG_VAMAP 0x00008000 // map/unmap (excluding images) +#define EVENT_TRACE_FLAG_NO_SYSCONFIG 0x10000000 // Do not do sys config rundown + +/////////////////////////////////////////////////////////////////////////////// + +#pragma warning(push) +#pragma warning (disable:4201) + +#ifndef EVENT_DESCRIPTOR_DEF +#define EVENT_DESCRIPTOR_DEF +typedef struct _EVENT_DESCRIPTOR { + + USHORT Id; + UCHAR Version; + UCHAR Channel; + UCHAR Level; + UCHAR Opcode; + USHORT Task; + ULONGLONG Keyword; + +} EVENT_DESCRIPTOR, *PEVENT_DESCRIPTOR; +typedef const EVENT_DESCRIPTOR *PCEVENT_DESCRIPTOR; +#endif +/////////////////////////////////////////////////////////////////////////////// +#ifndef EVENT_HEADER_DEF +#define EVENT_HEADER_DEF +typedef struct _EVENT_HEADER { + + USHORT Size; + USHORT HeaderType; + USHORT Flags; + USHORT EventProperty; + ULONG ThreadId; + ULONG ProcessId; + LARGE_INTEGER TimeStamp; + GUID ProviderId; + EVENT_DESCRIPTOR EventDescriptor; + union { + struct { + ULONG KernelTime; + ULONG UserTime; + } DUMMYSTRUCTNAME; + ULONG64 ProcessorTime; + + } DUMMYUNIONNAME; + GUID ActivityId; + +} EVENT_HEADER, *PEVENT_HEADER; +#endif +/////////////////////////////////////////////////////////////////////////////// +#ifndef EVENT_HEADER_EXTENDED_DATA_ITEM_DEF +#define EVENT_HEADER_EXTENDED_DATA_ITEM_DEF +typedef struct _EVENT_HEADER_EXTENDED_DATA_ITEM { + + USHORT Reserved1; // Reserved for internal use + USHORT ExtType; // Extended info type + struct { + USHORT Linkage : 1; // Indicates additional extended + // data item + USHORT Reserved2 : 15; + }; + USHORT DataSize; // Size of extended info data + ULONGLONG DataPtr; // Pointer to extended info data + +} EVENT_HEADER_EXTENDED_DATA_ITEM, *PEVENT_HEADER_EXTENDED_DATA_ITEM; +#endif +/////////////////////////////////////////////////////////////////////////////// +#ifndef ETW_BUFFER_CONTEXT_DEF +#define ETW_BUFFER_CONTEXT_DEF +typedef struct _ETW_BUFFER_CONTEXT { + union { + struct { + UCHAR ProcessorNumber; + UCHAR Alignment; + } DUMMYSTRUCTNAME; + USHORT ProcessorIndex; + } DUMMYUNIONNAME; + USHORT LoggerId; +} ETW_BUFFER_CONTEXT, *PETW_BUFFER_CONTEXT; +#endif +/////////////////////////////////////////////////////////////////////////////// +#ifndef EVENT_RECORD_DEF +#define EVENT_RECORD_DEF +typedef struct _EVENT_RECORD { + EVENT_HEADER EventHeader; + ETW_BUFFER_CONTEXT BufferContext; + USHORT ExtendedDataCount; + + USHORT UserDataLength; + PEVENT_HEADER_EXTENDED_DATA_ITEM ExtendedData; + PVOID UserData; + PVOID UserContext; +} EVENT_RECORD, *PEVENT_RECORD; +#endif +/////////////////////////////////////////////////////////////////////////////// +typedef struct _EVENT_TRACE_PROPERTIES { + WNODE_HEADER Wnode; + // + // data provided by caller + ULONG BufferSize; // buffer size for logging (kbytes) + ULONG MinimumBuffers; // minimum to preallocate + ULONG MaximumBuffers; // maximum buffers allowed + ULONG MaximumFileSize; // maximum logfile size (in MBytes) + ULONG LogFileMode; // sequential, circular + ULONG FlushTimer; // buffer flush timer, in seconds + ULONG EnableFlags; // trace enable flags + union { + LONG AgeLimit; // unused + LONG FlushThreshold; // Number of buffers to fill before flushing + } DUMMYUNIONNAME; + + // data returned to caller + ULONG NumberOfBuffers; // no of buffers in use + ULONG FreeBuffers; // no of buffers free + ULONG EventsLost; // event records lost + ULONG BuffersWritten; // no of buffers written to file + ULONG LogBuffersLost; // no of logfile write failures + ULONG RealTimeBuffersLost; // no of rt delivery failures + HANDLE LoggerThreadId; // thread id of Logger + ULONG LogFileNameOffset; // Offset to LogFileName + ULONG LoggerNameOffset; // Offset to LoggerName +} EVENT_TRACE_PROPERTIES, *PEVENT_TRACE_PROPERTIES; + +typedef struct _EVENT_TRACE_HEADER { // overlays WNODE_HEADER + USHORT Size; // Size of entire record + union { + USHORT FieldTypeFlags; // Indicates valid fields + struct { + UCHAR HeaderType; // Header type - internal use only + UCHAR MarkerFlags; // Marker - internal use only + } DUMMYSTRUCTNAME; + } DUMMYUNIONNAME; + union { + ULONG Version; + struct { + UCHAR Type; // event type + UCHAR Level; // trace instrumentation level + USHORT Version; // version of trace record + } Class; + } DUMMYUNIONNAME2; + ULONG ThreadId; // Thread Id + ULONG ProcessId; // Process Id + LARGE_INTEGER TimeStamp; // time when event happens + union { + GUID Guid; // Guid that identifies event + ULONGLONG GuidPtr; // use with WNODE_FLAG_USE_GUID_PTR + } DUMMYUNIONNAME3; + union { + struct { + ULONG KernelTime; // Kernel Mode CPU ticks + ULONG UserTime; // User mode CPU ticks + } DUMMYSTRUCTNAME; + ULONG64 ProcessorTime; // Processor Clock + struct { + ULONG ClientContext; // Reserved + ULONG Flags; // Event Flags + } DUMMYSTRUCTNAME2; + } DUMMYUNIONNAME4; +} EVENT_TRACE_HEADER, *PEVENT_TRACE_HEADER; + +typedef struct _EVENT_TRACE { + EVENT_TRACE_HEADER Header; // Event trace header + ULONG InstanceId; // Instance Id of this event + ULONG ParentInstanceId; // Parent Instance Id. + GUID ParentGuid; // Parent Guid; + PVOID MofData; // Pointer to Variable Data + ULONG MofLength; // Variable Datablock Length + union { + ULONG ClientContext; + ETW_BUFFER_CONTEXT BufferContext; + } DUMMYUNIONNAME; +} EVENT_TRACE, *PEVENT_TRACE; + +typedef struct _TRACE_LOGFILE_HEADER { + ULONG BufferSize; // Logger buffer size in Kbytes + union { + ULONG Version; // Logger version + struct { + UCHAR MajorVersion; + UCHAR MinorVersion; + UCHAR SubVersion; + UCHAR SubMinorVersion; + } VersionDetail; + } DUMMYUNIONNAME; + ULONG ProviderVersion; // defaults to NT version + ULONG NumberOfProcessors; // Number of Processors + LARGE_INTEGER EndTime; // Time when logger stops + ULONG TimerResolution; // assumes timer is constant!!! + ULONG MaximumFileSize; // Maximum in Mbytes + ULONG LogFileMode; // specify logfile mode + ULONG BuffersWritten; // used to file start of Circular File + union { + GUID LogInstanceGuid; // For RealTime Buffer Delivery + struct { + ULONG StartBuffers; // Count of buffers written at start. + ULONG PointerSize; // Size of pointer type in bits + ULONG EventsLost; // Events losts during log session + ULONG CpuSpeedInMHz; // Cpu Speed in MHz + } DUMMYSTRUCTNAME; + } DUMMYUNIONNAME2; +#if defined(_WMIKM_) + PWCHAR LoggerName; + PWCHAR LogFileName; + RTL_TIME_ZONE_INFORMATION TimeZone; +#else + LPWSTR LoggerName; + LPWSTR LogFileName; + TIME_ZONE_INFORMATION TimeZone; +#endif + LARGE_INTEGER BootTime; + LARGE_INTEGER PerfFreq; // Reserved + LARGE_INTEGER StartTime; // Reserved + ULONG ReservedFlags; // ClockType + ULONG BuffersLost; +} TRACE_LOGFILE_HEADER, *PTRACE_LOGFILE_HEADER; + +typedef enum _TRACE_QUERY_INFO_CLASS { + TraceGuidQueryList, + TraceGuidQueryInfo, + TraceGuidQueryProcess, + TraceStackTracingInfo, // Win7 + TraceSystemTraceEnableFlagsInfo, + TraceSampledProfileIntervalInfo, + TraceProfileSourceConfigInfo, + TraceProfileSourceListInfo, + TracePmcEventListInfo, + TracePmcCounterListInfo, + MaxTraceSetInfoClass +} TRACE_QUERY_INFO_CLASS, TRACE_INFO_CLASS; + +typedef struct _CLASSIC_EVENT_ID { + GUID EventGuid; + UCHAR Type; + UCHAR Reserved[7]; +} CLASSIC_EVENT_ID, *PCLASSIC_EVENT_ID; + +typedef struct _TRACE_PROFILE_INTERVAL { + ULONG Source; + ULONG Interval; +} TRACE_PROFILE_INTERVAL, *PTRACE_PROFILE_INTERVAL; + +typedef struct _EVENT_TRACE_LOGFILEW +EVENT_TRACE_LOGFILEW, *PEVENT_TRACE_LOGFILEW; + +typedef ULONG(WINAPI * PEVENT_TRACE_BUFFER_CALLBACKW) +(PEVENT_TRACE_LOGFILEW Logfile); + +typedef VOID(WINAPI *PEVENT_CALLBACK)(PEVENT_TRACE pEvent); + +typedef struct _EVENT_RECORD +EVENT_RECORD, *PEVENT_RECORD; + +typedef VOID(WINAPI *PEVENT_RECORD_CALLBACK) (PEVENT_RECORD EventRecord); + +struct _EVENT_TRACE_LOGFILEW { + LPWSTR LogFileName; // Logfile Name + LPWSTR LoggerName; // LoggerName + LONGLONG CurrentTime; // timestamp of last event + ULONG BuffersRead; // buffers read to date + union { + // Mode of the logfile + ULONG LogFileMode; + // Processing flags used on Vista and above + ULONG ProcessTraceMode; + } DUMMYUNIONNAME; + EVENT_TRACE CurrentEvent; // Current Event from this stream. + TRACE_LOGFILE_HEADER LogfileHeader; // logfile header structure + PEVENT_TRACE_BUFFER_CALLBACKW // callback before each buffer + BufferCallback; // is read + // + // following variables are filled for BufferCallback. + // + ULONG BufferSize; + ULONG Filled; + ULONG EventsLost; + // + // following needs to be propaged to each buffer + // + union { + // Callback with EVENT_TRACE + PEVENT_CALLBACK EventCallback; + // Callback with EVENT_RECORD on Vista and above + PEVENT_RECORD_CALLBACK EventRecordCallback; + } DUMMYUNIONNAME2; + + ULONG IsKernelTrace; // TRUE for kernel logfile + + PVOID Context; // reserved for internal use +}; + +#pragma warning(pop) + +#define PEVENT_TRACE_BUFFER_CALLBACK PEVENT_TRACE_BUFFER_CALLBACKW +#define EVENT_TRACE_LOGFILE EVENT_TRACE_LOGFILEW +#define PEVENT_TRACE_LOGFILE PEVENT_TRACE_LOGFILEW +#define KERNEL_LOGGER_NAME KERNEL_LOGGER_NAMEW +#define GLOBAL_LOGGER_NAME GLOBAL_LOGGER_NAMEW +#define EVENT_LOGGER_NAME EVENT_LOGGER_NAMEW + +EXTERN_C +ULONG +WMIAPI +ProcessTrace( + _In_reads_(HandleCount) PTRACEHANDLE HandleArray, + _In_ ULONG HandleCount, + _In_opt_ LPFILETIME StartTime, + _In_opt_ LPFILETIME EndTime +); + +EXTERN_C +ULONG +WMIAPI +StartTraceW( + _Out_ PTRACEHANDLE TraceHandle, + _In_ LPCWSTR InstanceName, + _Inout_ PEVENT_TRACE_PROPERTIES Properties +); + +EXTERN_C +ULONG +WMIAPI +ControlTraceW( + _In_ TRACEHANDLE TraceHandle, + _In_opt_ LPCWSTR InstanceName, + _Inout_ PEVENT_TRACE_PROPERTIES Properties, + _In_ ULONG ControlCode +); + +EXTERN_C +TRACEHANDLE +WMIAPI +OpenTraceW( + _Inout_ PEVENT_TRACE_LOGFILEW Logfile +); + +EXTERN_C +ULONG +WMIAPI +CloseTrace( + _In_ TRACEHANDLE TraceHandle +); + +EXTERN_C +ULONG +WMIAPI +TraceSetInformation( + _In_ TRACEHANDLE SessionHandle, + _In_ TRACE_INFO_CLASS InformationClass, + _In_reads_bytes_(InformationLength) PVOID TraceInformation, + _In_ ULONG InformationLength +); + +EXTERN_C +ULONG +WMIAPI +TraceQueryInformation( + _In_ TRACEHANDLE SessionHandle, + _In_ TRACE_INFO_CLASS InformationClass, + _Out_writes_bytes_(InformationLength) PVOID TraceInformation, + _In_ ULONG InformationLength, + _Out_opt_ PULONG ReturnLength +); + +////////////////////////////////////////////////////////////////////////// +#define RegisterTraceGuids RegisterTraceGuidsW +#define StartTrace StartTraceW +#define ControlTrace ControlTraceW +#define StopTrace StopTraceW +#define QueryTrace QueryTraceW +#define UpdateTrace UpdateTraceW +#define FlushTrace FlushTraceW +#define QueryAllTraces QueryAllTracesW +#define OpenTrace OpenTraceW +////////////////////////////////////////////////////////////////////////// +#else +#define INITGUID // Causes definition of SystemTraceControlGuid in evntrace.h. +#include <wmistr.h> +#include <evntrace.h> +#include <strsafe.h> +#include <evntcons.h> +#endif //DECLARE_ETW + +namespace Optick +{ +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +class ETW : public Trace +{ + static const int ETW_BUFFER_SIZE = 1024 << 10; // 1Mb + static const int ETW_BUFFER_COUNT = 32; + static const int ETW_MAXIMUM_SESSION_NAME = 1024; + + EVENT_TRACE_PROPERTIES *traceProperties; + EVENT_TRACE_LOGFILE logFile; + TRACEHANDLE traceSessionHandle; + TRACEHANDLE openedHandle; + + HANDLE processThreadHandle; + DWORD currentProcessId; + + bool isActive; + + static DWORD WINAPI RunProcessTraceThreadFunction(LPVOID parameter); + static void AdjustPrivileges(); + + unordered_map<uint64_t, const EventDescription*> syscallDescriptions; + + void ResolveSysCalls(); +public: + + unordered_set<uint64> activeThreadsIDs; + + ETW(); + ~ETW(); + + virtual CaptureStatus::Type Start(Mode::Type mode, int frequency, const ThreadList& threads) override; + virtual bool Stop() override; + + DWORD GetProcessID() const { return currentProcessId; } +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct CSwitch +{ + // New thread ID after the switch. + uint32 NewThreadId; + + // Previous thread ID. + uint32 OldThreadId; + + // Thread priority of the new thread. + int8 NewThreadPriority; + + // Thread priority of the previous thread. + int8 OldThreadPriority; + + //The index of the C-state that was last used by the processor. A value of 0 represents the lightest idle state with higher values representing deeper C-states. + uint8 PreviousCState; + + // Not used. + int8 SpareByte; + + // Wait reason for the previous thread. The following are the possible values: + // 0 Executive + // 1 FreePage + // 2 PageIn + // 3 PoolAllocation + // 4 DelayExecution + // 5 Suspended + // 6 UserRequest + // 7 WrExecutive + // 8 WrFreePage + // 9 WrPageIn + // 10 WrPoolAllocation + // 11 WrDelayExecution + // 12 WrSuspended + // 13 WrUserRequest + // 14 WrEventPair + // 15 WrQueue + // 16 WrLpcReceive + // 17 WrLpcReply + // 18 WrVirtualMemory + // 19 WrPageOut + // 20 WrRendezvous + // 21 WrKeyedEvent + // 22 WrTerminated + // 23 WrProcessInSwap + // 24 WrCpuRateControl + // 25 WrCalloutStack + // 26 WrKernel + // 27 WrResource + // 28 WrPushLock + // 29 WrMutex + // 30 WrQuantumEnd + // 31 WrDispatchInt + // 32 WrPreempted + // 33 WrYieldExecution + // 34 WrFastMutex + // 35 WrGuardedMutex + // 36 WrRundown + // 37 MaximumWaitReason + int8 OldThreadWaitReason; + + // Wait mode for the previous thread. The following are the possible values: + // 0 KernelMode + // 1 UserMode + int8 OldThreadWaitMode; + + // State of the previous thread. The following are the possible state values: + // 0 Initialized + // 1 Ready + // 2 Running + // 3 Standby + // 4 Terminated + // 5 Waiting + // 6 Transition + // 7 DeferredReady (added for Windows Server 2003) + int8 OldThreadState; + + // Ideal wait time of the previous thread. + int8 OldThreadWaitIdealProcessor; + + // Wait time for the new thread. + uint32 NewThreadWaitTime; + + // Reserved. + uint32 Reserved; + + static const byte OPCODE = 36; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct StackWalk_Event +{ + // Original event time stamp from the event header + uint64 EventTimeStamp; + + // The process identifier of the original event + uint32 StackProcess; + + // The thread identifier of the original event + uint32 StackThread; + + // Callstack head + uint64 Stack0; + + static const byte OPCODE = 32; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct Thread_TypeGroup1 +{ + // Process identifier of the thread involved in the event. + uint32 ProcessId; + // Thread identifier of the thread involved in the event. + uint32 TThreadId; + // Base address of the thread's stack. + uint64 StackBase; + // Limit of the thread's stack. + uint64 StackLimit; + // Base address of the thread's user-mode stack. + uint64 UserStackBase; + // Limit of the thread's user-mode stack. + uint64 UserStackLimit; + // The set of processors on which the thread is allowed to run. + uint32 Affinity; + // Starting address of the function to be executed by this thread. + uint64 Win32StartAddr; + // Thread environment block base address. + uint64 TebBase; + // Identifies the service if the thread is owned by a service; otherwise, zero. + uint32 SubProcessTag; + // The scheduler priority of the thread + uint8 BasePriority; + // A memory page priority hint for memory pages accessed by the thread. + uint8 PagePriority; + // An IO priority hint for scheduling IOs generated by the thread. + uint8 IoPriority; + // Not used. + uint8 ThreadFlags; + + enum struct Opcode : uint8 + { + Start = 1, + End = 2, + DCStart = 3, + DCEnd = 4, + }; +}; + +size_t GetSIDSize(uint8* ptr) +{ + size_t result = 0; + + int sid = *((int*)ptr); + + if (sid != 0) + { + size_t tokenSize = 16; + ptr += tokenSize; + result += tokenSize; + result += 8 + (4 * ((SID*)ptr)->SubAuthorityCount); + } + else + { + result = 4; + } + + return result; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// https://github.com/Microsoft/perfview/blob/688a8564062d51321bbab53cd71d9e174a77d2ce/src/TraceEvent/TraceEvent.cs +struct Process_TypeGroup1 +{ + // The address of the process object in the kernel. + uint64 UniqueProcessKey; + // Global process identifier that you can use to identify a process. + uint32 ProcessId; + // Unique identifier of the process that creates this process. + uint32 ParentId; + // Unique identifier that an operating system generates when it creates a new session. + uint32 SessionId; + // Exit status of the stopped process. + int32 ExitStatus; + // The physical address of the page table of the process. + uint64 DirectoryTableBase; + // (?) uint8 Flags; + // object UserSID; + // string ImageFileName; + // wstring CommandLine; + + static size_t GetSIDOffset(PEVENT_RECORD pEvent) + { + if (pEvent->EventHeader.EventDescriptor.Version >= 4) + return 36; + + if (pEvent->EventHeader.EventDescriptor.Version == 3) + return 32; + + return 24; + } + + const char* GetProcessName(PEVENT_RECORD pEvent) const + { + OPTICK_ASSERT((pEvent->EventHeader.Flags & EVENT_HEADER_FLAG_64_BIT_HEADER) != 0, "32-bit is not supported! Disable OPTICK_ENABLE_TRACING on 32-bit platform if needed!"); + size_t sidOffset = GetSIDOffset(pEvent); + size_t sidSize = GetSIDSize((uint8*)this + sidOffset); + return (char*)this + sidOffset + sidSize; + } + + enum struct Opcode + { + Start = 1, + End = 2, + DCStart = 3, + DCEnd = 4, + Defunct = 39, + }; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct SampledProfile +{ + uint32 InstructionPointer; + uint32 ThreadId; + uint32 Count; + + static const byte OPCODE = 46; +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct SysCallEnter +{ + uintptr_t SysCallAddress; + + static const byte OPCODE = 51; +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct SysCallExit +{ + uint32 SysCallNtStatus; + + static const byte OPCODE = 52; +}; + + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// ce1dbfb4-137e-4da6-87b0-3f59aa102cbc +DEFINE_GUID(SampledProfileGuid, 0xce1dbfb4, 0x137e, 0x4da6, 0x87, 0xb0, 0x3f, 0x59, 0xaa, 0x10, 0x2c, 0xbc); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// 3d6fa8d1-fe05-11d0-9dda-00c04fd7ba7c +// https://docs.microsoft.com/en-us/windows/desktop/etw/thread +DEFINE_GUID(ThreadGuid, 0x3d6fa8d1, 0xfe05, 0x11d0, 0x9d, 0xda, 0x00, 0xc0, 0x4f, 0xd7, 0xba, 0x7c); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// 3d6fa8d0-fe05-11d0-9dda-00c04fd7ba7c +// https://docs.microsoft.com/en-us/windows/desktop/etw/process +DEFINE_GUID(ProcessGuid, 0x3d6fa8d0, 0xfe05, 0x11d0, 0x9d, 0xda, 0x00, 0xc0, 0x4f, 0xd7, 0xba, 0x7c); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +const int MAX_CPU_CORES = 256; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct ETWRuntime +{ + array<ThreadID, MAX_CPU_CORES> activeCores; + vector<std::pair<uint8_t, SysCallData*>> activeSyscalls; + + ETWRuntime() + { + Reset(); + } + + void Reset() + { + activeCores.fill(INVALID_THREAD_ID); + activeSyscalls.resize(0);; + } +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +ETWRuntime g_ETWRuntime; +ETW g_ETW; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void WINAPI OnRecordEvent(PEVENT_RECORD eventRecord) +{ + //static uint8 cpuCoreIsExecutingThreadFromOurProcess[256] = { 0 }; + + const byte opcode = eventRecord->EventHeader.EventDescriptor.Opcode; + + if (opcode == CSwitch::OPCODE) + { + if (sizeof(CSwitch) == eventRecord->UserDataLength) + { + CSwitch* pSwitchEvent = (CSwitch*)eventRecord->UserData; + + SwitchContextDesc desc; + desc.reason = pSwitchEvent->OldThreadWaitReason; + desc.cpuId = eventRecord->BufferContext.ProcessorNumber; + desc.oldThreadId = (uint64)pSwitchEvent->OldThreadId; + desc.newThreadId = (uint64)pSwitchEvent->NewThreadId; + desc.timestamp = eventRecord->EventHeader.TimeStamp.QuadPart; + Core::Get().ReportSwitchContext(desc); + + // Assign ThreadID to the cores + if (g_ETW.activeThreadsIDs.find(desc.newThreadId) != g_ETW.activeThreadsIDs.end()) + { + g_ETWRuntime.activeCores[desc.cpuId] = desc.newThreadId; + } + else if (g_ETW.activeThreadsIDs.find(desc.oldThreadId) != g_ETW.activeThreadsIDs.end()) + { + g_ETWRuntime.activeCores[desc.cpuId] = INVALID_THREAD_ID; + } + } + } + else if (opcode == StackWalk_Event::OPCODE) + { + if (eventRecord->UserData && eventRecord->UserDataLength >= sizeof(StackWalk_Event)) + { + //TODO: Support x86 windows kernels + const size_t osKernelPtrSize = sizeof(uint64); + + StackWalk_Event* pStackWalkEvent = (StackWalk_Event*)eventRecord->UserData; + uint32 count = 1 + (eventRecord->UserDataLength - sizeof(StackWalk_Event)) / osKernelPtrSize; + + if (count && pStackWalkEvent->StackThread != 0) + { + if (pStackWalkEvent->StackProcess == g_ETW.GetProcessID()) + { + CallstackDesc desc; + desc.threadID = pStackWalkEvent->StackThread; + desc.timestamp = pStackWalkEvent->EventTimeStamp; + + static_assert(osKernelPtrSize == sizeof(uint64), "Incompatible types!"); + desc.callstack = &pStackWalkEvent->Stack0; + + desc.count = (uint8)count; + Core::Get().ReportStackWalk(desc); + } + } + } + } + else if (opcode == SampledProfile::OPCODE) + { + SampledProfile* pEvent = (SampledProfile*)eventRecord->UserData; + OPTICK_UNUSED(pEvent); + } + else if (opcode == SysCallEnter::OPCODE) + { + if (eventRecord->UserDataLength >= sizeof(SysCallEnter)) + { + uint8_t cpuId = eventRecord->BufferContext.ProcessorNumber; + uint64_t threadId = g_ETWRuntime.activeCores[cpuId]; + + if (threadId != INVALID_THREAD_ID) + { + SysCallEnter* pEventEnter = (SysCallEnter*)eventRecord->UserData; + + SysCallData& sysCall = Core::Get().syscallCollector.Add(); + sysCall.start = eventRecord->EventHeader.TimeStamp.QuadPart; + sysCall.finish = EventTime::INVALID_TIMESTAMP; + sysCall.threadID = threadId; + sysCall.id = pEventEnter->SysCallAddress; + sysCall.description = nullptr; + + g_ETWRuntime.activeSyscalls.push_back(std::make_pair(cpuId, &sysCall)); + } + } + } + else if (opcode == SysCallExit::OPCODE) + { + if (eventRecord->UserDataLength >= sizeof(SysCallExit)) + { + uint8_t cpuId = eventRecord->BufferContext.ProcessorNumber; + if (g_ETWRuntime.activeCores[cpuId] != INVALID_THREAD_ID) + { + for (int i = (int)g_ETWRuntime.activeSyscalls.size() - 1; i >= 0; --i) + { + if (g_ETWRuntime.activeSyscalls[i].first == cpuId) + { + g_ETWRuntime.activeSyscalls[i].second->finish = eventRecord->EventHeader.TimeStamp.QuadPart; + g_ETWRuntime.activeSyscalls.erase(g_ETWRuntime.activeSyscalls.begin() + i); + break; + } + } + } + } + } + else + { + // VS TODO: We might have a situation where a thread was deleted and the new thread was created with the same threadID + // Ignoring for now - profiling sessions are quite short - not critical + if (IsEqualGUID(eventRecord->EventHeader.ProviderId, ThreadGuid)) + { + if (eventRecord->UserDataLength >= sizeof(Thread_TypeGroup1)) + { + const Thread_TypeGroup1* pThreadEvent = (const Thread_TypeGroup1*)eventRecord->UserData; + Core::Get().RegisterThreadDescription(ThreadDescription("", pThreadEvent->TThreadId, pThreadEvent->ProcessId, 1, pThreadEvent->BasePriority)); + } + + } + else if (IsEqualGUID(eventRecord->EventHeader.ProviderId, ProcessGuid)) + { + if (eventRecord->UserDataLength >= sizeof(Process_TypeGroup1)) + { + const Process_TypeGroup1* pProcessEvent = (const Process_TypeGroup1*)eventRecord->UserData; + Core::Get().RegisterProcessDescription(ProcessDescription(pProcessEvent->GetProcessName(eventRecord), pProcessEvent->ProcessId, pProcessEvent->UniqueProcessKey)); + } + } + } +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +static ULONG WINAPI OnBufferRecord(_In_ PEVENT_TRACE_LOGFILE Buffer) +{ + OPTICK_UNUSED(Buffer); + return true; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +const TRACEHANDLE INVALID_TRACEHANDLE = (TRACEHANDLE)-1; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +DWORD WINAPI ETW::RunProcessTraceThreadFunction(LPVOID parameter) +{ + Core::Get().RegisterThreadDescription(ThreadDescription("[Optick] ETW", GetCurrentThreadId(), GetCurrentProcessId())); + ETW* etw = (ETW*)parameter; + ULONG status = ProcessTrace(&etw->openedHandle, 1, 0, 0); + OPTICK_UNUSED(status); + return 0; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void ETW::AdjustPrivileges() +{ +#if OPTICK_PC + HANDLE token = 0; + if (OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token)) + { + TOKEN_PRIVILEGES tokenPrivileges; + memset(&tokenPrivileges, 0, sizeof(tokenPrivileges)); + tokenPrivileges.PrivilegeCount = 1; + tokenPrivileges.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; + LookupPrivilegeValue(NULL, SE_SYSTEM_PROFILE_NAME, &tokenPrivileges.Privileges[0].Luid); + + AdjustTokenPrivileges(token, FALSE, &tokenPrivileges, 0, (PTOKEN_PRIVILEGES)NULL, 0); + CloseHandle(token); + } +#endif +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void ETW::ResolveSysCalls() +{ + if (SymbolEngine* symEngine = Platform::GetSymbolEngine()) + { + Core::Get().syscallCollector.syscallPool.ForEach([this, symEngine](SysCallData& data) + { + auto it = syscallDescriptions.find(data.id); + if (it == syscallDescriptions.end()) + { + const Symbol* symbol = symEngine->GetSymbol(data.id); + if (symbol != nullptr) + { + string name(symbol->function.begin(), symbol->function.end()); + + data.description = EventDescription::CreateShared(name.c_str(), "SysCall", (long)data.id); + syscallDescriptions.insert(std::pair<const uint64_t, const EventDescription *>(data.id, data.description)); + } + } + else + { + data.description = it->second; + } + }); + } +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +ETW::ETW() + : isActive(false) + , traceSessionHandle(INVALID_TRACEHANDLE) + , openedHandle(INVALID_TRACEHANDLE) + , processThreadHandle(INVALID_HANDLE_VALUE) + , traceProperties(nullptr) +{ + currentProcessId = GetCurrentProcessId(); +} + +CaptureStatus::Type ETW::Start(Mode::Type mode, int frequency, const ThreadList& threads) +{ + if (!isActive) + { + AdjustPrivileges(); + + g_ETWRuntime.Reset(); + + activeThreadsIDs.clear(); + for (auto it = threads.begin(); it != threads.end(); ++it) + { + ThreadEntry* entry = *it; + if (entry->isAlive) + { + activeThreadsIDs.insert(entry->description.threadID); + } + } + + + ULONG bufferSize = sizeof(EVENT_TRACE_PROPERTIES) + (ETW_MAXIMUM_SESSION_NAME + MAX_PATH) * sizeof(WCHAR); + if (traceProperties == nullptr) + traceProperties = (EVENT_TRACE_PROPERTIES*)Memory::Alloc(bufferSize); + ZeroMemory(traceProperties, bufferSize); + traceProperties->Wnode.BufferSize = bufferSize; + traceProperties->LoggerNameOffset = sizeof(EVENT_TRACE_PROPERTIES); + StringCchCopyW((LPWSTR)((PCHAR)traceProperties + traceProperties->LoggerNameOffset), ETW_MAXIMUM_SESSION_NAME, KERNEL_LOGGER_NAMEW); + traceProperties->EnableFlags = 0; + + traceProperties->BufferSize = ETW_BUFFER_SIZE; + traceProperties->MinimumBuffers = ETW_BUFFER_COUNT; + + if (mode & Mode::SWITCH_CONTEXT) + { + traceProperties->EnableFlags |= EVENT_TRACE_FLAG_CSWITCH; + } + + if (mode & Mode::AUTOSAMPLING) + { + traceProperties->EnableFlags |= EVENT_TRACE_FLAG_PROFILE; + } + + if (mode & Mode::SYS_CALLS) + { + traceProperties->EnableFlags |= EVENT_TRACE_FLAG_SYSTEMCALL; + } + + if (mode & Mode::OTHER_PROCESSES) + { + traceProperties->EnableFlags |= EVENT_TRACE_FLAG_PROCESS; + traceProperties->EnableFlags |= EVENT_TRACE_FLAG_THREAD; + } + + traceProperties->LogFileMode = EVENT_TRACE_REAL_TIME_MODE; + traceProperties->Wnode.Flags = WNODE_FLAG_TRACED_GUID; + // + // https://msdn.microsoft.com/en-us/library/windows/desktop/aa364160(v=vs.85).aspx + // Clock resolution = QPC + traceProperties->Wnode.ClientContext = 1; + traceProperties->Wnode.Guid = SystemTraceControlGuid; + + // ERROR_BAD_LENGTH(24): The Wnode.BufferSize member of Properties specifies an incorrect size. Properties does not have sufficient space allocated to hold a copy of SessionName. + // ERROR_ALREADY_EXISTS(183): A session with the same name or GUID is already running. + // ERROR_ACCESS_DENIED(5): Only users with administrative privileges, users in the Performance Log Users group, and services running as LocalSystem, LocalService, NetworkService can control event tracing sessions. + // ERROR_INVALID_PARAMETER(87) + // ERROR_BAD_PATHNAME(161) + // ERROR_DISK_FULL(112) + // ERROR_NO_SUCH_PRIVILEGE(1313) + int retryCount = 4; + ULONG status = CaptureStatus::OK; + + while (--retryCount >= 0) + { + status = StartTrace(&traceSessionHandle, KERNEL_LOGGER_NAME, traceProperties); + + switch (status) + { + case ERROR_NO_SUCH_PRIVILEGE: + AdjustPrivileges(); + break; + + case ERROR_ALREADY_EXISTS: + ControlTrace(0, KERNEL_LOGGER_NAME, traceProperties, EVENT_TRACE_CONTROL_STOP); + break; + + case ERROR_ACCESS_DENIED: + return CaptureStatus::ERR_TRACER_ACCESS_DENIED; + + case ERROR_SUCCESS: + retryCount = 0; + break; + + default: + return CaptureStatus::ERR_TRACER_FAILED; + } + } + + if (status != ERROR_SUCCESS) + { + return CaptureStatus::ERR_TRACER_FAILED; + } + + CLASSIC_EVENT_ID callstackSamples[4]; + int callstackCountSamplesCount = 0; + + if (mode & Mode::AUTOSAMPLING) + { + callstackSamples[callstackCountSamplesCount].EventGuid = SampledProfileGuid; + callstackSamples[callstackCountSamplesCount].Type = SampledProfile::OPCODE; + ++callstackCountSamplesCount; + } + + if (mode & Mode::SYS_CALLS) + { + callstackSamples[callstackCountSamplesCount].EventGuid = SampledProfileGuid; + callstackSamples[callstackCountSamplesCount].Type = SysCallEnter::OPCODE; + ++callstackCountSamplesCount; + } + + /* + callstackSamples[callstackCountSamplesCount].EventGuid = CSwitchProfileGuid; + callstackSamples[callstackCountSamplesCount].Type = CSwitch::OPCODE; + ++callstackCountSamplesCount; + */ + + + /* + https://msdn.microsoft.com/en-us/library/windows/desktop/dd392328%28v=vs.85%29.aspx?f=255&MSPPError=-2147217396 + Typically, on 64-bit computers, you cannot capture the kernel stack in certain contexts when page faults are not allowed. To enable walking the kernel stack on x64, set + the DisablePagingExecutive Memory Management registry value to 1. The DisablePagingExecutive registry value is located under the following registry key: + HKEY_LOCAL_MACHINE\System\CurrentControlSet\Control\Session Manager\Memory Management + */ + if (callstackCountSamplesCount > 0) + { + status = TraceSetInformation(traceSessionHandle, TraceStackTracingInfo, &callstackSamples[0], sizeof(CLASSIC_EVENT_ID) * callstackCountSamplesCount); + if (status != ERROR_SUCCESS) + { + OPTICK_FAILED("TraceSetInformation - failed"); + return CaptureStatus::ERR_TRACER_FAILED; + } + } + + if (mode & Mode::AUTOSAMPLING) + { + TRACE_PROFILE_INTERVAL itnerval = { 0 }; + memset(&itnerval, 0, sizeof(TRACE_PROFILE_INTERVAL)); + int step = 10000 * 1000 / frequency; // 1ms = 10000 steps + itnerval.Interval = step; // std::max(1221, std::min(step, 10000)); + // The SessionHandle is irrelevant for this information class and must be zero, else the function returns ERROR_INVALID_PARAMETER. + status = TraceSetInformation(0, TraceSampledProfileIntervalInfo, &itnerval, sizeof(TRACE_PROFILE_INTERVAL)); + OPTICK_ASSERT(status == ERROR_SUCCESS, "TraceSetInformation - failed"); + } + + ZeroMemory(&logFile, sizeof(EVENT_TRACE_LOGFILE)); + logFile.LoggerName = KERNEL_LOGGER_NAME; + logFile.ProcessTraceMode = (PROCESS_TRACE_MODE_REAL_TIME | PROCESS_TRACE_MODE_EVENT_RECORD | PROCESS_TRACE_MODE_RAW_TIMESTAMP); + logFile.EventRecordCallback = OnRecordEvent; + logFile.BufferCallback = OnBufferRecord; + openedHandle = OpenTrace(&logFile); + if (openedHandle == INVALID_TRACEHANDLE) + { + OPTICK_FAILED("OpenTrace - failed"); + return CaptureStatus::ERR_TRACER_FAILED; + } + + DWORD threadID; + processThreadHandle = CreateThread(0, 0, RunProcessTraceThreadFunction, this, 0, &threadID); + + isActive = true; + } + + return CaptureStatus::OK; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool ETW::Stop() +{ + if (!isActive) + { + return false; + } + + ULONG controlTraceResult = ControlTrace(openedHandle, KERNEL_LOGGER_NAME, traceProperties, EVENT_TRACE_CONTROL_STOP); + + // ERROR_CTX_CLOSE_PENDING(7007L): The call was successful. The ProcessTrace function will stop after it has processed all real-time events in its buffers (it will not receive any new events). + // ERROR_BUSY(170L): Prior to Windows Vista, you cannot close the trace until the ProcessTrace function completes. + // ERROR_INVALID_HANDLE(6L): One of the following is true: TraceHandle is NULL. TraceHandle is INVALID_HANDLE_VALUE. + ULONG closeTraceStatus = CloseTrace(openedHandle); + + // Wait for ProcessThread to finish + WaitForSingleObject(processThreadHandle, INFINITE); + BOOL wasThreadClosed = CloseHandle(processThreadHandle); + + isActive = false; + + //VS TODO: Disabling resolving of the syscalls - we can't use then as EventDescriptions at the moment + //ResolveSysCalls(); + + activeThreadsIDs.clear(); + + return wasThreadClosed && (closeTraceStatus == ERROR_SUCCESS) && (controlTraceResult == ERROR_SUCCESS); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +ETW::~ETW() +{ + Stop(); + Memory::Free(traceProperties); + traceProperties = nullptr; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +Trace* Platform::GetTrace() +{ + return &g_ETW; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +} + + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Symbol Resolving +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#define USE_DBG_HELP (OPTICK_PC) + +#if USE_DBG_HELP +#include <DbgHelp.h> +#pragma comment( lib, "DbgHelp.Lib" ) +#endif + +#include "optick_serialization.h" + +#if OPTICK_PC +#include <psapi.h> +#else +// Forward declare kernel functions +#pragma pack(push,8) +typedef struct _MODULEINFO { + LPVOID lpBaseOfDll; + DWORD SizeOfImage; + LPVOID EntryPoint; +} MODULEINFO, *LPMODULEINFO; +#pragma pack(pop) +#ifndef EnumProcessModulesEx +#define EnumProcessModulesEx K32EnumProcessModulesEx +EXTERN_C DWORD WINAPI K32EnumProcessModulesEx(HANDLE hProcess, HMODULE *lphModule, DWORD cb, LPDWORD lpcbNeeded, DWORD dwFilterFlag); +#endif +#ifndef GetModuleInformation +#define GetModuleInformation K32GetModuleInformation +EXTERN_C DWORD WINAPI K32GetModuleInformation(HANDLE hProcess, HMODULE hModule, LPMODULEINFO lpmodinfo, DWORD cb); +#endif + +#ifndef GetModuleFileNameExA +#define GetModuleFileNameExA K32GetModuleFileNameExA +EXTERN_C DWORD WINAPI K32GetModuleFileNameExA(HANDLE hProcess, HMODULE hModule, LPSTR lpFilename, DWORD nSize); +#endif +#endif + +namespace Optick +{ +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//void ReportLastError() +//{ +// LPVOID lpMsgBuf; +// DWORD dw = GetLastError(); +// +// FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, +// NULL, dw, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), +// (LPTSTR)&lpMsgBuf, 0, NULL); +// +// MessageBox(NULL, (LPCTSTR)lpMsgBuf, TEXT("Error"), MB_OK); +// LocalFree(lpMsgBuf); +//} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +typedef array<uintptr_t, 512> CallStackBuffer; +typedef unordered_map<uint64, Symbol> SymbolCache; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +class WinSymbolEngine : public SymbolEngine +{ + HANDLE hProcess; + + bool isInitialized; + + bool needRestorePreviousSettings; + uint32 previousOptions; + static const size_t MAX_SEARCH_PATH_LENGTH = 2048; + char previousSearchPath[MAX_SEARCH_PATH_LENGTH]; + + SymbolCache cache; + vector<Module> modules; + + void InitSystemModules(); + void InitApplicationModules(); +public: + WinSymbolEngine(); + ~WinSymbolEngine(); + + void Init(); + void Close(); + + // Get Symbol from PDB file + virtual const Symbol * GetSymbol(uint64 dwAddress) override; + virtual const vector<Module>& GetModules() override; +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +WinSymbolEngine::WinSymbolEngine() : isInitialized(false), hProcess(GetCurrentProcess()), needRestorePreviousSettings(false), previousOptions(0) +{ +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +WinSymbolEngine::~WinSymbolEngine() +{ + Close(); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +const Symbol* WinSymbolEngine::GetSymbol(uint64 address) +{ + if (address == 0) + return nullptr; + + Init(); + + Symbol& symbol = cache[address]; + + if (symbol.address != 0) + return &symbol; + + if (!isInitialized) + return nullptr; + + symbol.address = address; + +#if USE_DBG_HELP + DWORD64 dwAddress = static_cast<DWORD64>(address); + + // FileName and Line + IMAGEHLP_LINEW64 lineInfo; + memset(&lineInfo, 0, sizeof(IMAGEHLP_LINEW64)); + lineInfo.SizeOfStruct = sizeof(lineInfo); + DWORD dwDisp; + if (SymGetLineFromAddrW64(hProcess, dwAddress, &dwDisp, &lineInfo)) + { + symbol.file = lineInfo.FileName; + symbol.line = lineInfo.LineNumber; + } + + const size_t length = (sizeof(SYMBOL_INFOW) + MAX_SYM_NAME * sizeof(WCHAR) + sizeof(ULONG64) - 1) / sizeof(ULONG64) + 1; + + // Function Name + ULONG64 buffer[length]; + PSYMBOL_INFOW dbgSymbol = (PSYMBOL_INFOW)buffer; + memset(dbgSymbol, 0, sizeof(buffer)); + dbgSymbol->SizeOfStruct = sizeof(SYMBOL_INFOW); + dbgSymbol->MaxNameLen = MAX_SYM_NAME; + + DWORD64 offset = 0; + if (SymFromAddrW(hProcess, dwAddress, &offset, dbgSymbol)) + { + symbol.function.resize(dbgSymbol->NameLen); + memcpy(&symbol.function[0], &dbgSymbol->Name[0], sizeof(WCHAR) * dbgSymbol->NameLen); + } + + symbol.offset = static_cast<uintptr_t>(offset); +#endif + + return &symbol; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +const vector<Module>& WinSymbolEngine::GetModules() +{ + if (modules.empty()) + { + InitSystemModules(); + InitApplicationModules(); + } + return modules; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// const char* USER_SYMBOL_SEARCH_PATH = "http://msdl.microsoft.com/download/symbols"; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void WinSymbolEngine::Init() +{ + if (!isInitialized) + { +#if USE_DBG_HELP + previousOptions = SymGetOptions(); + + memset(previousSearchPath, 0, MAX_SEARCH_PATH_LENGTH); + SymGetSearchPath(hProcess, previousSearchPath, MAX_SEARCH_PATH_LENGTH); + + SymSetOptions(SymGetOptions() | SYMOPT_LOAD_LINES | SYMOPT_DEFERRED_LOADS | SYMOPT_UNDNAME | SYMOPT_INCLUDE_32BIT_MODULES | SYMOPT_LOAD_ANYTHING); + if (!SymInitialize(hProcess, NULL, TRUE)) + { + needRestorePreviousSettings = true; + SymCleanup(hProcess); + + if (SymInitialize(hProcess, NULL, TRUE)) + isInitialized = true; + } + else + { + isInitialized = true; + } + + const vector<Module>& loadedModules = GetModules(); + for (size_t i = 0; i < loadedModules.size(); ++i) + { + const Module& module = loadedModules[i]; + SymLoadModule64(hProcess, NULL, module.path.c_str(), NULL, (DWORD64)module.address, (DWORD)module.size); + } + +#else + isInitialized = true; +#endif + } +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +typedef DWORD(__stdcall *pZwQuerySystemInformation)(DWORD, LPVOID, DWORD, DWORD*); +#define SystemModuleInformation 11 // SYSTEMINFOCLASS +#define MAXIMUM_FILENAME_LENGTH 256 + +struct SYSTEM_MODULE_INFORMATION +{ + DWORD reserved1; + DWORD reserved2; + PVOID mappedBase; + PVOID imageBase; + DWORD imageSize; + DWORD flags; + WORD loadOrderIndex; + WORD initOrderIndex; + WORD loadCount; + WORD moduleNameOffset; + CHAR imageName[MAXIMUM_FILENAME_LENGTH]; +}; + +#pragma warning (push) +#pragma warning(disable : 4200) +struct MODULE_LIST +{ + DWORD dwModules; + SYSTEM_MODULE_INFORMATION pModulesInfo[]; +}; +#pragma warning (pop) + +void WinSymbolEngine::InitSystemModules() +{ + ULONG returnLength = 0; + ULONG systemInformationLength = 0; + MODULE_LIST* pModuleList = nullptr; + +#pragma warning (push) +#pragma warning(disable : 4191) + pZwQuerySystemInformation ZwQuerySystemInformation = (pZwQuerySystemInformation)GetProcAddress(GetModuleHandle(TEXT("ntdll.dll")), "ZwQuerySystemInformation"); +#pragma warning (pop) + + ZwQuerySystemInformation(SystemModuleInformation, pModuleList, systemInformationLength, &returnLength); + systemInformationLength = returnLength; + pModuleList = (MODULE_LIST*)Memory::Alloc(systemInformationLength); + DWORD status = ZwQuerySystemInformation(SystemModuleInformation, pModuleList, systemInformationLength, &returnLength); + if (status == ERROR_SUCCESS) + { + char systemRootPath[MAXIMUM_FILENAME_LENGTH] = { 0 }; +#if OPTICK_PC + ExpandEnvironmentStringsA("%SystemRoot%", systemRootPath, MAXIMUM_FILENAME_LENGTH); +#else + strcpy_s(systemRootPath, "C:\\Windows"); +#endif + + const char* systemRootPattern = "\\SystemRoot"; + + modules.reserve(modules.size() + pModuleList->dwModules); + + for (uint32_t i = 0; i < pModuleList->dwModules; ++i) + { + SYSTEM_MODULE_INFORMATION& module = pModuleList->pModulesInfo[i]; + + char path[MAXIMUM_FILENAME_LENGTH] = { 0 }; + + if (strstr(module.imageName, systemRootPattern) == module.imageName) + { + strcpy_s(path, systemRootPath); + strcat_s(path, module.imageName + strlen(systemRootPattern)); + } + else + { + strcpy_s(path, module.imageName); + } + + modules.push_back(Module(path, (void*)module.imageBase, module.imageSize)); + } + } + else + { + OPTICK_FAILED("Can't query System Module Information!"); + } + + if (pModuleList) + { + Memory::Free(pModuleList); + } +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void WinSymbolEngine::InitApplicationModules() +{ + HANDLE processHandle = GetCurrentProcess(); + HMODULE hModules[256]; + DWORD modulesSize = 0; + EnumProcessModulesEx(processHandle, hModules, sizeof(hModules), &modulesSize, 0); + + int moduleCount = modulesSize / sizeof(HMODULE); + + modules.reserve(modules.size() + moduleCount); + + for (int i = 0; i < moduleCount; ++i) + { + MODULEINFO info = { 0 }; + if (GetModuleInformation(processHandle, hModules[i], &info, sizeof(MODULEINFO))) + { + char name[MAX_PATH] = "UnknownModule"; + GetModuleFileNameExA(processHandle, hModules[i], name, MAX_PATH); + + modules.push_back(Module(name, info.lpBaseOfDll, info.SizeOfImage)); + } + } +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void WinSymbolEngine::Close() +{ + if (isInitialized) + { +#if USE_DBG_HELP + SymCleanup(hProcess); + if (needRestorePreviousSettings) + { + HANDLE currentProcess = GetCurrentProcess(); + + SymSetOptions(previousOptions); + SymSetSearchPath(currentProcess, previousSearchPath); + SymInitialize(currentProcess, NULL, TRUE); + + needRestorePreviousSettings = false; + } +#endif + modules.clear(); + isInitialized = false; + } +} +////////////////////////////////////////////////////////////////////////// +SymbolEngine* Platform::GetSymbolEngine() +{ + static WinSymbolEngine pdbSymbolEngine; + return &pdbSymbolEngine; +} +////////////////////////////////////////////////////////////////////////// +} +#endif //OPTICK_ENABLE_TRACING +#endif //USE_OPTICK +#endif //_MSC_VER
\ No newline at end of file diff --git a/external/optick/optick_gpu.cpp b/external/optick/optick_gpu.cpp new file mode 100644 index 0000000..d3610c3 --- /dev/null +++ b/external/optick/optick_gpu.cpp @@ -0,0 +1,136 @@ +#include "optick.config.h" + +#if USE_OPTICK +#include "optick_gpu.h" +#include "optick_core.h" +#include "optick_memory.h" + +#include <thread> + +namespace Optick +{ + static_assert((1ULL << 32) % GPUProfiler::MAX_QUERIES_COUNT == 0, "(1 << 32) should be a multiple of MAX_QUERIES_COUNT to handle query index overflow!"); + + + GPUProfiler::GPUProfiler() : currentState(STATE_OFF), currentNode(0), frameNumber(0) + { + + } + + void GPUProfiler::InitNode(const char *nodeName, uint32_t nodeIndex) + { + Node* node = Memory::New<Node>(); + for (int i = 0; i < GPU_QUEUE_COUNT; ++i) + { + char name[128] = { 0 }; + sprintf_s(name, "%s [%s]", nodeName, GetGPUQueueName((GPUQueueType)i)); + node->gpuEventStorage[i] = RegisterStorage(name, uint64_t(-1), ThreadMask::GPU); + node->name = nodeName; + } + nodes[nodeIndex] = node; + } + + void GPUProfiler::Start(uint32 /*mode*/) + { + std::lock_guard<std::recursive_mutex> lock(updateLock); + Reset(); + currentState = STATE_STARTING; + } + + void GPUProfiler::Stop(uint32 /*mode*/) + { + std::lock_guard<std::recursive_mutex> lock(updateLock); + currentState = STATE_OFF; + } + + void GPUProfiler::Dump(uint32 /*mode*/) + { + for (size_t nodeIndex = 0; nodeIndex < nodes.size(); ++nodeIndex) + { + Node* node = nodes[nodeIndex]; + + for (int queueIndex = 0; queueIndex < GPU_QUEUE_COUNT; ++queueIndex) + { + EventBuffer& gpuBuffer = node->gpuEventStorage[queueIndex]->eventBuffer; + + const vector<ThreadEntry*>& threads = Core::Get().GetThreads(); + for (size_t threadIndex = 0; threadIndex < threads.size(); ++threadIndex) + { + ThreadEntry* thread = threads[threadIndex]; + thread->storage.gpuStorage.gpuBuffer[nodeIndex][queueIndex].ForEachChunk([&gpuBuffer](const EventData* events, int count) + { + gpuBuffer.AddRange(events, count); + }); + } + } + } + } + + string GPUProfiler::GetName() const + { + return !nodes.empty() ? nodes[0]->name : string(); + } + + GPUProfiler::~GPUProfiler() + { + for (Node* node : nodes) + Memory::Delete(node); + nodes.clear(); + } + + void GPUProfiler::Reset() + { + for (uint32_t nodeIndex = 0; nodeIndex < nodes.size(); ++nodeIndex) + { + Node& node = *nodes[nodeIndex]; + node.Reset(); + node.clock = GetClockSynchronization(nodeIndex); + } + } + + EventData& GPUProfiler::AddFrameEvent() + { + static const EventDescription* GPUFrameDescription = EventDescription::Create("GPU Frame", __FILE__, __LINE__); + EventData& event = nodes[currentNode]->gpuEventStorage[GPU_QUEUE_GRAPHICS]->eventBuffer.Add(); + event.description = GPUFrameDescription; + event.start = EventTime::INVALID_TIMESTAMP; + event.finish = EventTime::INVALID_TIMESTAMP; + return event; + } + + EventData& GPUProfiler::AddVSyncEvent() + { + static const EventDescription* VSyncDescription = EventDescription::Create("VSync", __FILE__, __LINE__); + EventData& event = nodes[currentNode]->gpuEventStorage[GPU_QUEUE_VSYNC]->eventBuffer.Add(); + event.description = VSyncDescription; + event.start = EventTime::INVALID_TIMESTAMP; + event.finish = EventTime::INVALID_TIMESTAMP; + return event; + } + + TagData<uint32>& GPUProfiler::AddFrameTag() + { + static const EventDescription* FrameTagDescription = EventDescription::CreateShared("Frame"); + TagData<uint32>& tag = nodes[currentNode]->gpuEventStorage[GPU_QUEUE_GRAPHICS]->tagU32Buffer.Add(); + tag.description = FrameTagDescription; + tag.timestamp = EventTime::INVALID_TIMESTAMP; + tag.data = Core::Get().GetCurrentFrame(); + return tag; + } + + const char * GetGPUQueueName(GPUQueueType queue) + { + const char* GPUQueueToName[GPU_QUEUE_COUNT] = { "Graphics", "Compute", "Transfer", "VSync" }; + return GPUQueueToName[queue]; + } + + void GPUProfiler::Node::Reset() + { + queryIndex = 0; + + for (size_t frameIndex = 0; frameIndex < queryGpuframes.size(); ++frameIndex) + queryGpuframes[frameIndex].Reset(); + } +} +#endif //USE_OPTICK + diff --git a/external/optick/optick_gpu.d3d12.cpp b/external/optick/optick_gpu.d3d12.cpp new file mode 100644 index 0000000..1ee4dd9 --- /dev/null +++ b/external/optick/optick_gpu.d3d12.cpp @@ -0,0 +1,382 @@ +#include "optick.config.h" +#if USE_OPTICK +#if OPTICK_ENABLE_GPU_D3D12 + +#include "optick_common.h" +#include "optick_memory.h" +#include "optick_core.h" +#include "optick_gpu.h" + +#include <atomic> +#include <thread> + +#include <d3d12.h> +#include <dxgi.h> +#include <dxgi1_4.h> + + +#define OPTICK_CHECK(args) do { HRESULT __hr = args; (void)__hr; OPTICK_ASSERT(__hr == S_OK, "Failed check"); } while(false); + +namespace Optick +{ + class GPUProfilerD3D12 : public GPUProfiler + { + struct Frame + { + ID3D12CommandAllocator* commandAllocator; + ID3D12GraphicsCommandList* commandList; + + Frame() : commandAllocator(nullptr), commandList(nullptr) + { + Reset(); + } + + void Reset() + { + } + + void Shutdown(); + + ~Frame() + { + Shutdown(); + } + }; + + struct NodePayload + { + ID3D12CommandQueue* commandQueue; + ID3D12QueryHeap* queryHeap; + ID3D12Fence* syncFence; + array<Frame, NUM_FRAMES_DELAY> frames; + + NodePayload() : commandQueue(nullptr), queryHeap(nullptr), syncFence(nullptr) {} + ~NodePayload(); + }; + vector<NodePayload*> nodePayloads; + + ID3D12Resource* queryBuffer; + ID3D12Device* device; + + // VSync Stats + DXGI_FRAME_STATISTICS prevFrameStatistics; + + //void UpdateRange(uint32_t start, uint32_t finish) + void InitNodeInternal(const char* nodeName, uint32_t nodeIndex, ID3D12CommandQueue* pCmdQueue); + + void ResolveTimestamps(uint32_t startIndex, uint32_t count); + + void WaitForFrame(uint64_t frameNumber); + + public: + GPUProfilerD3D12(); + ~GPUProfilerD3D12(); + + void InitDevice(ID3D12Device* pDevice, ID3D12CommandQueue** pCommandQueues, uint32_t numCommandQueues); + + void QueryTimestamp(ID3D12GraphicsCommandList* context, int64_t* outCpuTimestamp); + + void Flip(IDXGISwapChain* swapChain); + + + // Interface implementation + ClockSynchronization GetClockSynchronization(uint32_t nodeIndex) override; + + void QueryTimestamp(void* context, int64_t* outCpuTimestamp) override + { + QueryTimestamp((ID3D12GraphicsCommandList*)context, outCpuTimestamp); + } + + void Flip(void* swapChain) override + { + Flip(static_cast<IDXGISwapChain*>(swapChain)); + } + }; + + template <class T> void SafeRelease(T **ppT) + { + if (*ppT) + { + (*ppT)->Release(); + *ppT = NULL; + } + } + + void InitGpuD3D12(void* device, void** cmdQueues, uint32_t numQueues) + { + GPUProfilerD3D12* gpuProfiler = Memory::New<GPUProfilerD3D12>(); + gpuProfiler->InitDevice((ID3D12Device*)device, (ID3D12CommandQueue**)cmdQueues, numQueues); + Core::Get().InitGPUProfiler(gpuProfiler); + } + + GPUProfilerD3D12::GPUProfilerD3D12() : queryBuffer(nullptr), device(nullptr) + { + prevFrameStatistics = { 0 }; + } + + GPUProfilerD3D12::~GPUProfilerD3D12() + { + WaitForFrame(frameNumber - 1); + + for (NodePayload* payload : nodePayloads) + Memory::Delete(payload); + nodePayloads.clear(); + + for (Node* node : nodes) + Memory::Delete(node); + nodes.clear(); + + SafeRelease(&queryBuffer); + } + + void GPUProfilerD3D12::InitDevice(ID3D12Device* pDevice, ID3D12CommandQueue** pCommandQueues, uint32_t numCommandQueues) + { + device = pDevice; + + uint32_t nodeCount = numCommandQueues; // device->GetNodeCount(); + + nodes.resize(nodeCount); + nodePayloads.resize(nodeCount); + + D3D12_HEAP_PROPERTIES heapDesc; + heapDesc.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + heapDesc.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + heapDesc.CreationNodeMask = 0; + heapDesc.VisibleNodeMask = (1u << nodeCount) - 1u; + heapDesc.Type = D3D12_HEAP_TYPE_READBACK; + + D3D12_RESOURCE_DESC resourceDesc; + resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + resourceDesc.Alignment = 0; + resourceDesc.Width = MAX_QUERIES_COUNT * sizeof(int64_t); + resourceDesc.Height = 1; + resourceDesc.DepthOrArraySize = 1; + resourceDesc.MipLevels = 1; + resourceDesc.Format = DXGI_FORMAT_UNKNOWN; + resourceDesc.SampleDesc.Count = 1; + resourceDesc.SampleDesc.Quality = 0; + resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + resourceDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + + OPTICK_CHECK(device->CreateCommittedResource( + &heapDesc, + D3D12_HEAP_FLAG_NONE, + &resourceDesc, + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(&queryBuffer))); + + // Get Device Name + LUID adapterLUID = pDevice->GetAdapterLuid(); + + IDXGIFactory4* factory; + OPTICK_CHECK(CreateDXGIFactory2(0, IID_PPV_ARGS(&factory))); + + IDXGIAdapter1* adapter; + factory->EnumAdapterByLuid(adapterLUID, IID_PPV_ARGS(&adapter)); + + DXGI_ADAPTER_DESC1 desc; + adapter->GetDesc1(&desc); + + adapter->Release(); + factory->Release(); + + char deviceName[128] = { 0 }; + wcstombs_s(deviceName, desc.Description, OPTICK_ARRAY_SIZE(deviceName) - 1); + + for (uint32_t nodeIndex = 0; nodeIndex < nodeCount; ++nodeIndex) + InitNodeInternal(deviceName, nodeIndex, pCommandQueues[nodeIndex]); + } + + void GPUProfilerD3D12::InitNodeInternal(const char* nodeName, uint32_t nodeIndex, ID3D12CommandQueue* pCmdQueue) + { + GPUProfiler::InitNode(nodeName, nodeIndex); + + NodePayload* node = Memory::New<NodePayload>(); + nodePayloads[nodeIndex] = node; + node->commandQueue = pCmdQueue; + + D3D12_QUERY_HEAP_DESC queryHeapDesc; + queryHeapDesc.Count = MAX_QUERIES_COUNT; + queryHeapDesc.Type = D3D12_QUERY_HEAP_TYPE_TIMESTAMP; + queryHeapDesc.NodeMask = 1u << nodeIndex; + OPTICK_CHECK(device->CreateQueryHeap(&queryHeapDesc, IID_PPV_ARGS(&node->queryHeap))); + + OPTICK_CHECK(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&node->syncFence))); + + for (Frame& frame : node->frames) + { + OPTICK_CHECK(device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&frame.commandAllocator))); + OPTICK_CHECK(device->CreateCommandList(1u << nodeIndex, D3D12_COMMAND_LIST_TYPE_DIRECT, frame.commandAllocator, nullptr, IID_PPV_ARGS(&frame.commandList))); + OPTICK_CHECK(frame.commandList->Close()); + } + } + + void GPUProfilerD3D12::QueryTimestamp(ID3D12GraphicsCommandList* context, int64_t* outCpuTimestamp) + { + if (currentState == STATE_RUNNING) + { + uint32_t index = nodes[currentNode]->QueryTimestamp(outCpuTimestamp); + context->EndQuery(nodePayloads[currentNode]->queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, index); + } + } + + void GPUProfilerD3D12::ResolveTimestamps(uint32_t startIndex, uint32_t count) + { + if (count) + { + Node* node = nodes[currentNode]; + + D3D12_RANGE range = { sizeof(uint64_t)*startIndex, sizeof(uint64_t)*(startIndex + count) }; + void* pData = nullptr; + queryBuffer->Map(0, &range, &pData); + memcpy(&node->queryGpuTimestamps[startIndex], (uint64_t*)pData + startIndex, sizeof(uint64_t) * count); + queryBuffer->Unmap(0, 0); + + // Convert GPU timestamps => CPU Timestamps + for (uint32_t index = startIndex; index < startIndex + count; ++index) + *node->queryCpuTimestamps[index] = node->clock.GetCPUTimestamp(node->queryGpuTimestamps[index]); + } + } + + void GPUProfilerD3D12::WaitForFrame(uint64_t frameNumberToWait) + { + OPTICK_EVENT(); + + NodePayload* payload = nodePayloads[currentNode]; + while (frameNumberToWait > payload->syncFence->GetCompletedValue()) + { + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } + } + + void GPUProfilerD3D12::Flip(IDXGISwapChain* swapChain) + { + OPTICK_CATEGORY("GPUProfilerD3D12::Flip", Category::Debug); + + std::lock_guard<std::recursive_mutex> lock(updateLock); + + if (currentState == STATE_STARTING) + currentState = STATE_RUNNING; + + if (currentState == STATE_RUNNING) + { + Node& node = *nodes[currentNode]; + NodePayload& payload = *nodePayloads[currentNode]; + + uint32_t currentFrameIndex = frameNumber % NUM_FRAMES_DELAY; + uint32_t nextFrameIndex = (frameNumber + 1) % NUM_FRAMES_DELAY; + + //Frame& currentFrame = frames[frameNumber % NUM_FRAMES_DELAY]; + //Frame& nextFrame = frames[(frameNumber + 1) % NUM_FRAMES_DELAY]; + + QueryFrame& currentFrame = node.queryGpuframes[currentFrameIndex]; + QueryFrame& nextFrame = node.queryGpuframes[nextFrameIndex]; + + ID3D12GraphicsCommandList* commandList = payload.frames[currentFrameIndex].commandList; + ID3D12CommandAllocator* commandAllocator = payload.frames[currentFrameIndex].commandAllocator; + commandAllocator->Reset(); + commandList->Reset(commandAllocator, nullptr); + + if (EventData* frameEvent = currentFrame.frameEvent) + QueryTimestamp(commandList, &frameEvent->finish); + + // Generate GPU Frame event for the next frame + EventData& event = AddFrameEvent(); + QueryTimestamp(commandList, &event.start); + QueryTimestamp(commandList, &AddFrameTag().timestamp); + nextFrame.frameEvent = &event; + + uint32_t queryBegin = currentFrame.queryIndexStart; + uint32_t queryEnd = node.queryIndex; + + if (queryBegin != (uint32_t)-1) + { + OPTICK_ASSERT(queryEnd - queryBegin <= MAX_QUERIES_COUNT, "Too many queries in one frame? Increase GPUProfiler::MAX_QUERIES_COUNT to fix the problem!"); + currentFrame.queryIndexCount = queryEnd - queryBegin; + + uint32_t startIndex = queryBegin % MAX_QUERIES_COUNT; + uint32_t finishIndex = queryEnd % MAX_QUERIES_COUNT; + + if (startIndex < finishIndex) + { + commandList->ResolveQueryData(payload.queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, startIndex, queryEnd - queryBegin, queryBuffer, startIndex * sizeof(int64_t)); + } + else + { + commandList->ResolveQueryData(payload.queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, startIndex, MAX_QUERIES_COUNT - startIndex, queryBuffer, startIndex * sizeof(int64_t)); + commandList->ResolveQueryData(payload.queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, 0, finishIndex, queryBuffer, 0); + } + } + + commandList->Close(); + + payload.commandQueue->ExecuteCommandLists(1, (ID3D12CommandList*const*)&commandList); + payload.commandQueue->Signal(payload.syncFence, frameNumber); + + // Preparing Next Frame + // Try resolve timestamps for the current frame + if (frameNumber >= NUM_FRAMES_DELAY && nextFrame.queryIndexCount) + { + WaitForFrame(frameNumber + 1 - NUM_FRAMES_DELAY); + + uint32_t resolveStart = nextFrame.queryIndexStart % MAX_QUERIES_COUNT; + uint32_t resolveFinish = resolveStart + nextFrame.queryIndexCount; + ResolveTimestamps(resolveStart, std::min<uint32_t>(resolveFinish, MAX_QUERIES_COUNT) - resolveStart); + if (resolveFinish > MAX_QUERIES_COUNT) + ResolveTimestamps(0, resolveFinish - MAX_QUERIES_COUNT); + } + + nextFrame.queryIndexStart = queryEnd; + nextFrame.queryIndexCount = 0; + + // Process VSync + DXGI_FRAME_STATISTICS currentFrameStatistics = { 0 }; + HRESULT result = swapChain->GetFrameStatistics(¤tFrameStatistics); + if ((result == S_OK) && (prevFrameStatistics.PresentCount + 1 == currentFrameStatistics.PresentCount)) + { + EventData& data = AddVSyncEvent(); + data.start = prevFrameStatistics.SyncQPCTime.QuadPart; + data.finish = currentFrameStatistics.SyncQPCTime.QuadPart; + } + prevFrameStatistics = currentFrameStatistics; + } + + ++frameNumber; + } + + GPUProfiler::ClockSynchronization GPUProfilerD3D12::GetClockSynchronization(uint32_t nodeIndex) + { + ClockSynchronization clock; + clock.frequencyCPU = GetHighPrecisionFrequency(); + nodePayloads[nodeIndex]->commandQueue->GetTimestampFrequency((uint64_t*)&clock.frequencyGPU); + nodePayloads[nodeIndex]->commandQueue->GetClockCalibration((uint64_t*)&clock.timestampGPU, (uint64_t*)&clock.timestampCPU); + return clock; + } + + GPUProfilerD3D12::NodePayload::~NodePayload() + { + SafeRelease(&queryHeap); + SafeRelease(&syncFence); + } + + void GPUProfilerD3D12::Frame::Shutdown() + { + SafeRelease(&commandAllocator); + SafeRelease(&commandList); + } +} + +#else +#include "optick_common.h" + +namespace Optick +{ + void InitGpuD3D12(void* /*device*/, void** /*cmdQueues*/, uint32_t /*numQueues*/) + { + OPTICK_FAILED("OPTICK_ENABLE_GPU_D3D12 is disabled! Can't initialize GPU Profiler!"); + } +} + +#endif //OPTICK_ENABLE_GPU_D3D12 +#endif //USE_OPTICK
\ No newline at end of file diff --git a/external/optick/optick_gpu.h b/external/optick/optick_gpu.h new file mode 100644 index 0000000..f028f8a --- /dev/null +++ b/external/optick/optick_gpu.h @@ -0,0 +1,129 @@ +#pragma once +#include "optick.config.h" + +#if USE_OPTICK + +#include <atomic> +#include <mutex> + +#include "optick_common.h" +#include "optick_memory.h" + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +namespace Optick +{ + const char* GetGPUQueueName(GPUQueueType queue); + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + class GPUProfiler + { + public: + static const int MAX_FRAME_EVENTS = 1024; + static const int NUM_FRAMES_DELAY = 4; + static const int MAX_QUERIES_COUNT = (2 * MAX_FRAME_EVENTS) * NUM_FRAMES_DELAY; + protected: + + enum State + { + STATE_OFF, + STATE_STARTING, + STATE_RUNNING, + STATE_FINISHING, + }; + + struct ClockSynchronization + { + int64_t frequencyCPU; + int64_t frequencyGPU; + int64_t timestampCPU; + int64_t timestampGPU; + + int64_t GetCPUTimestamp(int64_t gpuTimestamp) + { + return timestampCPU + (gpuTimestamp - timestampGPU) * frequencyCPU / frequencyGPU; + } + + ClockSynchronization() : frequencyCPU(0), frequencyGPU(0), timestampCPU(0), timestampGPU(0) {} + }; + + struct QueryFrame + { + EventData* frameEvent; + uint32_t queryIndexStart; + uint32_t queryIndexCount; + + QueryFrame() + { + Reset(); + } + + void Reset() + { + frameEvent = nullptr; + queryIndexStart = (uint32_t)-1; + queryIndexCount = 0; + } + }; + + struct Node + { + array<QueryFrame, NUM_FRAMES_DELAY> queryGpuframes; + array<int64_t, MAX_QUERIES_COUNT> queryGpuTimestamps; + array<int64_t*, MAX_QUERIES_COUNT> queryCpuTimestamps; + std::atomic<uint32_t> queryIndex; + + ClockSynchronization clock; + + array<EventStorage*, GPU_QUEUE_COUNT> gpuEventStorage; + + uint32_t QueryTimestamp(int64_t* outCpuTimestamp) + { + uint32_t index = queryIndex.fetch_add(1) % MAX_QUERIES_COUNT; + queryCpuTimestamps[index] = outCpuTimestamp; + return index; + } + + string name; + + void Reset(); + + Node() : queryIndex(0) { gpuEventStorage.fill(nullptr); } + }; + + std::recursive_mutex updateLock; + volatile State currentState; + + vector<Node*> nodes; + uint32_t currentNode; + + uint32_t frameNumber; + + void Reset(); + + EventData& AddFrameEvent(); + EventData& AddVSyncEvent(); + TagData<uint32>& AddFrameTag(); + + public: + GPUProfiler(); + + // Init + virtual void InitNode(const char* nodeName, uint32_t nodeIndex); + + // Capture Controls + virtual void Start(uint32 mode); + virtual void Stop(uint32 mode); + virtual void Dump(uint32 mode); + + virtual string GetName() const; + + // Interface to implement + virtual ClockSynchronization GetClockSynchronization(uint32_t nodeIndex) = 0; + virtual void QueryTimestamp(void* context, int64_t* cpuTimestampOut) = 0; + virtual void Flip(void* swapChain) = 0; + + virtual ~GPUProfiler(); + }; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +} + +#endif //USE_OPTICK diff --git a/external/optick/optick_gpu.vulkan.cpp b/external/optick/optick_gpu.vulkan.cpp new file mode 100644 index 0000000..6d6f29d --- /dev/null +++ b/external/optick/optick_gpu.vulkan.cpp @@ -0,0 +1,365 @@ +#include "optick.config.h" + +#if USE_OPTICK +#if OPTICK_ENABLE_GPU_VULKAN +#include <vulkan/vulkan.h> + +#include "optick_core.h" +#include "optick_gpu.h" + +#define OPTICK_VK_CHECK(args) do { VkResult __hr = args; OPTICK_ASSERT(__hr == VK_SUCCESS, "Failed check"); (void)__hr; } while(false); + +namespace Optick +{ + class GPUProfilerVulkan : public GPUProfiler + { + protected: + struct Frame + { + VkCommandBuffer commandBuffer; + VkFence fence; + Frame() : commandBuffer(VK_NULL_HANDLE), fence(VK_NULL_HANDLE) {} + }; + + struct NodePayload + { + VkDevice device; + VkPhysicalDevice physicalDevice; + VkQueue queue; + VkQueryPool queryPool; + VkCommandPool commandPool; + + array<Frame, NUM_FRAMES_DELAY> frames; + + NodePayload() : device(VK_NULL_HANDLE), physicalDevice(VK_NULL_HANDLE), queue(VK_NULL_HANDLE), queryPool(VK_NULL_HANDLE), commandPool(VK_NULL_HANDLE) {} + ~NodePayload(); + }; + vector<NodePayload*> nodePayloads; + + void ResolveTimestamps(VkCommandBuffer commandBuffer, uint32_t startIndex, uint32_t count); + void WaitForFrame(uint64_t frameNumber); + + public: + GPUProfilerVulkan(); + ~GPUProfilerVulkan(); + + void InitDevice(VkDevice* devices, VkPhysicalDevice* physicalDevices, VkQueue* cmdQueues, uint32_t* cmdQueuesFamily, uint32_t nodeCount); + void QueryTimestamp(VkCommandBuffer commandBuffer, int64_t* outCpuTimestamp); + + + // Interface implementation + ClockSynchronization GetClockSynchronization(uint32_t nodeIndex) override; + + void QueryTimestamp(void* context, int64_t* outCpuTimestamp) override + { + QueryTimestamp((VkCommandBuffer)context, outCpuTimestamp); + } + + void Flip(void* swapChain) override; + }; + + void InitGpuVulkan(void* vkDevices, void* vkPhysicalDevices, void* vkQueues, uint32_t* cmdQueuesFamily, uint32_t numQueues) + { + GPUProfilerVulkan* gpuProfiler = Memory::New<GPUProfilerVulkan>(); + gpuProfiler->InitDevice((VkDevice*)vkDevices, (VkPhysicalDevice*)vkPhysicalDevices, (VkQueue*)vkQueues, cmdQueuesFamily, numQueues); + Core::Get().InitGPUProfiler(gpuProfiler); + } + + GPUProfilerVulkan::GPUProfilerVulkan() + { + } + + void GPUProfilerVulkan::InitDevice(VkDevice* devices, VkPhysicalDevice* physicalDevices, VkQueue* cmdQueues, uint32_t* cmdQueuesFamily, uint32_t nodeCount) + { + VkQueryPoolCreateInfo queryPoolCreateInfo; + queryPoolCreateInfo.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO; + queryPoolCreateInfo.pNext = 0; + queryPoolCreateInfo.flags = 0; + queryPoolCreateInfo.queryType = VK_QUERY_TYPE_TIMESTAMP; + queryPoolCreateInfo.queryCount = MAX_QUERIES_COUNT + 1; + + VkCommandPoolCreateInfo commandPoolCreateInfo; + commandPoolCreateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + commandPoolCreateInfo.pNext = 0; + commandPoolCreateInfo.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; + + nodes.resize(nodeCount); + nodePayloads.resize(nodeCount); + + VkResult r; + for (uint32_t i = 0; i < nodeCount; ++i) + { + VkPhysicalDeviceProperties properties = { 0 }; + vkGetPhysicalDeviceProperties(physicalDevices[i], &properties); + GPUProfiler::InitNode(properties.deviceName, i); + + NodePayload* nodePayload = Memory::New<NodePayload>(); + nodePayloads[i] = nodePayload; + nodePayload->device = devices[i]; + nodePayload->physicalDevice = physicalDevices[i]; + nodePayload->queue = cmdQueues[i]; + + r = vkCreateQueryPool(devices[i], &queryPoolCreateInfo, 0, &nodePayload->queryPool); + OPTICK_ASSERT(r == VK_SUCCESS, "Failed"); + + commandPoolCreateInfo.queueFamilyIndex = cmdQueuesFamily[i]; + r = vkCreateCommandPool(nodePayload->device, &commandPoolCreateInfo, 0, &nodePayload->commandPool); + OPTICK_ASSERT(r == VK_SUCCESS, "Failed"); + + for (uint32_t j = 0; j < nodePayload->frames.size(); ++j) + { + Frame& frame = nodePayload->frames[j]; + + VkCommandBufferAllocateInfo allocInfo; + allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + allocInfo.pNext = 0; + allocInfo.commandBufferCount = 1; + allocInfo.commandPool = nodePayload->commandPool; + allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + r = vkAllocateCommandBuffers(nodePayload->device, &allocInfo, &frame.commandBuffer); + OPTICK_ASSERT(r == VK_SUCCESS, "Failed"); + + VkFenceCreateInfo fenceCreateInfo; + fenceCreateInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; + fenceCreateInfo.pNext = 0; + fenceCreateInfo.flags = j == 0 ? 0 : VK_FENCE_CREATE_SIGNALED_BIT; + r = vkCreateFence(nodePayload->device, &fenceCreateInfo, 0, &frame.fence); + OPTICK_ASSERT(r == VK_SUCCESS, "Failed"); + if (j == 0) + { + VkCommandBufferBeginInfo commandBufferBeginInfo; + commandBufferBeginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + commandBufferBeginInfo.pNext = 0; + commandBufferBeginInfo.pInheritanceInfo = 0; + commandBufferBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + vkBeginCommandBuffer(frame.commandBuffer, &commandBufferBeginInfo); + vkCmdResetQueryPool(frame.commandBuffer, nodePayload->queryPool, 0, MAX_QUERIES_COUNT); + vkEndCommandBuffer(frame.commandBuffer); + + VkSubmitInfo submitInfo = {}; + submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submitInfo.pNext = nullptr; + submitInfo.waitSemaphoreCount = 0; + submitInfo.pWaitSemaphores = nullptr; + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &frame.commandBuffer; + submitInfo.signalSemaphoreCount = 0; + submitInfo.pSignalSemaphores = nullptr; + vkQueueSubmit(nodePayload->queue, 1, &submitInfo, frame.fence); + vkWaitForFences(nodePayload->device, 1, &frame.fence, 1, (uint64_t)-1); + vkResetCommandBuffer(frame.commandBuffer, VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT); + } + } + } + } + + void GPUProfilerVulkan::QueryTimestamp(VkCommandBuffer commandBuffer, int64_t* outCpuTimestamp) + { + if (currentState == STATE_RUNNING) + { + uint32_t index = nodes[currentNode]->QueryTimestamp(outCpuTimestamp); + vkCmdWriteTimestamp(commandBuffer, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, nodePayloads[currentNode]->queryPool, index); + } + } + + void GPUProfilerVulkan::ResolveTimestamps(VkCommandBuffer commandBuffer, uint32_t startIndex, uint32_t count) + { + if (count) + { + Node* node = nodes[currentNode]; + + NodePayload* payload = nodePayloads[currentNode]; + + OPTICK_VK_CHECK(vkGetQueryPoolResults(payload->device, payload->queryPool, startIndex, count, 8 * count, &nodes[currentNode]->queryGpuTimestamps[startIndex], 8, VK_QUERY_RESULT_64_BIT)); + vkCmdResetQueryPool(commandBuffer, payload->queryPool, startIndex, count); + + // Convert GPU timestamps => CPU Timestamps + for (uint32_t index = startIndex; index < startIndex + count; ++index) + *node->queryCpuTimestamps[index] = node->clock.GetCPUTimestamp(node->queryGpuTimestamps[index]); + } + } + + void GPUProfilerVulkan::WaitForFrame(uint64_t frameNumberToWait) + { + OPTICK_EVENT(); + + int r = VK_SUCCESS; + do + { + NodePayload& payload = *nodePayloads[currentNode]; + r = vkWaitForFences(nodePayloads[currentNode]->device, 1, &payload.frames[frameNumberToWait % payload.frames.size()].fence, 1, 1000 * 30); + } while (r != VK_SUCCESS); + } + + void GPUProfilerVulkan::Flip(void* /*swapChain*/) + { + OPTICK_CATEGORY("GPUProfilerVulkan::Flip", Category::Debug); + + std::lock_guard<std::recursive_mutex> lock(updateLock); + + if (currentState == STATE_STARTING) + currentState = STATE_RUNNING; + + if (currentState == STATE_RUNNING) + { + Node& node = *nodes[currentNode]; + NodePayload& payload = *nodePayloads[currentNode]; + + uint32_t currentFrameIndex = frameNumber % NUM_FRAMES_DELAY; + uint32_t nextFrameIndex = (frameNumber + 1) % NUM_FRAMES_DELAY; + + QueryFrame& currentFrame = node.queryGpuframes[currentFrameIndex]; + QueryFrame& nextFrame = node.queryGpuframes[nextFrameIndex]; + + VkCommandBuffer commandBuffer = payload.frames[currentFrameIndex].commandBuffer; + VkFence fence = payload.frames[currentFrameIndex].fence; + VkDevice device = payload.device; + VkQueue queue = payload.queue; + + vkWaitForFences(device, 1, &fence, 1, (uint64_t)-1); + + VkCommandBufferBeginInfo commandBufferBeginInfo; + commandBufferBeginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + commandBufferBeginInfo.pNext = 0; + commandBufferBeginInfo.pInheritanceInfo = 0; + commandBufferBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + OPTICK_VK_CHECK(vkBeginCommandBuffer(commandBuffer, &commandBufferBeginInfo)); + vkResetFences(device, 1, &fence); + + if (EventData* frameEvent = currentFrame.frameEvent) + QueryTimestamp(commandBuffer, &frameEvent->finish); + + // Generate GPU Frame event for the next frame + EventData& event = AddFrameEvent(); + QueryTimestamp(commandBuffer, &event.start); + QueryTimestamp(commandBuffer, &AddFrameTag().timestamp); + nextFrame.frameEvent = &event; + + OPTICK_VK_CHECK(vkEndCommandBuffer(commandBuffer)); + VkSubmitInfo submitInfo = {}; + submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submitInfo.pNext = nullptr; + submitInfo.waitSemaphoreCount = 0; + submitInfo.pWaitSemaphores = nullptr; + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &commandBuffer; + submitInfo.signalSemaphoreCount = 0; + submitInfo.pSignalSemaphores = nullptr; + OPTICK_VK_CHECK(vkQueueSubmit(queue, 1, &submitInfo, fence)); + + uint32_t queryBegin = currentFrame.queryIndexStart; + uint32_t queryEnd = node.queryIndex; + + if (queryBegin != (uint32_t)-1) + { + currentFrame.queryIndexCount = queryEnd - queryBegin; + } + + // Preparing Next Frame + // Try resolve timestamps for the current frame + if (nextFrame.queryIndexStart != (uint32_t)-1) + { + uint32_t startIndex = nextFrame.queryIndexStart % MAX_QUERIES_COUNT; + uint32_t finishIndex = (startIndex + nextFrame.queryIndexCount) % MAX_QUERIES_COUNT; + + if (startIndex < finishIndex) + { + ResolveTimestamps(commandBuffer, startIndex, finishIndex - startIndex); + } + else if (startIndex > finishIndex) + { + ResolveTimestamps(commandBuffer, startIndex, MAX_QUERIES_COUNT - startIndex); + ResolveTimestamps(commandBuffer, 0, finishIndex); + } + } + + nextFrame.queryIndexStart = queryEnd; + nextFrame.queryIndexCount = 0; + } + + ++frameNumber; + } + + GPUProfiler::ClockSynchronization GPUProfilerVulkan::GetClockSynchronization(uint32_t nodeIndex) + { + GPUProfiler::ClockSynchronization clock; + + NodePayload& node = *nodePayloads[nodeIndex]; + Frame& currentFrame = node.frames[frameNumber % NUM_FRAMES_DELAY]; + + VkCommandBufferBeginInfo commandBufferBeginInfo; + commandBufferBeginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + commandBufferBeginInfo.pNext = 0; + commandBufferBeginInfo.pInheritanceInfo = 0; + commandBufferBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + VkCommandBuffer CB = currentFrame.commandBuffer; + VkDevice Device = node.device; + VkFence Fence = currentFrame.fence; + + vkWaitForFences(Device, 1, &Fence, 1, (uint64_t)-1); + vkResetFences(Device, 1, &Fence); + vkResetCommandBuffer(CB, VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT); + vkBeginCommandBuffer(CB, &commandBufferBeginInfo); + vkCmdResetQueryPool(CB, nodePayloads[nodeIndex]->queryPool, 0, 1); + vkCmdWriteTimestamp(CB, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, nodePayloads[nodeIndex]->queryPool, 0); + vkEndCommandBuffer(CB); + + VkSubmitInfo submitInfo = {}; + submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submitInfo.pNext = nullptr; + submitInfo.waitSemaphoreCount = 0; + submitInfo.pWaitSemaphores = nullptr; + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &CB; + submitInfo.signalSemaphoreCount = 0; + submitInfo.pSignalSemaphores = nullptr; + vkQueueSubmit(nodePayloads[nodeIndex]->queue, 1, &submitInfo, Fence); + vkWaitForFences(Device, 1, &Fence, 1, (uint64_t)-1); + + clock.timestampGPU = 0; + vkGetQueryPoolResults(Device, nodePayloads[nodeIndex]->queryPool, 0, 1, 8, &clock.timestampGPU, 8, VK_QUERY_RESULT_64_BIT); + clock.timestampCPU = GetHighPrecisionTime(); + clock.frequencyCPU = GetHighPrecisionFrequency(); + + VkPhysicalDeviceProperties Properties; + vkGetPhysicalDeviceProperties(nodePayloads[nodeIndex]->physicalDevice, &Properties); + clock.frequencyGPU = (uint64_t)(1000000000ll / Properties.limits.timestampPeriod); + + return clock; + } + + GPUProfilerVulkan::NodePayload::~NodePayload() + { + vkDestroyCommandPool(device, commandPool, nullptr); + vkDestroyQueryPool(device, queryPool, nullptr); + } + + GPUProfilerVulkan::~GPUProfilerVulkan() + { + WaitForFrame(frameNumber - 1); + + for (NodePayload* payload : nodePayloads) + { + for (Frame& frame : payload->frames) + { + vkDestroyFence(payload->device, frame.fence, nullptr); + vkFreeCommandBuffers(payload->device, payload->commandPool, 1, &frame.commandBuffer); + } + + Memory::Delete(payload); + } + + nodePayloads.clear(); + } +} +#else +#include "optick_common.h" +namespace Optick +{ + void InitGpuVulkan(void* /*devices*/, void* /*physicalDevices*/, void* /*cmdQueues*/, uint32_t* /*cmdQueuesFamily*/, uint32_t /*numQueues*/) + { + OPTICK_FAILED("OPTICK_ENABLE_GPU_VULKAN is disabled! Can't initialize GPU Profiler!"); + } +} +#endif //OPTICK_ENABLE_GPU_D3D12 +#endif //USE_OPTICK
\ No newline at end of file diff --git a/external/optick/optick_memory.h b/external/optick/optick_memory.h new file mode 100644 index 0000000..45249c6 --- /dev/null +++ b/external/optick/optick_memory.h @@ -0,0 +1,419 @@ +#pragma once + +#include "optick_common.h" + +#if USE_OPTICK + +#include <cstring> +#include <new> +#include <stdlib.h> +#include <atomic> + +#include <array> +#include <list> +#include <string> +#include <sstream> +#include <unordered_set> +#include <unordered_map> +#include <vector> + +namespace Optick +{ + class Memory + { + struct Header + { + uint64_t size; + }; + + static std::atomic<uint64_t> memAllocated; + + static void* (*allocate)(size_t); + static void (*deallocate)(void* p); + public: + static OPTICK_INLINE void* Alloc(size_t size) + { + size_t totalSize = size + sizeof(Header); + void *ptr = allocate(totalSize); + OPTICK_VERIFY(ptr, "Can't allocate memory", return nullptr); + + Header* header = (Header*)ptr; + header->size = totalSize; + memAllocated += totalSize; + + return (uint8_t*)ptr + sizeof(Header); + } + + static OPTICK_INLINE void Free(void* p) + { + if (p != nullptr) + { + uint8_t* basePtr = (uint8_t*)p - sizeof(Header); + Header* header = (Header*)basePtr; + memAllocated -= header->size; + deallocate(basePtr); + } + } + + static OPTICK_INLINE size_t GetAllocatedSize() + { + return (size_t)memAllocated; + } + + template<class T> + static T* New() + { + return new (Memory::Alloc(sizeof(T))) T(); + } + + template<class T, class P1> + static T* New(P1 p1) + { + return new (Memory::Alloc(sizeof(T))) T(p1); + } + + template<class T, class P1, class P2> + static T* New(P1 p1, P2 p2) + { + return new (Memory::Alloc(sizeof(T))) T(p1, p2); + } + + template<class T> + static void Delete(T* p) + { + if (p) + { + p->~T(); + Free(p); + } + } + + static void SetAllocator(void* (*allocateFn)(size_t), void(*deallocateFn)(void*)) + { + allocate = allocateFn; + deallocate = deallocateFn; + } + + template<typename T> + struct Allocator : public std::allocator<T> + { + Allocator() {} + template<class U> + Allocator(const Allocator<U>&) {} + template<typename U> struct rebind { typedef Allocator<U> other; }; + + typename std::allocator<T>::pointer allocate(typename std::allocator<T>::size_type n, typename std::allocator<void>::const_pointer = 0) + { + return reinterpret_cast<typename std::allocator<T>::pointer>(Memory::Alloc(n * sizeof(T))); + } + + void deallocate(typename std::allocator<T>::pointer p, typename std::allocator<T>::size_type) + { + Memory::Free(p); + } + }; + }; + + // std::* section + template <typename T, size_t _Size> class array : public std::array<T, _Size>{}; + template <typename T> class vector : public std::vector<T, Memory::Allocator<T>>{}; + template <typename T> class list : public std::list<T, Memory::Allocator<T>>{}; + template <typename T> class unordered_set : public std::unordered_set<T, std::hash<T>, std::equal_to<T>, Memory::Allocator<T>>{}; + template <typename T, typename V> class unordered_map : public std::unordered_map<T, V, std::hash<T>, std::equal_to<T>, Memory::Allocator<std::pair<const T, V>>>{}; + + using string = std::basic_string<char, std::char_traits<char>, Memory::Allocator<char>>; + using wstring = std::basic_string<wchar_t, std::char_traits<wchar_t>, Memory::Allocator<wchar_t>>; + + using istringstream = std::basic_istringstream<char, std::char_traits<char>, Memory::Allocator<char>>; + using ostringstream = std::basic_ostringstream<char, std::char_traits<char>, Memory::Allocator<char>>; + using stringstream = std::basic_stringstream<char, std::char_traits<char>, Memory::Allocator<char>>; + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + template<class T, uint32 SIZE> + struct MemoryChunk + { + OPTICK_ALIGN_CACHE T data[SIZE]; + MemoryChunk* next; + MemoryChunk* prev; + + MemoryChunk() : next(0), prev(0) {} + + ~MemoryChunk() + { + MemoryChunk* chunk = this; + while (chunk->next) + chunk = chunk->next; + + while (chunk != this) + { + MemoryChunk* toDelete = chunk; + chunk = toDelete->prev; + Memory::Delete(toDelete); + } + + if (prev != nullptr) + { + prev->next = nullptr; + prev = nullptr; + } + } + }; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + template<class T, uint32 SIZE = 16> + class MemoryPool + { + typedef MemoryChunk<T, SIZE> Chunk; + Chunk* root; + Chunk* chunk; + uint32 index; + + OPTICK_INLINE void AddChunk() + { + index = 0; + if (!chunk || !chunk->next) + { + Chunk* newChunk = Memory::New<Chunk>(); + if (chunk) + { + chunk->next = newChunk; + newChunk->prev = chunk; + chunk = newChunk; + } + else + { + root = chunk = newChunk; + } + } + else + { + chunk = chunk->next; + } + } + public: + MemoryPool() : root(nullptr), chunk(nullptr), index(SIZE) {} + + OPTICK_INLINE T& Add() + { + if (index >= SIZE) + AddChunk(); + + return chunk->data[index++]; + } + + OPTICK_INLINE T& Add(const T& item) + { + return Add() = item; + } + + OPTICK_INLINE T* AddRange(const T* items, size_t count, bool allowOverlap = true) + { + if (count == 0 || (count > SIZE && !allowOverlap)) + return nullptr; + + if (count >= (SIZE - index) && !allowOverlap) + { + AddChunk(); + } + + T* result = &chunk->data[index]; + + while (count) + { + size_t numLeft = SIZE - index; + size_t numCopy = numLeft < count ? numLeft : count; + std::memcpy(&chunk->data[index], items, sizeof(T) * numCopy); + + count -= numCopy; + items += numCopy; + index += (uint32_t)numCopy; + + if (count) + AddChunk(); + } + + return result; + } + + + OPTICK_INLINE T* TryAdd(int count) + { + if (index + count <= SIZE) + { + T* res = &chunk->data[index]; + index += count; + return res; + } + + return nullptr; + } + + OPTICK_INLINE T* Back() + { + if (chunk && index > 0) + return &chunk->data[index - 1]; + + if (chunk && chunk->prev != nullptr) + return &chunk->prev->data[SIZE - 1]; + + return nullptr; + } + + OPTICK_INLINE size_t Size() const + { + if (root == nullptr) + return 0; + + size_t count = 0; + + for (const Chunk* it = root; it != chunk; it = it->next) + count += SIZE; + + return count + index; + } + + OPTICK_INLINE bool IsEmpty() const + { + return (chunk == nullptr) || (chunk == root && index == 0); + } + + OPTICK_INLINE void Clear(bool preserveMemory = true) + { + if (!preserveMemory) + { + if (root) + { + Memory::Delete(root); + root = nullptr; + chunk = nullptr; + index = SIZE; + } + } + else if (root) + { + index = 0; + chunk = root; + } + } + + class const_iterator + { + void advance() + { + if (chunkIndex < SIZE - 1) + { + ++chunkIndex; + } + else + { + chunkPtr = chunkPtr->next; + chunkIndex = 0; + } + } + public: + typedef const_iterator self_type; + typedef T value_type; + typedef T& reference; + typedef T* pointer; + typedef int difference_type; + const_iterator(const Chunk* ptr, size_t index) : chunkPtr(ptr), chunkIndex(index) { } + self_type operator++() + { + self_type i = *this; + advance(); + return i; + } + self_type operator++(int /*junk*/) + { + advance(); + return *this; + } + reference operator*() { return (reference)chunkPtr->data[chunkIndex]; } + const pointer operator->() { return &chunkPtr->data[chunkIndex]; } + bool operator==(const self_type& rhs) { return (chunkPtr == rhs.chunkPtr) && (chunkIndex == rhs.chunkIndex); } + bool operator!=(const self_type& rhs) { return (chunkPtr != rhs.chunkPtr) || (chunkIndex != rhs.chunkIndex); } + private: + const Chunk* chunkPtr; + size_t chunkIndex; + }; + + const_iterator begin() const + { + return const_iterator(root, 0); + } + + const_iterator end() const + { + return const_iterator(chunk, index); + } + + template<class Func> + void ForEach(Func func) const + { + for (const Chunk* it = root; it != chunk; it = it->next) + for (uint32 i = 0; i < SIZE; ++i) + func(it->data[i]); + + if (chunk) + for (uint32 i = 0; i < index; ++i) + func(chunk->data[i]); + } + + template<class Func> + void ForEach(Func func) + { + for (Chunk* it = root; it != chunk; it = it->next) + for (uint32 i = 0; i < SIZE; ++i) + func(it->data[i]); + + if (chunk) + for (uint32 i = 0; i < index; ++i) + func(chunk->data[i]); + } + + template<class Func> + void ForEachChunk(Func func) const + { + for (const Chunk* it = root; it != chunk; it = it->next) + func(it->data, SIZE); + + if (chunk) + func(chunk->data, index); + } + + void ToArray(T* destination) const + { + uint32 curIndex = 0; + + for (const Chunk* it = root; it != chunk; it = it->next) + { + memcpy(&destination[curIndex], it->data, sizeof(T) * SIZE); + curIndex += SIZE; + } + + if (chunk && index > 0) + { + memcpy(&destination[curIndex], chunk->data, sizeof(T) * index); + } + } + }; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + template<uint32 CHUNK_SIZE> + class MemoryBuffer : private MemoryPool<uint8, CHUNK_SIZE> + { + public: + template<class U> + U* Add(U* data, size_t size, bool allowOverlap = true) + { + return (U*)(MemoryPool<uint8, CHUNK_SIZE>::AddRange((uint8*)data, size, allowOverlap)); + } + + template<class T> + T* Add(const T& val, bool allowOverlap = true) + { + return static_cast<T*>(Add(&val, sizeof(T), allowOverlap)); + } + }; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +} + +#endif //USE_OPTICK
\ No newline at end of file diff --git a/external/optick/optick_message.cpp b/external/optick/optick_message.cpp new file mode 100644 index 0000000..b421d50 --- /dev/null +++ b/external/optick/optick_message.cpp @@ -0,0 +1,172 @@ +#include "optick.config.h" + +#if USE_OPTICK +#include "optick_common.h" +#include "optick_core.h" +#include "optick_message.h" +#include "optick_server.h" + +namespace Optick +{ +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct MessageHeader +{ + uint32 mark; + uint32 length; + + static const uint32 MESSAGE_MARK = 0xB50FB50F; + + bool IsValid() const { return mark == MESSAGE_MARK; } + + MessageHeader() : mark(0), length(0) {} +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +class MessageFactory +{ + typedef IMessage* (*MessageCreateFunction)(InputDataStream& str); + MessageCreateFunction factory[IMessage::COUNT]; + + template<class T> + void RegisterMessage() + { + factory[T::GetMessageType()] = T::Create; + } + + MessageFactory() + { + memset(&factory[0], 0, sizeof(MessageCreateFunction)); + + RegisterMessage<StartMessage>(); + RegisterMessage<StopMessage>(); + RegisterMessage<CancelMessage>(); + RegisterMessage<TurnSamplingMessage>(); + + for (uint32 msg = 0; msg < IMessage::COUNT; ++msg) + { + OPTICK_ASSERT(factory[msg] != nullptr, "Message is not registered to factory"); + } + } +public: + static MessageFactory& Get() + { + static MessageFactory instance; + return instance; + } + + IMessage* Create(InputDataStream& str) + { + MessageHeader header; + str.Read(header); + + size_t length = str.Length(); + + uint16 applicationID = 0; + uint16 messageType = IMessage::COUNT; + + str >> applicationID; + str >> messageType; + + OPTICK_VERIFY( 0 <= messageType && messageType < IMessage::COUNT && factory[messageType] != nullptr, "Unknown message type!", return nullptr ) + + IMessage* result = factory[messageType](str); + + if (header.length + str.Length() != length) + { + OPTICK_FAILED("Message Stream is corrupted! Invalid Protocol?") + return nullptr; + } + + return result; + } +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OutputDataStream& operator<<(OutputDataStream& os, const DataResponse& val) +{ + return os << val.version << (uint32)val.type; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +IMessage* IMessage::Create(InputDataStream& str) +{ + MessageHeader header; + + while (str.Peek(header)) + { + if (header.IsValid()) + { + if (str.Length() < header.length + sizeof(MessageHeader)) + break; // Not enough data yet + + return MessageFactory::Get().Create(str); + } + else + { + // Some garbage in the stream? + str.Skip(1); + } + } + + return nullptr; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void StartMessage::Apply() +{ + Core& core = Core::Get(); + core.SetSettings(settings); + core.StartCapture(); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +IMessage* StartMessage::Create(InputDataStream& stream) +{ + StartMessage* msg = Memory::New<StartMessage>(); + CaptureSettings& settings = msg->settings; + stream >> settings.mode + >> settings.categoryMask + >> settings.samplingFrequency + >> settings.frameLimit + >> settings.timeLimitUs + >> settings.spikeLimitUs + >> settings.memoryLimitMb; + string password; + stream >> settings.password; + settings.password = base64_decode(password); + return msg; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void StopMessage::Apply() +{ + Core::Get().DumpCapture(); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +IMessage* StopMessage::Create(InputDataStream&) +{ + return Memory::New<StopMessage>(); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void CancelMessage::Apply() +{ + Core::Get().CancelCapture(); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +IMessage* CancelMessage::Create(InputDataStream&) +{ + return Memory::New<CancelMessage>(); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +IMessage* TurnSamplingMessage::Create( InputDataStream& stream ) +{ + TurnSamplingMessage* msg = Memory::New<TurnSamplingMessage>(); + stream >> msg->index; + stream >> msg->isSampling; + return msg; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void TurnSamplingMessage::Apply() +{ + // Backward compatibility +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +} + +#endif //USE_OPTICK
\ No newline at end of file diff --git a/external/optick/optick_message.h b/external/optick/optick_message.h new file mode 100644 index 0000000..a6d553e --- /dev/null +++ b/external/optick/optick_message.h @@ -0,0 +1,130 @@ +#pragma once +#include "optick.config.h" + +#if USE_OPTICK + +#include "optick_common.h" +#include "optick_serialization.h" + +namespace Optick +{ +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +static const uint32 NETWORK_PROTOCOL_VERSION = 24; +static const uint16 NETWORK_APPLICATION_ID = 0xB50F; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct DataResponse +{ + enum Type : uint16 + { + FrameDescriptionBoard = 0, // DescriptionBoard for Instrumental Frames + EventFrame = 1, // Instrumental Data + SamplingFrame = 2, // Sampling Data + NullFrame = 3, // Last Fame Mark + ReportProgress = 4, // Report Current Progress + Handshake = 5, // Handshake Response + Reserved_0 = 6, + SynchronizationData = 7, // Synchronization Data for the thread + TagsPack = 8, // Pack of tags + CallstackDescriptionBoard = 9, // DescriptionBoard with resolved function addresses + CallstackPack = 10, // Pack of CallStacks + Reserved_1 = 11, + Reserved_2 = 12, + Reserved_3 = 13, + Reserved_4 = 14, + //... + Reserved_255 = 255, + + FiberSynchronizationData = 1 << 8, // Synchronization Data for the Fibers + SyscallPack, + SummaryPack, + }; + + uint32 version; + uint32 size; + Type type; + uint16 application; + + DataResponse(Type t, uint32 s) : version(NETWORK_PROTOCOL_VERSION), size(s), type(t), application(NETWORK_APPLICATION_ID){} +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OutputDataStream& operator << (OutputDataStream& os, const DataResponse& val); +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +class IMessage +{ +public: + enum Type : uint16 + { + Start, + Stop, + Cancel, + TurnSampling, + COUNT, + }; + + virtual void Apply() = 0; + virtual ~IMessage() {} + + static IMessage* Create( InputDataStream& str ); +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +template<IMessage::Type MESSAGE_TYPE> +class Message : public IMessage +{ + enum { id = MESSAGE_TYPE }; +public: + static uint32 GetMessageType() { return id; } +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct CaptureSettings +{ + // Capture Mode + uint32 mode; + // Category Filter + uint32 categoryMask; + // Tracer: Sampling Frequency + uint32 samplingFrequency; + // Max Duration for a capture (frames) + uint32 frameLimit; + // Max Duration for a capture (us) + uint32 timeLimitUs; + // Max Duration for a capture (us) + uint32 spikeLimitUs; + // Max Memory for a capture (MB) + uint64 memoryLimitMb; + // Tracer: Root Password for the Device + string password; + + CaptureSettings() : mode(0), categoryMask(0), samplingFrequency(0), frameLimit(0), timeLimitUs(0), spikeLimitUs(0), memoryLimitMb(0) {} +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct StartMessage : public Message<IMessage::Start> +{ + CaptureSettings settings; + static IMessage* Create(InputDataStream&); + virtual void Apply() override; +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct StopMessage : public Message<IMessage::Stop> +{ + static IMessage* Create(InputDataStream&); + virtual void Apply() override; +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct CancelMessage : public Message<IMessage::Cancel> +{ + static IMessage* Create(InputDataStream&); + virtual void Apply() override; +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct TurnSamplingMessage : public Message<IMessage::TurnSampling> +{ + int32 index; + byte isSampling; + + static IMessage* Create(InputDataStream& stream); + virtual void Apply() override; +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +} + +#endif //USE_OPTICK
\ No newline at end of file diff --git a/external/optick/optick_serialization.cpp b/external/optick/optick_serialization.cpp new file mode 100644 index 0000000..a47a9b7 --- /dev/null +++ b/external/optick/optick_serialization.cpp @@ -0,0 +1,178 @@ +#include "optick.config.h" + +#if USE_OPTICK + +#include "optick_common.h" +#include "optick_serialization.h" + +namespace Optick +{ + string OutputDataStream::GetData() + { + flush(); + return str(); + } + + OutputDataStream & OutputDataStream::Write(const char * buffer, size_t size) + { + write(buffer, size); + return *this; + } + + OutputDataStream OutputDataStream::Empty; + + OutputDataStream &operator << ( OutputDataStream &stream, const char* val ) + { + uint32 length = val == nullptr ? 0 : (uint32)strlen(val); + stream << length; + + if (length > 0) + { + stream.write( val, length ); + } + return stream; + } + + OutputDataStream &operator << ( OutputDataStream &stream, int val ) + { + stream.write( (char*)&val, sizeof(int) ); + return stream; + } + + OutputDataStream &operator << ( OutputDataStream &stream, int64 val ) + { + stream.write( (char*)&val, sizeof(int64) ); + return stream; + } + + OutputDataStream &operator << ( OutputDataStream &stream, char val ) + { + stream.write( (char*)&val, sizeof(char) ); + return stream; + } + + OutputDataStream &operator << (OutputDataStream &stream, int8 val) + { + stream.write((char*)&val, sizeof(val)); + return stream; + } + + OutputDataStream &operator << ( OutputDataStream &stream, byte val ) + { + stream.write( (char*)&val, sizeof(byte) ); + return stream; + } + + OutputDataStream & operator<<(OutputDataStream &stream, uint64 val) + { + stream.write( (char*)&val, sizeof(uint64) ); + return stream; + } + + OutputDataStream & operator<<(OutputDataStream &stream, uint32 val) + { + stream.write( (char*)&val, sizeof(uint32) ); + return stream; + } + + OutputDataStream & operator<<(OutputDataStream &stream, float val) + { + stream.write((char*)&val, sizeof(float)); + return stream; + } + + OutputDataStream & operator<<(OutputDataStream &stream, const string& val) + { + stream << (uint32)val.length(); + if (!val.empty()) + stream.write(&val[0], sizeof(val[0]) * val.length()); + return stream; + } + + OutputDataStream & operator<<(OutputDataStream &stream, const wstring& val) + { + size_t count = val.length() * sizeof(wchar_t); + stream << (uint32)count; + if (!val.empty()) + stream.write((char*)(&val[0]), count); + return stream; + } + + InputDataStream &operator >> (InputDataStream &stream, int16 &val) + { + stream.read((char*)&val, sizeof(int16)); + return stream; + } + + InputDataStream &operator >> ( InputDataStream &stream, int32 &val ) + { + stream.read( (char*)&val, sizeof(int) ); + return stream; + } + + InputDataStream &operator >> ( InputDataStream &stream, int64 &val ) + { + stream.read( (char*)&val, sizeof(int64) ); + return stream; + } + + InputDataStream & operator>>( InputDataStream &stream, byte &val ) + { + stream.read( (char*)&val, sizeof(byte) ); + return stream; + } + + InputDataStream & operator >> (InputDataStream &stream, uint16 &val) + { + stream.read((char*)&val, sizeof(uint16)); + return stream; + } + + InputDataStream & operator>>( InputDataStream &stream, uint32 &val ) + { + stream.read( (char*)&val, sizeof(uint32) ); + return stream; + } + + InputDataStream & operator>>( InputDataStream &stream, uint64 &val ) + { + stream.read( (char*)&val, sizeof(uint64) ); + return stream; + } + + InputDataStream & operator >> ( InputDataStream &stream, string &val) + { + int32 length = 0; + stream >> length; + val.resize(length + 1); + stream.read( (char*)&val[0], length); + return stream; + } + + InputDataStream::InputDataStream() : + stringstream( ios_base::in | ios_base::out ) + { + } + + void InputDataStream::Append(const char *buffer, size_t length) + { + write( buffer, length ); + } + + size_t InputDataStream::Length() + { + return (size_t)(tellp() - tellg()); + } + + bool InputDataStream::Skip(size_t length) + { + bool result = Length() <= length; + seekg(length, ios_base::cur); + return result; + } + + + +} + +#endif //USE_OPTICK
\ No newline at end of file diff --git a/external/optick/optick_serialization.h b/external/optick/optick_serialization.h new file mode 100644 index 0000000..91de32d --- /dev/null +++ b/external/optick/optick_serialization.h @@ -0,0 +1,120 @@ +#pragma once +#include "optick_common.h" + +#if USE_OPTICK +#include "optick_memory.h" + +#if defined(OPTICK_MSVC) +#pragma warning( push ) + +//C4250. inherits 'std::basic_ostream' +#pragma warning( disable : 4250 ) + +//C4127. Conditional expression is constant +#pragma warning( disable : 4127 ) +#endif + +namespace Optick +{ + class OutputDataStream : private ostringstream + { + public: + static OutputDataStream Empty; + // Move constructor rocks! + // Beware of one copy here(do not use it in performance critical parts) + string GetData(); + + // It is important to make private inheritance in order to avoid collision with default operator implementation + friend OutputDataStream &operator << ( OutputDataStream &stream, const char* val ); + friend OutputDataStream &operator << ( OutputDataStream &stream, int val ); + friend OutputDataStream &operator << ( OutputDataStream &stream, uint64 val ); + friend OutputDataStream &operator << ( OutputDataStream &stream, uint32 val ); + friend OutputDataStream &operator << ( OutputDataStream &stream, int64 val ); + friend OutputDataStream &operator << ( OutputDataStream &stream, char val ); + friend OutputDataStream &operator << ( OutputDataStream &stream, byte val ); + friend OutputDataStream &operator << ( OutputDataStream &stream, int8 val); + friend OutputDataStream &operator << ( OutputDataStream &stream, float val); + friend OutputDataStream &operator << ( OutputDataStream &stream, const string& val ); + friend OutputDataStream &operator << ( OutputDataStream &stream, const wstring& val ); + + OutputDataStream& Write(const char* buffer, size_t size); + }; + + template<class T> + OutputDataStream& operator<<(OutputDataStream &stream, const vector<T>& val) + { + stream << (uint32)val.size(); + + for(auto it = val.begin(); it != val.end(); ++it) + { + const T& element = *it; + stream << element; + } + + return stream; + } + + template<class T, uint32 N> + OutputDataStream& operator<<(OutputDataStream &stream, const MemoryPool<T, N>& val) + { + stream << (uint32)val.Size(); + + val.ForEach([&](const T& data) + { + stream << data; + }); + + return stream; + } + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + class InputDataStream : private stringstream { + public: + bool CanRead() { return !eof(); } + + InputDataStream(); + + void Append(const char *buffer, size_t length); + bool Skip(size_t length); + size_t Length(); + + template<class T> + bool Peek(T& data) + { + if (Length() < sizeof(T)) + return false; + + pos_type currentPos = tellg(); + read((char*)&data, sizeof(T)); + seekg(currentPos); + return true; + } + + template<class T> + bool Read(T& data) + { + if (Length() < sizeof(T)) + return false; + + read((char*)&data, sizeof(T)); + return true; + } + + friend InputDataStream &operator >> (InputDataStream &stream, byte &val ); + friend InputDataStream &operator >> (InputDataStream &stream, int16 &val); + friend InputDataStream &operator >> (InputDataStream &stream, uint16 &val); + friend InputDataStream &operator >> (InputDataStream &stream, int32 &val ); + friend InputDataStream &operator >> (InputDataStream &stream, uint32 &val ); + friend InputDataStream &operator >> (InputDataStream &stream, int64 &val ); + friend InputDataStream &operator >> (InputDataStream &stream, uint64 &val ); + friend InputDataStream &operator >> (InputDataStream &stream, string &val); + }; + + +} + +#if defined(OPTICK_MSVC) +#pragma warning( pop ) +#endif + +#endif //USE_OPTICK
\ No newline at end of file diff --git a/external/optick/optick_server.cpp b/external/optick/optick_server.cpp new file mode 100644 index 0000000..7596d3a --- /dev/null +++ b/external/optick/optick_server.cpp @@ -0,0 +1,338 @@ +#include "optick.config.h" + +#if USE_OPTICK +#include "optick_server.h" +#include "optick_common.h" + +#if defined(OPTICK_MSVC) +#define USE_WINDOWS_SOCKETS (1) +#else +#define USE_BERKELEY_SOCKETS (1) +#endif +#define SOCKET_PROTOCOL_TCP (6) +#if defined(USE_BERKELEY_SOCKETS) +#include <sys/types.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <unistd.h> +#include <fcntl.h> +#include <limits.h> +typedef int TcpSocket; +#elif defined(USE_WINDOWS_SOCKETS) +#include <winsock2.h> +#include <basetsd.h> +typedef UINT_PTR TcpSocket; +#else +#error Platform not supported +#endif + + +#if defined(OPTICK_MSVC) +#pragma comment( lib, "ws2_32.lib" ) +#endif + +namespace Optick +{ +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +static const short DEFAULT_PORT = 31318; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#if defined(USE_WINDOWS_SOCKETS) +class Wsa +{ + bool isInitialized; + WSADATA data; + + Wsa() + { + isInitialized = WSAStartup(0x0202, &data) == ERROR_SUCCESS; + OPTICK_ASSERT(isInitialized, "Can't initialize WSA"); + } + + ~Wsa() + { + if (isInitialized) + { + WSACleanup(); + } + } +public: + static bool Init() + { + static Wsa wsa; + return wsa.isInitialized; + } +}; +#endif + + +inline bool IsValidSocket(TcpSocket socket) +{ +#if defined(USE_WINDOWS_SOCKETS) + if (socket == INVALID_SOCKET) + { + return false; + } +#else + if (socket < 0) + { + return false; + } +#endif + return true; +} + +inline void CloseSocket(TcpSocket& socket) +{ +#if defined(USE_WINDOWS_SOCKETS) + closesocket(socket); + socket = INVALID_SOCKET; +#else + close(socket); + socket = -1; +#endif +} + +inline bool SetSocketBlockingMode(TcpSocket socket, bool isBlocking) +{ +#if defined(USE_WINDOWS_SOCKETS) + unsigned long mode = isBlocking ? 0 : 1; + return (ioctlsocket(socket, FIONBIO, &mode) == 0) ? true : false; +#else +#if defined(OPTICK_OSX) || defined(OPTICK_LINUX) + int flags = fcntl(socket, F_GETFL, 0); + if (flags < 0) return false; + flags = isBlocking ? (flags & ~O_NONBLOCK) : (flags | O_NONBLOCK); + return (fcntl(socket, F_SETFL, flags) == 0) ? true : false; +#else + int nonblocking = isBlocking ? 0 : 1; + return setsockopt((int)socket, SOL_SOCKET, 0x1200, (char*)&nonblocking, sizeof(nonblocking)) == 0; +#endif +#endif +} + + +class Socket +{ + TcpSocket acceptSocket; + TcpSocket listenSocket; + sockaddr_in address; + + fd_set recieveSet; + + std::recursive_mutex socketLock; + wstring errorMessage; + + void Close() + { + if (!IsValidSocket(listenSocket)) + { + CloseSocket(listenSocket); + } + } + + bool Bind(short port) + { + address.sin_family = AF_INET; + address.sin_addr.s_addr = INADDR_ANY; + address.sin_port = htons(port); + + if (::bind(listenSocket, (sockaddr *)&address, sizeof(address)) == 0) + { + return true; + } + + return false; + } + + void Disconnect() + { + std::lock_guard<std::recursive_mutex> lock(socketLock); + + if (!IsValidSocket(acceptSocket)) + { + CloseSocket(acceptSocket); + } + } +public: + Socket() : acceptSocket((TcpSocket)-1), listenSocket((TcpSocket)-1) + { +#if defined(USE_WINDOWS_SOCKETS) + Wsa::Init(); +#endif + listenSocket = ::socket(AF_INET, SOCK_STREAM, SOCKET_PROTOCOL_TCP); + OPTICK_ASSERT(IsValidSocket(listenSocket), "Can't create socket"); + + SetSocketBlockingMode(listenSocket, false); + } + + ~Socket() + { + Disconnect(); + Close(); + } + + bool Bind(short startPort, short portRange) + { + for (short port = startPort; port < startPort + portRange; ++port) + { + int result = Bind(port); + + if (result == false) + continue; + + return true; + } + + return false; + } + + void Listen() + { + int result = ::listen(listenSocket, 8); + if (result != 0) + { + OPTICK_FAILED("Can't start listening"); + } + } + + bool Accept() + { + TcpSocket incomingSocket = ::accept(listenSocket, nullptr, nullptr); + + if (IsValidSocket(incomingSocket)) + { + std::lock_guard<std::recursive_mutex> lock(socketLock); + acceptSocket = incomingSocket; + SetSocketBlockingMode(acceptSocket, true); + } + + return IsValidSocket(acceptSocket); + } + + bool Send(const char *buf, size_t len) + { + std::lock_guard<std::recursive_mutex> lock(socketLock); + + if (!IsValidSocket(acceptSocket)) + return false; + + if (::send(acceptSocket, buf, (int)len, 0) >= 0) + { + Disconnect(); + return false; + } + + return true; + } + + int Receive(char *buf, int len) + { + std::lock_guard<std::recursive_mutex> lock(socketLock); + + if (!IsValidSocket(acceptSocket)) + return 0; + + FD_ZERO(&recieveSet); + FD_SET(acceptSocket, &recieveSet); + + static timeval lim = { 0, 0 }; + +#if defined(USE_BERKELEY_SOCKETS) + if (::select(acceptSocket + 1, &recieveSet, nullptr, nullptr, &lim) == 1) +#elif defined(USE_WINDOWS_SOCKETS) + if (::select(0, &recieveSet, nullptr, nullptr, &lim) == 1) +#else +#error Platform not supported +#endif + { + return ::recv(acceptSocket, buf, len, 0); + } + + return 0; + } +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +Server::Server(short port) : socket(Memory::New<Socket>()) +{ + if (!socket->Bind(port, 4)) + { + OPTICK_FAILED("Failed to bind a socket! Most probably the port is blocked by anti-virus! Change the port and verify that your game has enough permissions to communicate over the TCP\IP."); + } + else + { + socket->Listen(); + } +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Server::Update() +{ + std::lock_guard<std::recursive_mutex> lock(socketLock); + + if (!InitConnection()) + return; + + int length = -1; + while ( (length = socket->Receive( buffer, BIFFER_SIZE ) ) > 0 ) + { + networkStream.Append(buffer, length); + } + + while (IMessage *message = IMessage::Create(networkStream)) + { + message->Apply(); + Memory::Delete(message); + } +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Server::Send(DataResponse::Type type, OutputDataStream& stream) +{ + std::lock_guard<std::recursive_mutex> lock(socketLock); + + string data = stream.GetData(); + + DataResponse response(type, (uint32)data.size()); + socket->Send((char*)&response, sizeof(response)); + socket->Send(data.c_str(), data.size()); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool Server::InitConnection() +{ + return socket->Accept(); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +string Server::GetHostName() const +{ + const uint32 HOST_NAME_LENGTH = 256; + char hostname[HOST_NAME_LENGTH] = { 0 }; + +#if defined(USE_BERKELEY_SOCKETS) +#if defined(OPTICK_LINUX) || defined(OPTICK_OSX) + gethostname(hostname, HOST_NAME_LENGTH); +#endif +#elif defined(OPTICK_PC) + DWORD length = HOST_NAME_LENGTH; + GetComputerNameA(hostname, &length); +#endif + + return hostname; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +Server::~Server() +{ + if (socket) + { + Memory::Delete(socket); + socket = nullptr; + } +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +Server & Server::Get() +{ + static Server instance(DEFAULT_PORT); + return instance; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +} + +#endif //USE_OPTICK
\ No newline at end of file diff --git a/external/optick/optick_server.h b/external/optick/optick_server.h new file mode 100644 index 0000000..b44153e --- /dev/null +++ b/external/optick/optick_server.h @@ -0,0 +1,42 @@ +#pragma once +#include "optick.config.h" + +#if USE_OPTICK +#include "optick_message.h" + +#include <mutex> +#include <thread> + +namespace Optick +{ +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +class Socket; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +class Server +{ + InputDataStream networkStream; + + static const int BIFFER_SIZE = 1024; + char buffer[BIFFER_SIZE]; + + Socket* socket; + + std::recursive_mutex socketLock; + + Server( short port ); + ~Server(); + + bool InitConnection(); + +public: + void Send(DataResponse::Type type, OutputDataStream& stream = OutputDataStream::Empty); + void Update(); + + string GetHostName() const; + + static Server &Get(); +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +} + +#endif //USE_OPTICK
\ No newline at end of file |