From 0fcabd2b11a5b1d4d16f6f6bdf1efd54d2c45237 Mon Sep 17 00:00:00 2001 From: Yuri Kunde Schlesner Date: Mon, 17 Aug 2015 18:25:21 -0300 Subject: Integrate the MicroProfile profiling library This brings goodies such as a configurable user interface and multi-threaded timeline view. --- externals/microprofile/microprofile.h | 3571 +++++++++++++++++++++++++++++++++ 1 file changed, 3571 insertions(+) create mode 100644 externals/microprofile/microprofile.h (limited to 'externals/microprofile/microprofile.h') diff --git a/externals/microprofile/microprofile.h b/externals/microprofile/microprofile.h new file mode 100644 index 000000000..d1ae0c1c2 --- /dev/null +++ b/externals/microprofile/microprofile.h @@ -0,0 +1,3571 @@ +#pragma once +// This is free and unencumbered software released into the public domain. +// Anyone is free to copy, modify, publish, use, compile, sell, or +// distribute this software, either in source code form or as a compiled +// binary, for any purpose, commercial or non-commercial, and by any +// means. +// In jurisdictions that recognize copyright laws, the author or authors +// of this software dedicate any and all copyright interest in the +// software to the public domain. We make this dedication for the benefit +// of the public at large and to the detriment of our heirs and +// successors. We intend this dedication to be an overt act of +// relinquishment in perpetuity of all present and future rights to this +// software under copyright law. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +// IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. 
+// For more information, please refer to http://unlicense.org/ +// +// *********************************************************************** +// +// +// +// +// Howto: +// Call these functions from your code: +// MicroProfileOnThreadCreate +// MicroProfileMouseButton +// MicroProfileMousePosition +// MicroProfileModKey +// MicroProfileFlip <-- Call this once per frame +// MicroProfileDraw <-- Call this once per frame +// MicroProfileToggleDisplayMode <-- Bind to a key to toggle profiling +// MicroProfileTogglePause <-- Bind to a key to toggle pause +// +// Use these macros in your code in blocks you want to time: +// +// MICROPROFILE_DECLARE +// MICROPROFILE_DEFINE +// MICROPROFILE_DECLARE_GPU +// MICROPROFILE_DEFINE_GPU +// MICROPROFILE_SCOPE +// MICROPROFILE_SCOPEI +// MICROPROFILE_SCOPEGPU +// MICROPROFILE_SCOPEGPUI +// MICROPROFILE_META_CPU +// MICROPROFILE_META_GPU +// +// +// Usage: +// +// { +// MICROPROFILE_SCOPEI("GroupName", "TimerName", nColorRgb); +// ..Code to be timed.. +// } +// +// MICROPROFILE_DECLARE / MICROPROFILE_DEFINE allows defining groups in a shared place, to ensure sorting of the timers +// +// (in global scope) +// MICROPROFILE_DEFINE(g_ProfileFisk, "Fisk", "Skalle", nSomeColorRgb); +// +// (in some other file) +// MICROPROFILE_DECLARE(g_ProfileFisk); +// +// void foo(){ +// MICROPROFILE_SCOPE(g_ProfileFisk); +// } +// +// Once code is instrumented the gui is activated by calling MicroProfileToggleDisplayMode or by clicking in the upper left corner of +// the screen +// +// The following functions must be implemented before the profiler is usable +// debug render: +// void MicroProfileDrawText(int nX, int nY, uint32_t nColor, const char* pText, uint32_t nNumCharacters); +// void MicroProfileDrawBox(int nX, int nY, int nX1, int nY1, uint32_t nColor, MicroProfileBoxType = MicroProfileBoxTypeFlat); +// void MicroProfileDrawLine2D(uint32_t nVertices, float* pVertices, uint32_t nColor); +// Gpu time stamps: (See below for d3d/opengl helper) +// uint32_t MicroProfileGpuInsertTimeStamp(); +// 
uint64_t MicroProfileGpuGetTimeStamp(uint32_t nKey); +// uint64_t MicroProfileTicksPerSecondGpu(); +// threading: +// const char* MicroProfileGetThreadName(); Threadnames in detailed view +// +// Default implementations of Gpu timestamp functions: +// Opengl: +// in .c file where MICROPROFILE_IMPL is defined: +// #define MICROPROFILE_GPU_TIMERS_GL 1 (the header tests it with #if, so it needs a non-zero value) +// call MicroProfileGpuInitGL() on startup +// D3D11: +// in .c file where MICROPROFILE_IMPL is defined: +// #define MICROPROFILE_GPU_TIMERS_D3D11 1 (the header tests it with #if, so it needs a non-zero value) +// call MicroProfileGpuInitD3D11(). Pass Device & ImmediateContext +// +// Limitations: +// GPU timestamps can only be inserted from one thread. + + + +#ifndef MICROPROFILE_ENABLED +#define MICROPROFILE_ENABLED 1 +#endif + +#include +typedef uint64_t MicroProfileToken; +typedef uint16_t MicroProfileGroupId; + +#if 0 == MICROPROFILE_ENABLED + +/* Profiling compiled out: every instrumentation macro and API entry point below expands to nothing, to an empty statement, or to a constant, so instrumented code still builds. */ +#define MICROPROFILE_DECLARE(var) +#define MICROPROFILE_DEFINE(var, group, name, color) +#define MICROPROFILE_REGISTER_GROUP(group, color, category) +#define MICROPROFILE_DECLARE_GPU(var) +#define MICROPROFILE_DEFINE_GPU(var, name, color) +#define MICROPROFILE_SCOPE(var) do{}while(0) +#define MICROPROFILE_SCOPEI(group, name, color) do{}while(0) +#define MICROPROFILE_SCOPEGPU(var) do{}while(0) +#define MICROPROFILE_SCOPEGPUI( name, color) do{}while(0) +#define MICROPROFILE_META_CPU(name, count) +#define MICROPROFILE_META_GPU(name, count) +#define MICROPROFILE_FORCEENABLECPUGROUP(s) do{} while(0) +#define MICROPROFILE_FORCEDISABLECPUGROUP(s) do{} while(0) +#define MICROPROFILE_FORCEENABLEGPUGROUP(s) do{} while(0) +#define MICROPROFILE_FORCEDISABLEGPUGROUP(s) do{} while(0) +#define MICROPROFILE_SCOPE_TOKEN(token) + +#define MicroProfileGetTime(group, name) 0.f +#define MicroProfileOnThreadCreate(foo) do{}while(0) +#define MicroProfileFlip() do{}while(0) +#define MicroProfileSetAggregateFrames(a) do{}while(0) +#define MicroProfileGetAggregateFrames() 0 +#define MicroProfileGetCurrentAggregateFrames() 0 +#define MicroProfileTogglePause() 
do{}while(0) +#define MicroProfileToggleAllGroups() do{} while(0) +#define MicroProfileDumpTimers() do{}while(0) +#define MicroProfileShutdown() do{}while(0) +#define MicroProfileSetForceEnable(a) do{} while(0) +#define MicroProfileGetForceEnable() false +#define MicroProfileSetEnableAllGroups(a) do{} while(0) +#define MicroProfileEnableCategory(a) do{} while(0) +#define MicroProfileDisableCategory(a) do{} while(0) +#define MicroProfileGetEnableAllGroups() false +#define MicroProfileSetForceMetaCounters(a) +#define MicroProfileGetForceMetaCounters() 0 +#define MicroProfileEnableMetaCounter(c) do{}while(0) +#define MicroProfileDisableMetaCounter(c) do{}while(0) +#define MicroProfileDumpFile(html,csv) do{} while(0) +#define MicroProfileWebServerPort() ((uint32_t)-1) + +#else + +/* Profiling enabled: real declarations and platform glue follow. NOTE(review): the header names after the #include directives are missing in this copy of the file - restore them from the upstream source. */ +#include +#include +#include +#include +#include + +#ifndef MICROPROFILE_API +#define MICROPROFILE_API +#endif + +MICROPROFILE_API int64_t MicroProfileTicksPerSecondCpu(); + + +/* Per-platform primitives: MP_TICK (current CPU tick), MP_BREAK (trap), MP_THREAD_LOCAL, MP_STRCASECMP, MP_GETCURRENTTHREADID and ThreadIdType. */ +#if defined(__APPLE__) +#include +#include +#include +#include +#include +#if TARGET_OS_IPHONE +#define MICROPROFILE_IOS +#endif + +#define MP_TICK() mach_absolute_time() +/* Ticks per second of mach_absolute_time(), derived from the mach timebase (1e9 * denom / numer). Computed on the first call and cached in a function-local static. */ +inline int64_t MicroProfileTicksPerSecondCpu() +{ + static int64_t nTicksPerSecond = 0; + if(nTicksPerSecond == 0) + { + mach_timebase_info_data_t sTimebaseInfo; + mach_timebase_info(&sTimebaseInfo); + nTicksPerSecond = 1000000000ll * sTimebaseInfo.denom / sTimebaseInfo.numer; + } + return nTicksPerSecond; +} +/* 64-bit id of the calling thread as reported by pthread_threadid_np. */ +inline uint64_t MicroProfileGetCurrentThreadId() +{ + uint64_t tid; + pthread_threadid_np(pthread_self(), &tid); + return tid; +} + +#define MP_BREAK() __builtin_trap() +#define MP_THREAD_LOCAL __thread +#define MP_STRCASECMP strcasecmp +#define MP_GETCURRENTTHREADID() MicroProfileGetCurrentThreadId() +typedef uint64_t ThreadIdType; +#elif defined(_WIN32) +/* Defined in the MICROPROFILE_IMPL section on top of QueryPerformanceCounter. */ +int64_t MicroProfileGetTick(); +#define MP_TICK() MicroProfileGetTick() +#define MP_BREAK() __debugbreak() +#define MP_THREAD_LOCAL __declspec(thread) +#define MP_STRCASECMP _stricmp +#define 
MP_GETCURRENTTHREADID() GetCurrentThreadId() +typedef uint32_t ThreadIdType; + +#elif defined(__linux__) +#include +#include +/* Linux ticks are nanoseconds, so one second is 1e9 ticks. */ +inline int64_t MicroProfileTicksPerSecondCpu() +{ + return 1000000000ll; +} + +/* Current CPU tick in nanoseconds. Reads CLOCK_MONOTONIC instead of CLOCK_REALTIME: the profiler uses ticks to measure intervals (see MicroProfileLogTickDifference), and the realtime clock can jump whenever the system time is adjusted. */ +inline int64_t MicroProfileGetTick() +{ + timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return 1000000000ll * ts.tv_sec + ts.tv_nsec; +} +#define MP_TICK() MicroProfileGetTick() +#define MP_BREAK() __builtin_trap() +#define MP_THREAD_LOCAL __thread +#define MP_STRCASECMP strcasecmp +#define MP_GETCURRENTTHREADID() ((uint64_t)pthread_self()) +typedef uint64_t ThreadIdType; +#endif + + +/* Fallback for platforms without a thread-id primitive above: every thread reports id 0. */ +#ifndef MP_GETCURRENTTHREADID +#define MP_GETCURRENTTHREADID() 0 +typedef uint32_t ThreadIdType; +#endif + + +/* Always-on assert: traps through MP_BREAK when the condition is false. */ +#define MP_ASSERT(a) do{if(!(a)){MP_BREAK();} }while(0) +/* Instrumentation macros. DECLARE/DEFINE create a global token named g_mp_<var>; the SCOPE macros create a local handler object (named after the current line) that times the enclosing scope; the *I variants additionally register the token in a function-local static on first use. */ +#define MICROPROFILE_DECLARE(var) extern MicroProfileToken g_mp_##var +#define MICROPROFILE_DEFINE(var, group, name, color) MicroProfileToken g_mp_##var = MicroProfileGetToken(group, name, color, MicroProfileTokenTypeCpu) +#define MICROPROFILE_REGISTER_GROUP(group, category, color) MicroProfileRegisterGroup(group, category, color) +#define MICROPROFILE_DECLARE_GPU(var) extern MicroProfileToken g_mp_##var +#define MICROPROFILE_DEFINE_GPU(var, name, color) MicroProfileToken g_mp_##var = MicroProfileGetToken("GPU", name, color, MicroProfileTokenTypeGpu) +#define MICROPROFILE_TOKEN_PASTE0(a, b) a ## b +#define MICROPROFILE_TOKEN_PASTE(a, b) MICROPROFILE_TOKEN_PASTE0(a,b) +#define MICROPROFILE_SCOPE(var) MicroProfileScopeHandler MICROPROFILE_TOKEN_PASTE(foo, __LINE__)(g_mp_##var) +#define MICROPROFILE_SCOPE_TOKEN(token) MicroProfileScopeHandler MICROPROFILE_TOKEN_PASTE(foo, __LINE__)(token) +#define MICROPROFILE_SCOPEI(group, name, color) static MicroProfileToken MICROPROFILE_TOKEN_PASTE(g_mp,__LINE__) = MicroProfileGetToken(group, name, color, MicroProfileTokenTypeCpu); MicroProfileScopeHandler MICROPROFILE_TOKEN_PASTE(foo,__LINE__)( MICROPROFILE_TOKEN_PASTE(g_mp,__LINE__)) +#define MICROPROFILE_SCOPEGPU(var) 
MicroProfileScopeGpuHandler MICROPROFILE_TOKEN_PASTE(foo, __LINE__)(g_mp_##var) +#define MICROPROFILE_SCOPEGPUI(name, color) static MicroProfileToken MICROPROFILE_TOKEN_PASTE(g_mp,__LINE__) = MicroProfileGetToken("GPU", name, color, MicroProfileTokenTypeGpu); MicroProfileScopeGpuHandler MICROPROFILE_TOKEN_PASTE(foo,__LINE__)( MICROPROFILE_TOKEN_PASTE(g_mp,__LINE__)) +/* Meta counters: look the counter token up once (function-local static), then pass count to MicroProfileMetaUpdate on every execution. */ +#define MICROPROFILE_META_CPU(name, count) static MicroProfileToken MICROPROFILE_TOKEN_PASTE(g_mp_meta,__LINE__) = MicroProfileGetMetaToken(name); MicroProfileMetaUpdate(MICROPROFILE_TOKEN_PASTE(g_mp_meta,__LINE__), count, MicroProfileTokenTypeCpu) +#define MICROPROFILE_META_GPU(name, count) static MicroProfileToken MICROPROFILE_TOKEN_PASTE(g_mp_meta,__LINE__) = MicroProfileGetMetaToken(name); MicroProfileMetaUpdate(MICROPROFILE_TOKEN_PASTE(g_mp_meta,__LINE__), count, MicroProfileTokenTypeGpu) + + +/* Build-time configuration. Each setting below can be predefined by the including project; the value given here is only the fallback. */ +#ifndef MICROPROFILE_USE_THREAD_NAME_CALLBACK +#define MICROPROFILE_USE_THREAD_NAME_CALLBACK 0 +#endif + +/* Size in bytes of each thread's log (2 MiB); MICROPROFILE_BUFFER_SIZE divides it by sizeof(MicroProfileLogEntry) to get the entry count. */ +#ifndef MICROPROFILE_PER_THREAD_BUFFER_SIZE +#define MICROPROFILE_PER_THREAD_BUFFER_SIZE (2048<<10) +#endif + +#ifndef MICROPROFILE_MAX_FRAME_HISTORY +#define MICROPROFILE_MAX_FRAME_HISTORY 512 +#endif + +#ifndef MICROPROFILE_PRINTF +#define MICROPROFILE_PRINTF printf +#endif + +#ifndef MICROPROFILE_META_MAX +#define MICROPROFILE_META_MAX 8 +#endif + +#ifndef MICROPROFILE_WEBSERVER_PORT +#define MICROPROFILE_WEBSERVER_PORT 1338 +#endif + +#ifndef MICROPROFILE_WEBSERVER +#define MICROPROFILE_WEBSERVER 1 +#endif + +#ifndef MICROPROFILE_WEBSERVER_MAXFRAMES +#define MICROPROFILE_WEBSERVER_MAXFRAMES 30 +#endif + +#ifndef MICROPROFILE_WEBSERVER_SOCKET_BUFFER_SIZE +#define MICROPROFILE_WEBSERVER_SOCKET_BUFFER_SIZE (16<<10) +#endif + +#ifndef MICROPROFILE_GPU_TIMERS +#define MICROPROFILE_GPU_TIMERS 1 +#endif + +#ifndef MICROPROFILE_GPU_FRAME_DELAY +#define MICROPROFILE_GPU_FRAME_DELAY 3 //must be > 0 +#endif + + +#ifndef MICROPROFILE_NAME_MAX_LEN +#define MICROPROFILE_NAME_MAX_LEN 64 +#endif + +#define 
MICROPROFILE_FORCEENABLECPUGROUP(s) MicroProfileForceEnableGroup(s, MicroProfileTokenTypeCpu) +#define MICROPROFILE_FORCEDISABLECPUGROUP(s) MicroProfileForceDisableGroup(s, MicroProfileTokenTypeCpu) +#define MICROPROFILE_FORCEENABLEGPUGROUP(s) MicroProfileForceEnableGroup(s, MicroProfileTokenTypeGpu) +#define MICROPROFILE_FORCEDISABLEGPUGROUP(s) MicroProfileForceDisableGroup(s, MicroProfileTokenTypeGpu) + +#define MICROPROFILE_INVALID_TICK ((uint64_t)-1) +/* 48 one-bits: the width of the group mask stored in a token above its 16-bit timer index. */ +#define MICROPROFILE_GROUP_MASK_ALL 0xffffffffffff + + +/* Parenthesised like MICROPROFILE_INVALID_TICK so the cast cannot bind to neighbouring operators at the expansion site. */ +#define MICROPROFILE_INVALID_TOKEN ((uint64_t)-1) + +enum MicroProfileTokenType +{ + MicroProfileTokenTypeCpu, + MicroProfileTokenTypeGpu, +}; + +enum MicroProfileBoxType +{ + MicroProfileBoxTypeBar, + MicroProfileBoxTypeFlat, +}; + + + +struct MicroProfile; + +MICROPROFILE_API void MicroProfileInit(); +MICROPROFILE_API void MicroProfileShutdown(); +MICROPROFILE_API MicroProfileToken MicroProfileFindToken(const char* sGroup, const char* sName); +MICROPROFILE_API MicroProfileToken MicroProfileGetToken(const char* sGroup, const char* sName, uint32_t nColor, MicroProfileTokenType Token = MicroProfileTokenTypeCpu); +MICROPROFILE_API MicroProfileToken MicroProfileGetMetaToken(const char* pName); +MICROPROFILE_API void MicroProfileMetaUpdate(MicroProfileToken, int nCount, MicroProfileTokenType eTokenType); +/* Enter returns a tick value that must be handed back to the matching Leave call (see MicroProfileScopeHandler). */ +MICROPROFILE_API uint64_t MicroProfileEnter(MicroProfileToken nToken); +MICROPROFILE_API void MicroProfileLeave(MicroProfileToken nToken, uint64_t nTick); +MICROPROFILE_API uint64_t MicroProfileGpuEnter(MicroProfileToken nToken); +MICROPROFILE_API void MicroProfileGpuLeave(MicroProfileToken nToken, uint64_t nTick); +/* Token layout: bits 0-15 hold the timer index, bits 16-63 hold the group mask. */ +inline uint16_t MicroProfileGetTimerIndex(MicroProfileToken t){ return (uint16_t)(t&0xffff); } +inline uint64_t MicroProfileGetGroupMask(MicroProfileToken t){ return ((t>>16)&MICROPROFILE_GROUP_MASK_ALL);} +inline MicroProfileToken MicroProfileMakeToken(uint64_t nGroupMask, uint16_t nTimer){ return (nGroupMask<<16) | nTimer;} + +MICROPROFILE_API void 
MicroProfileFlip(); //! call once per frame. +MICROPROFILE_API void MicroProfileTogglePause(); +MICROPROFILE_API void MicroProfileForceEnableGroup(const char* pGroup, MicroProfileTokenType Type); +MICROPROFILE_API void MicroProfileForceDisableGroup(const char* pGroup, MicroProfileTokenType Type); +MICROPROFILE_API float MicroProfileGetTime(const char* pGroup, const char* pName); +MICROPROFILE_API void MicroProfileContextSwitchSearch(uint32_t* pContextSwitchStart, uint32_t* pContextSwitchEnd, uint64_t nBaseTicksCpu, uint64_t nBaseTicksEndCpu); +MICROPROFILE_API void MicroProfileOnThreadCreate(const char* pThreadName); //should be called from newly created threads +MICROPROFILE_API void MicroProfileOnThreadExit(); //call when a thread exits so its log can be reused +MICROPROFILE_API void MicroProfileInitThreadLog(); +MICROPROFILE_API void MicroProfileSetForceEnable(bool bForceEnable); +MICROPROFILE_API bool MicroProfileGetForceEnable(); +MICROPROFILE_API void MicroProfileSetEnableAllGroups(bool bEnable); +MICROPROFILE_API void MicroProfileEnableCategory(const char* pCategory); +MICROPROFILE_API void MicroProfileDisableCategory(const char* pCategory); +MICROPROFILE_API bool MicroProfileGetEnableAllGroups(); +MICROPROFILE_API void MicroProfileSetForceMetaCounters(bool bEnable); +MICROPROFILE_API bool MicroProfileGetForceMetaCounters(); +MICROPROFILE_API void MicroProfileEnableMetaCounter(const char* pMet); +MICROPROFILE_API void MicroProfileDisableMetaCounter(const char* pMet); +MICROPROFILE_API void MicroProfileSetAggregateFrames(int frames); +MICROPROFILE_API int MicroProfileGetAggregateFrames(); +MICROPROFILE_API int MicroProfileGetCurrentAggregateFrames(); +/* Returns the address of the global profiler state (g_MicroProfile). */ +MICROPROFILE_API MicroProfile* MicroProfileGet(); +MICROPROFILE_API void MicroProfileGetRange(uint32_t nPut, uint32_t nGet, uint32_t nRange[2][2]); +/* Returns the recursive mutex the profiler locks around its state; MicroProfileShutdown holds it for the duration of the shutdown. */ +MICROPROFILE_API std::recursive_mutex& MicroProfileGetMutex(); +MICROPROFILE_API void MicroProfileStartContextSwitchTrace(); +MICROPROFILE_API void MicroProfileStopContextSwitchTrace(); 
+MICROPROFILE_API bool MicroProfileIsLocalThread(uint32_t nThreadId); + + +#if MICROPROFILE_WEBSERVER +MICROPROFILE_API void MicroProfileDumpFile(const char* pHtml, const char* pCsv); +MICROPROFILE_API uint32_t MicroProfileWebServerPort(); +#else +/* No-op stand-ins when the web server is compiled out. MicroProfileDumpFile takes two arguments here as well, matching the real function and the MICROPROFILE_ENABLED == 0 stub, so call sites build in every configuration. */ +#define MicroProfileDumpFile(html,csv) do{} while(0) +#define MicroProfileWebServerPort() ((uint32_t)-1) +#endif + + + + +/* GPU timestamp hooks: supplied by the application (or by the D3D11/GL helpers below) when MICROPROFILE_GPU_TIMERS is on, constant stand-ins otherwise. */ +#if MICROPROFILE_GPU_TIMERS +MICROPROFILE_API uint32_t MicroProfileGpuInsertTimeStamp(); +MICROPROFILE_API uint64_t MicroProfileGpuGetTimeStamp(uint32_t nKey); +MICROPROFILE_API uint64_t MicroProfileTicksPerSecondGpu(); +MICROPROFILE_API int MicroProfileGetGpuTickReference(int64_t* pOutCPU, int64_t* pOutGpu); +#else +#define MicroProfileGpuInsertTimeStamp() 1 +#define MicroProfileGpuGetTimeStamp(a) 0 +#define MicroProfileTicksPerSecondGpu() 1 +#define MicroProfileGetGpuTickReference(a,b) 0 +#endif + +#if MICROPROFILE_GPU_TIMERS_D3D11 +#define MICROPROFILE_D3D_MAX_QUERIES (8<<10) +MICROPROFILE_API void MicroProfileGpuInitD3D11(void* pDevice, void* pDeviceContext); +#endif + +#if MICROPROFILE_GPU_TIMERS_GL +#define MICROPROFILE_GL_MAX_QUERIES (8<<10) +MICROPROFILE_API void MicroProfileGpuInitGL(); +#endif + + + +#if MICROPROFILE_USE_THREAD_NAME_CALLBACK +MICROPROFILE_API const char* MicroProfileGetThreadName(); +#else +#define MicroProfileGetThreadName() "" +#endif + +#if !defined(MICROPROFILE_THREAD_NAME_FROM_ID) +#define MICROPROFILE_THREAD_NAME_FROM_ID(a) "" +#endif + + +/* RAII scope timer for CPU tokens: the constructor stores the tick returned by MicroProfileEnter and the destructor passes it to MicroProfileLeave. */ +struct MicroProfileScopeHandler +{ + MicroProfileToken nToken; + uint64_t nTick; + MicroProfileScopeHandler(MicroProfileToken Token):nToken(Token) + { + nTick = MicroProfileEnter(nToken); + } + ~MicroProfileScopeHandler() + { + MicroProfileLeave(nToken, nTick); + } +}; + +/* GPU counterpart of MicroProfileScopeHandler, built on MicroProfileGpuEnter / MicroProfileGpuLeave. */ +struct MicroProfileScopeGpuHandler +{ + MicroProfileToken nToken; + uint64_t nTick; + MicroProfileScopeGpuHandler(MicroProfileToken Token):nToken(Token) + { + nTick = MicroProfileGpuEnter(nToken); + } + ~MicroProfileScopeGpuHandler() + { + MicroProfileGpuLeave(nToken, 
nTick); + } +}; + + + +#define MICROPROFILE_MAX_TIMERS 1024 +#define MICROPROFILE_MAX_GROUPS 48 //don't bump! number of bits used in the token group bitmask (MICROPROFILE_GROUP_MASK_ALL is 48 bits wide) +#define MICROPROFILE_MAX_CATEGORIES 16 +#define MICROPROFILE_MAX_GRAPHS 5 +#define MICROPROFILE_GRAPH_HISTORY 128 +/* Number of entries in one thread log: the per-thread byte budget divided by the entry size. */ +#define MICROPROFILE_BUFFER_SIZE ((MICROPROFILE_PER_THREAD_BUFFER_SIZE)/sizeof(MicroProfileLogEntry)) +#define MICROPROFILE_MAX_CONTEXT_SWITCH_THREADS 256 +#define MICROPROFILE_STACK_MAX 32 +//#define MICROPROFILE_MAX_PRESETS 5 +#define MICROPROFILE_ANIM_DELAY_PRC 0.5f +#define MICROPROFILE_GAP_TIME 50 //extra ms to fetch to close timers from earlier frames + + +#ifndef MICROPROFILE_MAX_THREADS +#define MICROPROFILE_MAX_THREADS 32 +#endif + +/* Colour unpacking for 0xRRGGBB values. NOTE(review): these shift but do not mask with 0xff - presumably callers narrow the result to 8 bits; confirm before reusing them elsewhere. */ +#ifndef MICROPROFILE_UNPACK_RED +#define MICROPROFILE_UNPACK_RED(c) ((c)>>16) +#endif + +#ifndef MICROPROFILE_UNPACK_GREEN +#define MICROPROFILE_UNPACK_GREEN(c) ((c)>>8) +#endif + +#ifndef MICROPROFILE_UNPACK_BLUE +#define MICROPROFILE_UNPACK_BLUE(c) ((c)) +#endif + +#ifndef MICROPROFILE_DEFAULT_PRESET +#define MICROPROFILE_DEFAULT_PRESET "Default" +#endif + + +/* Context-switch tracing defaults to on for Windows only. */ +#ifndef MICROPROFILE_CONTEXT_SWITCH_TRACE +#if defined(_WIN32) +#define MICROPROFILE_CONTEXT_SWITCH_TRACE 1 +#elif defined(__APPLE__) +#define MICROPROFILE_CONTEXT_SWITCH_TRACE 0 //disabled until dtrace script is working. 
+#else +#define MICROPROFILE_CONTEXT_SWITCH_TRACE 0 +#endif +#endif + +#if MICROPROFILE_CONTEXT_SWITCH_TRACE +#define MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE (128*1024) //2 MiB at 16 bytes per entry (128*1024 entries) +#else +#define MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE (1) +#endif + +#ifndef MICROPROFILE_MINIZ +#define MICROPROFILE_MINIZ 0 +#endif + +/* Socket handle type used by the web server: UINT_PTR on Windows, a plain int descriptor elsewhere. */ +#ifdef _WIN32 +#include +typedef UINT_PTR MpSocket; +#else +typedef int MpSocket; +#endif + + +/* Native thread handle type used by MicroProfileThreadStart / MicroProfileThreadJoin. */ +#if defined(__APPLE__) || defined(__linux__) +typedef pthread_t MicroProfileThread; +#elif defined(_WIN32) +typedef HANDLE MicroProfileThread; +#else +typedef std::thread* MicroProfileThread; +#endif + + + +enum MicroProfileDrawMask +{ + MP_DRAW_OFF = 0x0, + MP_DRAW_BARS = 0x1, + MP_DRAW_DETAILED = 0x2, + MP_DRAW_HIDDEN = 0x3, +}; + +/* Bit flags, one per bar column; bits from MP_DRAW_META_FIRST upwards are presumably one per meta counter - confirm against the UI code. */ +enum MicroProfileDrawBarsMask +{ + MP_DRAW_TIMERS = 0x1, + MP_DRAW_AVERAGE = 0x2, + MP_DRAW_MAX = 0x4, + MP_DRAW_CALL_COUNT = 0x8, + MP_DRAW_TIMERS_EXCLUSIVE = 0x10, + MP_DRAW_AVERAGE_EXCLUSIVE = 0x20, + MP_DRAW_MAX_EXCLUSIVE = 0x40, + MP_DRAW_META_FIRST = 0x80, + MP_DRAW_ALL = 0xffffffff, + +}; + +/* One log entry is a packed 64-bit word; its bit layout is given by the MP_LOG_*_MASK constants. */ +typedef uint64_t MicroProfileLogEntry; + +struct MicroProfileTimer +{ + uint64_t nTicks; + uint32_t nCount; +}; + +struct MicroProfileCategory +{ + char pName[MICROPROFILE_NAME_MAX_LEN]; + uint64_t nGroupMask; +}; + +struct MicroProfileGroupInfo +{ + char pName[MICROPROFILE_NAME_MAX_LEN]; + uint32_t nNameLen; + uint32_t nGroupIndex; + uint32_t nNumTimers; + uint32_t nMaxTimerNameLen; + uint32_t nColor; + uint32_t nCategory; + MicroProfileTokenType Type; +}; + +struct MicroProfileTimerInfo +{ + MicroProfileToken nToken; + uint32_t nTimerIndex; + uint32_t nGroupIndex; + char pName[MICROPROFILE_NAME_MAX_LEN]; + uint32_t nNameLen; + uint32_t nColor; + bool bGraph; +}; + +struct MicroProfileGraphState +{ + int64_t nHistory[MICROPROFILE_GRAPH_HISTORY]; + MicroProfileToken nToken; + int32_t nKey; +}; + +/* One context-switch record: the two thread ids plus an 8-bit CPU number and a 56-bit tick packed into a single 64-bit bitfield. */ +struct MicroProfileContextSwitch +{ + ThreadIdType nThreadOut; + ThreadIdType nThreadIn; + int64_t nCpu : 8; + int64_t 
nTicks : 56; +}; + + +/* Per-frame snapshot: CPU and GPU tick at frame start plus one nLogStart value per thread-log slot (presumably that log's position at frame start - confirm). */ +struct MicroProfileFrameState +{ + int64_t nFrameStartCpu; + int64_t nFrameStartGpu; + uint32_t nLogStart[MICROPROFILE_MAX_THREADS]; +}; + +/* Per-thread event log of MICROPROFILE_BUFFER_SIZE packed entries with atomic put/get cursors. The cursors are 32-bit, matching nLogStart above and the uint32_t nPut/nGet parameters of MicroProfileGetRange. */ +struct MicroProfileThreadLog +{ + MicroProfileLogEntry Log[MICROPROFILE_BUFFER_SIZE]; + + std::atomic<uint32_t> nPut; + std::atomic<uint32_t> nGet; + uint32_t nActive; + uint32_t nGpu; + ThreadIdType nThreadId; + + uint32_t nStack[MICROPROFILE_STACK_MAX]; + int64_t nChildTickStack[MICROPROFILE_STACK_MAX]; + uint32_t nStackPos; + + + uint8_t nGroupStackPos[MICROPROFILE_MAX_GROUPS]; + int64_t nGroupTicks[MICROPROFILE_MAX_GROUPS]; + int64_t nAggregateGroupTicks[MICROPROFILE_MAX_GROUPS]; + enum + { + THREAD_MAX_LEN = 64, + }; + char ThreadName[THREAD_MAX_LEN]; /* sized by the constant above instead of a repeated literal */ + int nFreeListNext; +}; + +#if MICROPROFILE_GPU_TIMERS_D3D11 +struct MicroProfileD3D11Frame +{ + uint32_t m_nQueryStart; + uint32_t m_nQueryCount; + uint32_t m_nRateQueryStarted; + void* m_pRateQuery; +}; + +struct MicroProfileGpuTimerState +{ + uint32_t bInitialized; + void* m_pDevice; + void* m_pDeviceContext; + void* m_pQueries[MICROPROFILE_D3D_MAX_QUERIES]; + int64_t m_nQueryResults[MICROPROFILE_D3D_MAX_QUERIES]; + uint32_t m_nQueryPut; + uint32_t m_nQueryGet; + uint32_t m_nQueryFrame; + int64_t m_nQueryFrequency; + MicroProfileD3D11Frame m_QueryFrames[MICROPROFILE_GPU_FRAME_DELAY]; +}; +#elif MICROPROFILE_GPU_TIMERS_GL +struct MicroProfileGpuTimerState +{ + uint32_t GLTimers[MICROPROFILE_GL_MAX_QUERIES]; + uint32_t GLTimerPos; +}; +#else +struct MicroProfileGpuTimerState{}; +#endif + +/* Global profiler state; the single instance is g_MicroProfile, reachable through MicroProfileGet(). */ +struct MicroProfile +{ + uint32_t nTotalTimers; + uint32_t nGroupCount; + uint32_t nCategoryCount; + uint32_t nAggregateClear; + uint32_t nAggregateFlip; + uint32_t nAggregateFlipCount; + uint32_t nAggregateFrames; + + uint64_t nAggregateFlipTick; + + uint32_t nDisplay; + uint32_t nBars; + uint64_t nActiveGroup; + uint32_t nActiveBars; + + uint64_t nForceGroup; + uint32_t nForceEnable; + uint32_t nForceMetaCounters; + + uint64_t nForceGroupUI; + uint64_t nActiveGroupWanted; + uint32_t 
nAllGroupsWanted; + uint32_t nAllThreadsWanted; + + uint32_t nOverflow; + + uint64_t nGroupMask; + uint32_t nRunning; + uint32_t nToggleRunning; + uint32_t nMaxGroupSize; + uint32_t nDumpFileNextFrame; + uint32_t nAutoClearFrames; + char HtmlDumpPath[512]; + char CsvDumpPath[512]; + + int64_t nPauseTicks; + + /* MicroProfileInit sets fReferenceTime to 33.33f and fRcpReferenceTime to its reciprocal. */ + float fReferenceTime; + float fRcpReferenceTime; + + MicroProfileCategory CategoryInfo[MICROPROFILE_MAX_CATEGORIES]; + MicroProfileGroupInfo GroupInfo[MICROPROFILE_MAX_GROUPS]; + MicroProfileTimerInfo TimerInfo[MICROPROFILE_MAX_TIMERS]; + /* Timer index -> group index lookup, read by MicroProfileGetGroupIndex. */ + uint8_t TimerToGroup[MICROPROFILE_MAX_TIMERS]; + + /* Per-timer statistics, all indexed by timer index. */ + MicroProfileTimer AccumTimers[MICROPROFILE_MAX_TIMERS]; + uint64_t AccumMaxTimers[MICROPROFILE_MAX_TIMERS]; + uint64_t AccumTimersExclusive[MICROPROFILE_MAX_TIMERS]; + uint64_t AccumMaxTimersExclusive[MICROPROFILE_MAX_TIMERS]; + + MicroProfileTimer Frame[MICROPROFILE_MAX_TIMERS]; + uint64_t FrameExclusive[MICROPROFILE_MAX_TIMERS]; + + MicroProfileTimer Aggregate[MICROPROFILE_MAX_TIMERS]; + uint64_t AggregateMax[MICROPROFILE_MAX_TIMERS]; + uint64_t AggregateExclusive[MICROPROFILE_MAX_TIMERS]; + uint64_t AggregateMaxExclusive[MICROPROFILE_MAX_TIMERS]; + + + /* Per-group statistics, indexed by group index. */ + uint64_t FrameGroup[MICROPROFILE_MAX_GROUPS]; + uint64_t AccumGroup[MICROPROFILE_MAX_GROUPS]; + uint64_t AccumGroupMax[MICROPROFILE_MAX_GROUPS]; + + uint64_t AggregateGroup[MICROPROFILE_MAX_GROUPS]; + uint64_t AggregateGroupMax[MICROPROFILE_MAX_GROUPS]; + + + /* One slot per meta counter (up to MICROPROFILE_META_MAX), each holding per-timer arrays plus running sums. */ + struct + { + uint64_t nCounters[MICROPROFILE_MAX_TIMERS]; + + uint64_t nAccum[MICROPROFILE_MAX_TIMERS]; + uint64_t nAccumMax[MICROPROFILE_MAX_TIMERS]; + + uint64_t nAggregate[MICROPROFILE_MAX_TIMERS]; + uint64_t nAggregateMax[MICROPROFILE_MAX_TIMERS]; + + uint64_t nSum; + uint64_t nSumAccum; + uint64_t nSumAccumMax; + uint64_t nSumAggregate; + uint64_t nSumAggregateMax; + + const char* pName; + } MetaCounters[MICROPROFILE_META_MAX]; + + MicroProfileGraphState Graph[MICROPROFILE_MAX_GRAPHS]; + uint32_t nGraphPut; + + uint32_t nThreadActive[MICROPROFILE_MAX_THREADS]; + 
MicroProfileThreadLog* Pool[MICROPROFILE_MAX_THREADS]; + uint32_t nNumLogs; + uint32_t nMemUsage; + int nFreeListHead; /* -1 means the free list is empty (MicroProfileInit sets it to -1) */ + + uint32_t nFrameCurrent; + uint32_t nFrameCurrentIndex; + uint32_t nFramePut; + uint64_t nFramePutIndex; + + MicroProfileFrameState Frames[MICROPROFILE_MAX_FRAME_HISTORY]; + + uint64_t nFlipTicks; + uint64_t nFlipAggregate; + uint64_t nFlipMax; + uint64_t nFlipAggregateDisplay; + uint64_t nFlipMaxDisplay; + + MicroProfileThread ContextSwitchThread; + bool bContextSwitchRunning; + bool bContextSwitchStop; + bool bContextSwitchAllThreads; + bool bContextSwitchNoBars; + uint32_t nContextSwitchUsage; + uint32_t nContextSwitchLastPut; + + int64_t nContextSwitchHoverTickIn; + int64_t nContextSwitchHoverTickOut; + uint32_t nContextSwitchHoverThread; + uint32_t nContextSwitchHoverThreadBefore; + uint32_t nContextSwitchHoverThreadAfter; + uint8_t nContextSwitchHoverCpu; + uint8_t nContextSwitchHoverCpuNext; + + uint32_t nContextSwitchPut; + MicroProfileContextSwitch ContextSwitch[MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE]; + + + MpSocket ListenerSocket; + uint32_t nWebServerPort; + + char WebServerBuffer[MICROPROFILE_WEBSERVER_SOCKET_BUFFER_SIZE]; + uint32_t WebServerPut; + + uint64_t nWebServerDataSent; + + MicroProfileGpuTimerState GPU; + + +}; + +/* Packed log entry layout (64 bits): bits 62-63 entry type (one of the MP_LOG_* values below), bits 48-61 timer index, bits 0-47 tick. */ +#define MP_LOG_TICK_MASK 0x0000ffffffffffff +#define MP_LOG_INDEX_MASK 0x3fff000000000000 +#define MP_LOG_BEGIN_MASK 0xc000000000000000 +#define MP_LOG_GPU_EXTRA 0x3 +#define MP_LOG_META 0x2 +#define MP_LOG_ENTER 0x1 +#define MP_LOG_LEAVE 0x0 + + +/* Extracts the 2-bit entry type from the top of a log entry. */ +inline int MicroProfileLogType(MicroProfileLogEntry Index) +{ + return ((MP_LOG_BEGIN_MASK & Index)>>62) & 0x3; +} + +/* Extracts the 14-bit timer index stored in bits 48-61. */ +inline uint64_t MicroProfileLogTimerIndex(MicroProfileLogEntry Index) +{ + return (0x3fff&(Index>>48)); +} + +/* Packs an entry type, the low 14 bits of the token and the low 48 bits of the tick into one log entry, then asserts that type and index read back unchanged. */ +inline MicroProfileLogEntry MicroProfileMakeLogIndex(uint64_t nBegin, MicroProfileToken nToken, int64_t nTick) +{ + MicroProfileLogEntry Entry = (nBegin<<62) | ((0x3fff&nToken)<<48) | (MP_LOG_TICK_MASK&nTick); + int t = 
MicroProfileLogType(Entry); + uint64_t nTimerIndex = MicroProfileLogTimerIndex(Entry); + MP_ASSERT((uint64_t)t == nBegin); /* cast keeps the comparison unsigned on both sides; t is always 0..3 */ + MP_ASSERT(nTimerIndex == (nToken&0x3fff)); + return Entry; + +} + +/* Signed difference End - Start of two 48-bit ticks. Both values are shifted up by 16 so the subtraction wraps at 48 bits, then the arithmetic shift back down sign-extends the result. */ +inline int64_t MicroProfileLogTickDifference(MicroProfileLogEntry Start, MicroProfileLogEntry End) +{ + uint64_t nStart = Start; + uint64_t nEnd = End; + int64_t nDifference = ((nEnd<<16) - (nStart<<16)); + return nDifference >> 16; +} + +/* Returns the 48-bit tick field of a log entry. */ +inline int64_t MicroProfileLogGetTick(MicroProfileLogEntry e) +{ + return MP_LOG_TICK_MASK & e; +} + +/* Returns e with its tick field replaced by the low 48 bits of nTick; type and index bits are kept. */ +inline int64_t MicroProfileLogSetTick(MicroProfileLogEntry e, int64_t nTick) +{ + return (MP_LOG_TICK_MASK & nTick) | (e & ~MP_LOG_TICK_MASK); +} + +/* Smaller of a and b (b when they compare equal). */ +template<typename T> +T MicroProfileMin(T a, T b) +{ return a < b ? a : b; } + +/* Larger of a and b (b when they compare equal). */ +template<typename T> +T MicroProfileMax(T a, T b) +{ return a > b ? a : b; } + +/* Converts milliseconds to ticks at the given tick rate (computed in single precision). */ +inline int64_t MicroProfileMsToTick(float fMs, int64_t nTicksPerSecond) +{ + return (int64_t)(fMs*0.001f*nTicksPerSecond); +} + +/* Factor that converts a tick count at the given rate into milliseconds. */ +inline float MicroProfileTickToMsMultiplier(int64_t nTicksPerSecond) +{ + return 1000.f / nTicksPerSecond; +} + +/* Group index of the timer a token refers to, looked up in the global TimerToGroup table. */ +inline uint16_t MicroProfileGetGroupIndex(MicroProfileToken t) +{ + return (uint16_t)MicroProfileGet()->TimerToGroup[MicroProfileGetTimerIndex(t)]; +} + + + +#ifdef MICROPROFILE_IMPL + +#ifdef _WIN32 +#include +/* MSVC ships a conforming snprintf from Visual Studio 2015 (_MSC_VER 1900) onwards, where redefining it as a macro is rejected by the standard headers; only older compilers need the _snprintf alias. */ +#if defined(_MSC_VER) && _MSC_VER < 1900 +#define snprintf _snprintf +#endif + +#pragma warning(push) +#pragma warning(disable: 4244) +/* QueryPerformanceCounter frequency, queried on the first call and cached in a function-local static. */ +int64_t MicroProfileTicksPerSecondCpu() +{ + static int64_t nTicksPerSecond = 0; + if(nTicksPerSecond == 0) + { + QueryPerformanceFrequency((LARGE_INTEGER*)&nTicksPerSecond); + } + return nTicksPerSecond; +} +/* Current QueryPerformanceCounter value. */ +int64_t MicroProfileGetTick() +{ + int64_t ticks; + QueryPerformanceCounter((LARGE_INTEGER*)&ticks); + return ticks; +} + +#endif + +#if defined(MICROPROFILE_WEBSERVER) || defined(MICROPROFILE_CONTEXT_SWITCH_TRACE) + + +typedef void* (*MicroProfileThreadFunc)(void*); + +#if defined(__APPLE__) || defined(__linux__) +typedef pthread_t MicroProfileThread; +/* Starts Func on a new thread, passing it a null argument, and stores the thread handle in *pThread. */ +void MicroProfileThreadStart(MicroProfileThread* pThread, MicroProfileThreadFunc Func) 
+{ + pthread_attr_t Attr; + int r = pthread_attr_init(&Attr); + MP_ASSERT(r == 0); + pthread_create(pThread, &Attr, Func, 0); +} +void MicroProfileThreadJoin(MicroProfileThread* pThread) +{ + int r = pthread_join(*pThread, 0); + MP_ASSERT(r == 0); +} +#elif defined(_WIN32) +typedef HANDLE MicroProfileThread; +DWORD _stdcall ThreadTrampoline(void* pFunc) +{ + MicroProfileThreadFunc F = (MicroProfileThreadFunc)pFunc; + return (uint32_t)F(0); +} + +void MicroProfileThreadStart(MicroProfileThread* pThread, MicroProfileThreadFunc Func) +{ + *pThread = CreateThread(0, 0, ThreadTrampoline, Func, 0, 0); +} +void MicroProfileThreadJoin(MicroProfileThread* pThread) +{ + WaitForSingleObject(*pThread, INFINITE); + CloseHandle(*pThread); +} +#else +#include +typedef std::thread* MicroProfileThread; +inline void MicroProfileThreadStart(MicroProfileThread* pThread, MicroProfileThreadFunc Func) +{ + *pThread = new std::thread(Func, nullptr); +} +inline void MicroProfileThreadJoin(MicroProfileThread* pThread) +{ + (*pThread)->join(); + delete *pThread; +} +#endif +#endif + +#if MICROPROFILE_WEBSERVER + +#ifdef _WIN32 +#define MP_INVALID_SOCKET(f) (f == INVALID_SOCKET) +#endif + +#if defined(__APPLE__) +#include +#include +#include +#define MP_INVALID_SOCKET(f) (f < 0) +#endif + + +void MicroProfileWebServerStart(); +void MicroProfileWebServerStop(); +bool MicroProfileWebServerUpdate(); +void MicroProfileDumpToFile(); + +#else + +#define MicroProfileWebServerStart() do{}while(0) +#define MicroProfileWebServerStop() do{}while(0) +#define MicroProfileWebServerUpdate() false +#define MicroProfileDumpToFile() do{} while(0) +#endif + + +#if MICROPROFILE_GPU_TIMERS_D3D11 +void MicroProfileGpuFlip(); +void MicroProfileGpuShutdown(); +#else +#define MicroProfileGpuFlip() do{}while(0) +#define MicroProfileGpuShutdown() do{}while(0) +#endif + + + +#include +#include +#include +#include + + +#ifndef MICROPROFILE_DEBUG +#define MICROPROFILE_DEBUG 0 +#endif + + +#define S g_MicroProfile + 
+MicroProfile g_MicroProfile; +MicroProfileThreadLog* g_MicroProfileGpuLog = 0; +#ifdef MICROPROFILE_IOS +// iOS doesn't support __thread +static pthread_key_t g_MicroProfileThreadLogKey; +static pthread_once_t g_MicroProfileThreadLogKeyOnce = PTHREAD_ONCE_INIT; +static void MicroProfileCreateThreadLogKey() +{ + pthread_key_create(&g_MicroProfileThreadLogKey, NULL); +} +#else +MP_THREAD_LOCAL MicroProfileThreadLog* g_MicroProfileThreadLog = 0; +#endif +static bool g_bUseLock = false; /// This is used because windows does not support using mutexes under dll init(which is where global initialization is handled) + + +MICROPROFILE_DEFINE(g_MicroProfileFlip, "MicroProfile", "MicroProfileFlip", 0x3355ee); +MICROPROFILE_DEFINE(g_MicroProfileThreadLoop, "MicroProfile", "ThreadLoop", 0x3355ee); +MICROPROFILE_DEFINE(g_MicroProfileClear, "MicroProfile", "Clear", 0x3355ee); +MICROPROFILE_DEFINE(g_MicroProfileAccumulate, "MicroProfile", "Accumulate", 0x3355ee); +MICROPROFILE_DEFINE(g_MicroProfileContextSwitchSearch,"MicroProfile", "ContextSwitchSearch", 0xDD7300); + +/* The profiler's recursive mutex, constructed on first use as a function-local static. */ +inline std::recursive_mutex& MicroProfileMutex() +{ + static std::recursive_mutex Mutex; + return Mutex; +} +std::recursive_mutex& MicroProfileGetMutex() +{ + return MicroProfileMutex(); +} + +MICROPROFILE_API MicroProfile* MicroProfileGet() +{ + return &g_MicroProfile; +} + + +MicroProfileThreadLog* MicroProfileCreateThreadLog(const char* pName); + + +/* One-time initialisation of the global state S; later calls do nothing. The mutex is taken only when g_bUseLock is set. */ +void MicroProfileInit() +{ + std::recursive_mutex& mutex = MicroProfileMutex(); + bool bUseLock = g_bUseLock; + if(bUseLock) + mutex.lock(); + static bool bOnce = true; + if(bOnce) + { + bOnce = false; + memset(&S, 0, sizeof(S)); + S.nMemUsage += sizeof(S); /* counted after the memset above; adding it first, as before, was wiped out by the clear */ + for(int i = 0; i < MICROPROFILE_MAX_GROUPS; ++i) + { + S.GroupInfo[i].pName[0] = '\0'; + } + for(int i = 0; i < MICROPROFILE_MAX_CATEGORIES; ++i) + { + S.CategoryInfo[i].pName[0] = '\0'; + S.CategoryInfo[i].nGroupMask = 0; + } + strcpy(&S.CategoryInfo[0].pName[0], "default"); /* category 0 is the built-in "default" category */ + S.nCategoryCount = 1; + for(int i 
= 0; i < MICROPROFILE_MAX_TIMERS; ++i) + { + S.TimerInfo[i].pName[0] = '\0'; + } + S.nGroupCount = 0; + S.nAggregateFlipTick = MP_TICK(); + S.nActiveGroup = 0; + S.nActiveBars = 0; + S.nForceGroup = 0; + S.nAllGroupsWanted = 0; + S.nActiveGroupWanted = 0; + S.nAllThreadsWanted = 1; + S.nAggregateFlip = 0; + S.nTotalTimers = 0; + for(uint32_t i = 0; i < MICROPROFILE_MAX_GRAPHS; ++i) + { + S.Graph[i].nToken = MICROPROFILE_INVALID_TOKEN; + } + S.nRunning = 1; + S.fReferenceTime = 33.33f; + S.fRcpReferenceTime = 1.f / S.fReferenceTime; + S.nFreeListHead = -1; + int64_t nTick = MP_TICK(); + for(int i = 0; i < MICROPROFILE_MAX_FRAME_HISTORY; ++i) + { + S.Frames[i].nFrameStartCpu = nTick; + S.Frames[i].nFrameStartGpu = -1; + } + + MicroProfileThreadLog* pGpu = MicroProfileCreateThreadLog("GPU"); + g_MicroProfileGpuLog = pGpu; + MP_ASSERT(S.Pool[0] == pGpu); + pGpu->nGpu = 1; + pGpu->nThreadId = 0; + + S.nWebServerDataSent = (uint64_t)-1; + } + if(bUseLock) + mutex.unlock(); +} + +void MicroProfileShutdown() +{ + std::lock_guard Lock(MicroProfileMutex()); + MicroProfileWebServerStop(); + MicroProfileStopContextSwitchTrace(); + MicroProfileGpuShutdown(); +} + +#ifdef MICROPROFILE_IOS +inline MicroProfileThreadLog* MicroProfileGetThreadLog() +{ + pthread_once(&g_MicroProfileThreadLogKeyOnce, MicroProfileCreateThreadLogKey); + return (MicroProfileThreadLog*)pthread_getspecific(g_MicroProfileThreadLogKey); +} + +inline void MicroProfileSetThreadLog(MicroProfileThreadLog* pLog) +{ + pthread_once(&g_MicroProfileThreadLogKeyOnce, MicroProfileCreateThreadLogKey); + pthread_setspecific(g_MicroProfileThreadLogKey, pLog); +} +#else +MicroProfileThreadLog* MicroProfileGetThreadLog() +{ + return g_MicroProfileThreadLog; +} +inline void MicroProfileSetThreadLog(MicroProfileThreadLog* pLog) +{ + g_MicroProfileThreadLog = pLog; +} +#endif + + +MicroProfileThreadLog* MicroProfileCreateThreadLog(const char* pName) +{ + MicroProfileThreadLog* pLog = 0; + if(S.nFreeListHead != -1) + { + pLog 
= S.Pool[S.nFreeListHead]; + MP_ASSERT(pLog->nPut.load() == 0); + MP_ASSERT(pLog->nGet.load() == 0); + S.nFreeListHead = S.Pool[S.nFreeListHead]->nFreeListNext; + } + else + { + pLog = new MicroProfileThreadLog; + S.nMemUsage += sizeof(MicroProfileThreadLog); + S.Pool[S.nNumLogs++] = pLog; + } + memset(pLog, 0, sizeof(*pLog)); + int len = (int)strlen(pName); + int maxlen = sizeof(pLog->ThreadName)-1; + len = len < maxlen ? len : maxlen; + memcpy(&pLog->ThreadName[0], pName, len); + pLog->ThreadName[len] = '\0'; + pLog->nThreadId = MP_GETCURRENTTHREADID(); + pLog->nFreeListNext = -1; + pLog->nActive = 1; + return pLog; +} + +void MicroProfileOnThreadCreate(const char* pThreadName) +{ + g_bUseLock = true; + MicroProfileInit(); + std::lock_guard Lock(MicroProfileMutex()); + MP_ASSERT(MicroProfileGetThreadLog() == 0); + MicroProfileThreadLog* pLog = MicroProfileCreateThreadLog(pThreadName ? pThreadName : MicroProfileGetThreadName()); + MP_ASSERT(pLog); + MicroProfileSetThreadLog(pLog); +} + +void MicroProfileOnThreadExit() +{ + std::lock_guard Lock(MicroProfileMutex()); + MicroProfileThreadLog* pLog = MicroProfileGetThreadLog(); + if(pLog) + { + int32_t nLogIndex = -1; + for(int i = 0; i < MICROPROFILE_MAX_THREADS; ++i) + { + if(pLog == S.Pool[i]) + { + nLogIndex = i; + break; + } + } + MP_ASSERT(nLogIndex < MICROPROFILE_MAX_THREADS && nLogIndex > 0); + pLog->nFreeListNext = S.nFreeListHead; + pLog->nActive = 0; + pLog->nPut.store(0); + pLog->nGet.store(0); + S.nFreeListHead = nLogIndex; + for(int i = 0; i < MICROPROFILE_MAX_FRAME_HISTORY; ++i) + { + S.Frames[i].nLogStart[nLogIndex] = 0; + } + memset(pLog->nGroupStackPos, 0, sizeof(pLog->nGroupStackPos)); + memset(pLog->nGroupTicks, 0, sizeof(pLog->nGroupTicks)); + } +} + +void MicroProfileInitThreadLog() +{ + MicroProfileOnThreadCreate(nullptr); +} + + +struct MicroProfileScopeLock +{ + bool bUseLock; + std::recursive_mutex& m; + MicroProfileScopeLock(std::recursive_mutex& m) : bUseLock(g_bUseLock), m(m) + { + 
if(bUseLock) + m.lock(); + } + ~MicroProfileScopeLock() + { + if(bUseLock) + m.unlock(); + } +}; + +MicroProfileToken MicroProfileFindToken(const char* pGroup, const char* pName) +{ + MicroProfileInit(); + MicroProfileScopeLock L(MicroProfileMutex()); + for(uint32_t i = 0; i < S.nTotalTimers; ++i) + { + if(!MP_STRCASECMP(pName, S.TimerInfo[i].pName) && !MP_STRCASECMP(pGroup, S.GroupInfo[S.TimerToGroup[i]].pName)) + { + return S.TimerInfo[i].nToken; + } + } + return MICROPROFILE_INVALID_TOKEN; +} + +uint16_t MicroProfileGetGroup(const char* pGroup, MicroProfileTokenType Type) +{ + for(uint32_t i = 0; i < S.nGroupCount; ++i) + { + if(!MP_STRCASECMP(pGroup, S.GroupInfo[i].pName)) + { + return i; + } + } + uint16_t nGroupIndex = 0xffff; + uint32_t nLen = (uint32_t)strlen(pGroup); + if(nLen > MICROPROFILE_NAME_MAX_LEN-1) + nLen = MICROPROFILE_NAME_MAX_LEN-1; + memcpy(&S.GroupInfo[S.nGroupCount].pName[0], pGroup, nLen); + S.GroupInfo[S.nGroupCount].pName[nLen] = '\0'; + S.GroupInfo[S.nGroupCount].nNameLen = nLen; + S.GroupInfo[S.nGroupCount].nNumTimers = 0; + S.GroupInfo[S.nGroupCount].nGroupIndex = S.nGroupCount; + S.GroupInfo[S.nGroupCount].Type = Type; + S.GroupInfo[S.nGroupCount].nMaxTimerNameLen = 0; + S.GroupInfo[S.nGroupCount].nColor = 0x88888888; + S.GroupInfo[S.nGroupCount].nCategory = 0; + S.CategoryInfo[0].nGroupMask |= (1ll << (uint64_t)S.nGroupCount); + nGroupIndex = S.nGroupCount++; + S.nGroupMask = (S.nGroupMask<<1)|1; + MP_ASSERT(nGroupIndex < MICROPROFILE_MAX_GROUPS); + return nGroupIndex; +} + +void MicroProfileRegisterGroup(const char* pGroup, const char* pCategory, uint32_t nColor) +{ + int nCategoryIndex = -1; + for(uint32_t i = 0; i < S.nCategoryCount; ++i) + { + if(!MP_STRCASECMP(pCategory, S.CategoryInfo[i].pName)) + { + nCategoryIndex = (int)i; + break; + } + } + if(-1 == nCategoryIndex && S.nCategoryCount < MICROPROFILE_MAX_CATEGORIES) + { + MP_ASSERT(S.CategoryInfo[S.nCategoryCount].pName[0] == '\0'); + nCategoryIndex = (int)S.nCategoryCount++; 
+ uint32_t nLen = (uint32_t)strlen(pCategory); + if(nLen > MICROPROFILE_NAME_MAX_LEN-1) + nLen = MICROPROFILE_NAME_MAX_LEN-1; + memcpy(&S.CategoryInfo[nCategoryIndex].pName[0], pCategory, nLen); + S.CategoryInfo[nCategoryIndex].pName[nLen] = '\0'; + } + uint16_t nGroup = MicroProfileGetGroup(pGroup, 0 != MP_STRCASECMP(pGroup, "gpu")?MicroProfileTokenTypeCpu : MicroProfileTokenTypeGpu); + S.GroupInfo[nGroup].nColor = nColor; + if(nCategoryIndex >= 0) + { + uint64_t nBit = 1ll << nGroup; + uint32_t nOldCategory = S.GroupInfo[nGroup].nCategory; + S.CategoryInfo[nOldCategory].nGroupMask &= ~nBit; + S.CategoryInfo[nCategoryIndex].nGroupMask |= nBit; + S.GroupInfo[nGroup].nCategory = nCategoryIndex; + } +} + +MicroProfileToken MicroProfileGetToken(const char* pGroup, const char* pName, uint32_t nColor, MicroProfileTokenType Type) +{ + MicroProfileInit(); + MicroProfileScopeLock L(MicroProfileMutex()); + MicroProfileToken ret = MicroProfileFindToken(pGroup, pName); + if(ret != MICROPROFILE_INVALID_TOKEN) + return ret; + uint16_t nGroupIndex = MicroProfileGetGroup(pGroup, Type); + uint16_t nTimerIndex = (uint16_t)(S.nTotalTimers++); + uint64_t nGroupMask = 1ll << nGroupIndex; + MicroProfileToken nToken = MicroProfileMakeToken(nGroupMask, nTimerIndex); + S.GroupInfo[nGroupIndex].nNumTimers++; + S.GroupInfo[nGroupIndex].nMaxTimerNameLen = MicroProfileMax(S.GroupInfo[nGroupIndex].nMaxTimerNameLen, (uint32_t)strlen(pName)); + MP_ASSERT(S.GroupInfo[nGroupIndex].Type == Type); //dont mix cpu & gpu timers in the same group + S.nMaxGroupSize = MicroProfileMax(S.nMaxGroupSize, S.GroupInfo[nGroupIndex].nNumTimers); + S.TimerInfo[nTimerIndex].nToken = nToken; + uint32_t nLen = (uint32_t)strlen(pName); + if(nLen > MICROPROFILE_NAME_MAX_LEN-1) + nLen = MICROPROFILE_NAME_MAX_LEN-1; + memcpy(&S.TimerInfo[nTimerIndex].pName, pName, nLen); + S.TimerInfo[nTimerIndex].pName[nLen] = '\0'; + S.TimerInfo[nTimerIndex].nNameLen = nLen; + S.TimerInfo[nTimerIndex].nColor = nColor&0xffffff; + 
S.TimerInfo[nTimerIndex].nGroupIndex = nGroupIndex; + S.TimerInfo[nTimerIndex].nTimerIndex = nTimerIndex; + S.TimerToGroup[nTimerIndex] = nGroupIndex; + return nToken; +} + +MicroProfileToken MicroProfileGetMetaToken(const char* pName) +{ + MicroProfileInit(); + MicroProfileScopeLock L(MicroProfileMutex()); + for(uint32_t i = 0; i < MICROPROFILE_META_MAX; ++i) + { + if(!S.MetaCounters[i].pName) + { + S.MetaCounters[i].pName = pName; + return i; + } + else if(!MP_STRCASECMP(pName, S.MetaCounters[i].pName)) + { + return i; + } + } + MP_ASSERT(0);//out of slots, increase MICROPROFILE_META_MAX + return (MicroProfileToken)-1; +} + + +inline void MicroProfileLogPut(MicroProfileToken nToken_, uint64_t nTick, uint64_t nBegin, MicroProfileThreadLog* pLog) +{ + MP_ASSERT(pLog != 0); //this assert is hit if MicroProfileOnCreateThread is not called + MP_ASSERT(pLog->nActive); + uint32_t nPos = pLog->nPut.load(std::memory_order_relaxed); + uint32_t nNextPos = (nPos+1) % MICROPROFILE_BUFFER_SIZE; + if(nNextPos == pLog->nGet.load(std::memory_order_relaxed)) + { + S.nOverflow = 100; + } + else + { + pLog->Log[nPos] = MicroProfileMakeLogIndex(nBegin, nToken_, nTick); + pLog->nPut.store(nNextPos, std::memory_order_release); + } +} + +uint64_t MicroProfileEnter(MicroProfileToken nToken_) +{ + if(MicroProfileGetGroupMask(nToken_) & S.nActiveGroup) + { + if(!MicroProfileGetThreadLog()) + { + MicroProfileInitThreadLog(); + } + uint64_t nTick = MP_TICK(); + MicroProfileLogPut(nToken_, nTick, MP_LOG_ENTER, MicroProfileGetThreadLog()); + return nTick; + } + return MICROPROFILE_INVALID_TICK; +} + +void MicroProfileMetaUpdate(MicroProfileToken nToken, int nCount, MicroProfileTokenType eTokenType) +{ + if((MP_DRAW_META_FIRST<nTicks <= S.nPauseTicks) + { + uint32_t nPut = S.nContextSwitchPut; + S.ContextSwitch[nPut] = *pContextSwitch; + S.nContextSwitchPut = (S.nContextSwitchPut+1) % MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE; + } +} + + +void MicroProfileGetRange(uint32_t nPut, uint32_t nGet, 
uint32_t nRange[2][2]) +{ + if(nPut > nGet) + { + nRange[0][0] = nGet; + nRange[0][1] = nPut; + nRange[1][0] = nRange[1][1] = 0; + } + else if(nPut != nGet) + { + MP_ASSERT(nGet != MICROPROFILE_BUFFER_SIZE); + uint32_t nCountEnd = MICROPROFILE_BUFFER_SIZE - nGet; + nRange[0][0] = nGet; + nRange[0][1] = nGet + nCountEnd; + nRange[1][0] = 0; + nRange[1][1] = nPut; + } +} + +void MicroProfileFlip() +{ + #if 0 + //verify LogEntry wraps correctly + MicroProfileLogEntry c = MP_LOG_TICK_MASK-5000; + for(int i = 0; i < 10000; ++i, c += 1) + { + MicroProfileLogEntry l2 = (c+2500) & MP_LOG_TICK_MASK; + MP_ASSERT(2500 == MicroProfileLogTickDifference(c, l2)); + } + #endif + MICROPROFILE_SCOPE(g_MicroProfileFlip); + std::lock_guard Lock(MicroProfileMutex()); + + + MicroProfileGpuFlip(); + + if(S.nToggleRunning) + { + S.nRunning = !S.nRunning; + if(!S.nRunning) + S.nPauseTicks = MP_TICK(); + S.nToggleRunning = 0; + for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; ++i) + { + MicroProfileThreadLog* pLog = S.Pool[i]; + if(pLog) + { + pLog->nStackPos = 0; + } + } + } + uint32_t nAggregateClear = S.nAggregateClear || S.nAutoClearFrames, nAggregateFlip = 0; + if(S.nDumpFileNextFrame) + { + MicroProfileDumpToFile(); + S.nDumpFileNextFrame = 0; + S.nAutoClearFrames = MICROPROFILE_GPU_FRAME_DELAY + 3; //hide spike from dumping webpage + } + if(S.nWebServerDataSent == (uint64_t)-1) + { + MicroProfileWebServerStart(); + S.nWebServerDataSent = 0; + } + + if(MicroProfileWebServerUpdate()) + { + S.nAutoClearFrames = MICROPROFILE_GPU_FRAME_DELAY + 3; //hide spike from dumping webpage + } + + if(S.nAutoClearFrames) + { + nAggregateClear = 1; + nAggregateFlip = 1; + S.nAutoClearFrames -= 1; + } + + + if(S.nRunning || S.nForceEnable) + { + S.nFramePutIndex++; + S.nFramePut = (S.nFramePut+1) % MICROPROFILE_MAX_FRAME_HISTORY; + MP_ASSERT((S.nFramePutIndex % MICROPROFILE_MAX_FRAME_HISTORY) == S.nFramePut); + S.nFrameCurrent = (S.nFramePut + MICROPROFILE_MAX_FRAME_HISTORY - 
MICROPROFILE_GPU_FRAME_DELAY - 1) % MICROPROFILE_MAX_FRAME_HISTORY; + S.nFrameCurrentIndex++; + uint32_t nFrameNext = (S.nFrameCurrent+1) % MICROPROFILE_MAX_FRAME_HISTORY; + + uint32_t nContextSwitchPut = S.nContextSwitchPut; + if(S.nContextSwitchLastPut < nContextSwitchPut) + { + S.nContextSwitchUsage = (nContextSwitchPut - S.nContextSwitchLastPut); + } + else + { + S.nContextSwitchUsage = MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE - S.nContextSwitchLastPut + nContextSwitchPut; + } + S.nContextSwitchLastPut = nContextSwitchPut; + + MicroProfileFrameState* pFramePut = &S.Frames[S.nFramePut]; + MicroProfileFrameState* pFrameCurrent = &S.Frames[S.nFrameCurrent]; + MicroProfileFrameState* pFrameNext = &S.Frames[nFrameNext]; + + pFramePut->nFrameStartCpu = MP_TICK(); + pFramePut->nFrameStartGpu = (uint32_t)MicroProfileGpuInsertTimeStamp(); + if(pFrameNext->nFrameStartGpu != (uint64_t)-1) + pFrameNext->nFrameStartGpu = MicroProfileGpuGetTimeStamp((uint32_t)pFrameNext->nFrameStartGpu); + + if(pFrameCurrent->nFrameStartGpu == (uint64_t)-1) + pFrameCurrent->nFrameStartGpu = pFrameNext->nFrameStartGpu + 1; + + uint64_t nFrameStartCpu = pFrameCurrent->nFrameStartCpu; + uint64_t nFrameEndCpu = pFrameNext->nFrameStartCpu; + + { + uint64_t nTick = nFrameEndCpu - nFrameStartCpu; + S.nFlipTicks = nTick; + S.nFlipAggregate += nTick; + S.nFlipMax = MicroProfileMax(S.nFlipMax, nTick); + } + + uint8_t* pTimerToGroup = &S.TimerToGroup[0]; + for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; ++i) + { + MicroProfileThreadLog* pLog = S.Pool[i]; + if(!pLog) + { + pFramePut->nLogStart[i] = 0; + } + else + { + uint32_t nPut = pLog->nPut.load(std::memory_order_acquire); + pFramePut->nLogStart[i] = nPut; + MP_ASSERT(nPut< MICROPROFILE_BUFFER_SIZE); + //need to keep last frame around to close timers. timers more than 1 frame old is ditched. 
+ pLog->nGet.store(nPut, std::memory_order_relaxed); + } + } + + if(S.nRunning) + { + uint64_t* pFrameGroup = &S.FrameGroup[0]; + { + MICROPROFILE_SCOPE(g_MicroProfileClear); + for(uint32_t i = 0; i < S.nTotalTimers; ++i) + { + S.Frame[i].nTicks = 0; + S.Frame[i].nCount = 0; + S.FrameExclusive[i] = 0; + } + for(uint32_t i = 0; i < MICROPROFILE_MAX_GROUPS; ++i) + { + pFrameGroup[i] = 0; + } + for(uint32_t j = 0; j < MICROPROFILE_META_MAX; ++j) + { + if(S.MetaCounters[j].pName && 0 != (S.nActiveBars & (MP_DRAW_META_FIRST<nGroupStackPos[0]; + int64_t nGroupTicks[MICROPROFILE_MAX_GROUPS] = {0}; + + + uint32_t nPut = pFrameNext->nLogStart[i]; + uint32_t nGet = pFrameCurrent->nLogStart[i]; + uint32_t nRange[2][2] = { {0, 0}, {0, 0}, }; + MicroProfileGetRange(nPut, nGet, nRange); + + + //fetch gpu results. + if(pLog->nGpu) + { + for(uint32_t j = 0; j < 2; ++j) + { + uint32_t nStart = nRange[j][0]; + uint32_t nEnd = nRange[j][1]; + for(uint32_t k = nStart; k < nEnd; ++k) + { + MicroProfileLogEntry L = pLog->Log[k]; + if(MicroProfileLogType(L) < MP_LOG_META) + { + pLog->Log[k] = MicroProfileLogSetTick(L, MicroProfileGpuGetTimeStamp((uint32_t)MicroProfileLogGetTick(L))); + } + } + } + } + + + uint32_t* pStack = &pLog->nStack[0]; + int64_t* pChildTickStack = &pLog->nChildTickStack[0]; + uint32_t nStackPos = pLog->nStackPos; + + for(uint32_t j = 0; j < 2; ++j) + { + uint32_t nStart = nRange[j][0]; + uint32_t nEnd = nRange[j][1]; + for(uint32_t k = nStart; k < nEnd; ++k) + { + MicroProfileLogEntry LE = pLog->Log[k]; + int nType = MicroProfileLogType(LE); + + if(MP_LOG_ENTER == nType) + { + int nTimer = MicroProfileLogTimerIndex(LE); + uint8_t nGroup = pTimerToGroup[nTimer]; + MP_ASSERT(nStackPos < MICROPROFILE_STACK_MAX); + MP_ASSERT(nGroup < MICROPROFILE_MAX_GROUPS); + pGroupStackPos[nGroup]++; + pStack[nStackPos++] = k; + pChildTickStack[nStackPos] = 0; + + } + else if(MP_LOG_META == nType) + { + if(nStackPos) + { + int64_t nMetaIndex = MicroProfileLogTimerIndex(LE); + 
int64_t nMetaCount = MicroProfileLogGetTick(LE); + MP_ASSERT(nMetaIndex < MICROPROFILE_META_MAX); + int64_t nCounter = MicroProfileLogTimerIndex(pLog->Log[pStack[nStackPos-1]]); + S.MetaCounters[nMetaIndex].nCounters[nCounter] += nMetaCount; + } + } + else if(MP_LOG_LEAVE == nType) + { + int nTimer = MicroProfileLogTimerIndex(LE); + uint8_t nGroup = pTimerToGroup[nTimer]; + MP_ASSERT(nGroup < MICROPROFILE_MAX_GROUPS); + if(nStackPos) + { + int64_t nTickStart = pLog->Log[pStack[nStackPos-1]]; + int64_t nTicks = MicroProfileLogTickDifference(nTickStart, LE); + int64_t nChildTicks = pChildTickStack[nStackPos]; + nStackPos--; + pChildTickStack[nStackPos] += nTicks; + + uint32_t nTimerIndex = MicroProfileLogTimerIndex(LE); + S.Frame[nTimerIndex].nTicks += nTicks; + S.FrameExclusive[nTimerIndex] += (nTicks-nChildTicks); + S.Frame[nTimerIndex].nCount += 1; + + MP_ASSERT(nGroup < MICROPROFILE_MAX_GROUPS); + uint8_t nGroupStackPos = pGroupStackPos[nGroup]; + if(nGroupStackPos) + { + nGroupStackPos--; + if(0 == nGroupStackPos) + { + nGroupTicks[nGroup] += nTicks; + } + pGroupStackPos[nGroup] = nGroupStackPos; + } + } + } + } + } + for(uint32_t i = 0; i < MICROPROFILE_MAX_GROUPS; ++i) + { + pLog->nGroupTicks[i] += nGroupTicks[i]; + pFrameGroup[i] += nGroupTicks[i]; + } + pLog->nStackPos = nStackPos; + } + } + { + MICROPROFILE_SCOPE(g_MicroProfileAccumulate); + for(uint32_t i = 0; i < S.nTotalTimers; ++i) + { + S.AccumTimers[i].nTicks += S.Frame[i].nTicks; + S.AccumTimers[i].nCount += S.Frame[i].nCount; + S.AccumMaxTimers[i] = MicroProfileMax(S.AccumMaxTimers[i], S.Frame[i].nTicks); + S.AccumTimersExclusive[i] += S.FrameExclusive[i]; + S.AccumMaxTimersExclusive[i] = MicroProfileMax(S.AccumMaxTimersExclusive[i], S.FrameExclusive[i]); + } + + for(uint32_t i = 0; i < MICROPROFILE_MAX_GROUPS; ++i) + { + S.AccumGroup[i] += pFrameGroup[i]; + S.AccumGroupMax[i] = MicroProfileMax(S.AccumGroupMax[i], pFrameGroup[i]); + } + + for(uint32_t j = 0; j < MICROPROFILE_META_MAX; ++j) + { + 
if(S.MetaCounters[j].pName && 0 != (S.nActiveBars & (MP_DRAW_META_FIRST<nAggregateGroupTicks[0], &pLog->nGroupTicks[0], sizeof(pLog->nAggregateGroupTicks)); + + if(nAggregateClear) + { + memset(&pLog->nGroupTicks[0], 0, sizeof(pLog->nGroupTicks)); + } + } + + for(uint32_t j = 0; j < MICROPROFILE_META_MAX; ++j) + { + if(S.MetaCounters[j].pName && 0 != (S.nActiveBars & (MP_DRAW_META_FIRST<= 0) + { + if(bEnabled) + { + S.nActiveGroupWanted |= S.CategoryInfo[nCategoryIndex].nGroupMask; + } + else + { + S.nActiveGroupWanted &= ~S.CategoryInfo[nCategoryIndex].nGroupMask; + } + } +} + + +void MicroProfileEnableCategory(const char* pCategory) +{ + MicroProfileEnableCategory(pCategory, true); +} +void MicroProfileDisableCategory(const char* pCategory) +{ + MicroProfileEnableCategory(pCategory, false); +} + +bool MicroProfileGetEnableAllGroups() +{ + return 0 != S.nAllGroupsWanted; +} + +void MicroProfileSetForceMetaCounters(bool bForce) +{ + S.nForceMetaCounters = bForce ? 1 : 0; +} + +bool MicroProfileGetForceMetaCounters() +{ + return 0 != S.nForceMetaCounters; +} + +void MicroProfileEnableMetaCounter(const char* pMeta) +{ + for(uint32_t i = 0; i < MICROPROFILE_META_MAX; ++i) + { + if(S.MetaCounters[i].pName && 0 == MP_STRCASECMP(S.MetaCounters[i].pName, pMeta)) + { + S.nBars |= (MP_DRAW_META_FIRST< Lock(MicroProfileMutex()); + uint16_t nGroup = MicroProfileGetGroup(pGroup, Type); + S.nForceGroup |= (1ll << nGroup); +} + +void MicroProfileForceDisableGroup(const char* pGroup, MicroProfileTokenType Type) +{ + MicroProfileInit(); + std::lock_guard Lock(MicroProfileMutex()); + uint16_t nGroup = MicroProfileGetGroup(pGroup, Type); + S.nForceGroup &= ~(1ll << nGroup); +} + + +void MicroProfileCalcAllTimers(float* pTimers, float* pAverage, float* pMax, float* pCallAverage, float* pExclusive, float* pAverageExclusive, float* pMaxExclusive, float* pTotal, uint32_t nSize) +{ + for(uint32_t i = 0; i < S.nTotalTimers && i < nSize; ++i) + { + const uint32_t nGroupId = 
S.TimerInfo[i].nGroupIndex; + const float fToMs = MicroProfileTickToMsMultiplier(S.GroupInfo[nGroupId].Type == MicroProfileTokenTypeGpu ? MicroProfileTicksPerSecondGpu() : MicroProfileTicksPerSecondCpu()); + uint32_t nTimer = i; + uint32_t nIdx = i * 2; + uint32_t nAggregateFrames = S.nAggregateFrames ? S.nAggregateFrames : 1; + uint32_t nAggregateCount = S.Aggregate[nTimer].nCount ? S.Aggregate[nTimer].nCount : 1; + float fToPrc = S.fRcpReferenceTime; + float fMs = fToMs * (S.Frame[nTimer].nTicks); + float fPrc = MicroProfileMin(fMs * fToPrc, 1.f); + float fAverageMs = fToMs * (S.Aggregate[nTimer].nTicks / nAggregateFrames); + float fAveragePrc = MicroProfileMin(fAverageMs * fToPrc, 1.f); + float fMaxMs = fToMs * (S.AggregateMax[nTimer]); + float fMaxPrc = MicroProfileMin(fMaxMs * fToPrc, 1.f); + float fCallAverageMs = fToMs * (S.Aggregate[nTimer].nTicks / nAggregateCount); + float fCallAveragePrc = MicroProfileMin(fCallAverageMs * fToPrc, 1.f); + float fMsExclusive = fToMs * (S.FrameExclusive[nTimer]); + float fPrcExclusive = MicroProfileMin(fMsExclusive * fToPrc, 1.f); + float fAverageMsExclusive = fToMs * (S.AggregateExclusive[nTimer] / nAggregateFrames); + float fAveragePrcExclusive = MicroProfileMin(fAverageMsExclusive * fToPrc, 1.f); + float fMaxMsExclusive = fToMs * (S.AggregateMaxExclusive[nTimer]); + float fMaxPrcExclusive = MicroProfileMin(fMaxMsExclusive * fToPrc, 1.f); + float fTotalMs = fToMs * S.Aggregate[nTimer].nTicks; + pTimers[nIdx] = fMs; + pTimers[nIdx+1] = fPrc; + pAverage[nIdx] = fAverageMs; + pAverage[nIdx+1] = fAveragePrc; + pMax[nIdx] = fMaxMs; + pMax[nIdx+1] = fMaxPrc; + pCallAverage[nIdx] = fCallAverageMs; + pCallAverage[nIdx+1] = fCallAveragePrc; + pExclusive[nIdx] = fMsExclusive; + pExclusive[nIdx+1] = fPrcExclusive; + pAverageExclusive[nIdx] = fAverageMsExclusive; + pAverageExclusive[nIdx+1] = fAveragePrcExclusive; + pMaxExclusive[nIdx] = fMaxMsExclusive; + pMaxExclusive[nIdx+1] = fMaxPrcExclusive; + pTotal[nIdx] = fTotalMs; + 
pTotal[nIdx+1] = 0.f; + } +} + +void MicroProfileTogglePause() +{ + S.nToggleRunning = 1; +} + +float MicroProfileGetTime(const char* pGroup, const char* pName) +{ + MicroProfileToken nToken = MicroProfileFindToken(pGroup, pName); + if(nToken == MICROPROFILE_INVALID_TOKEN) + { + return 0.f; + } + uint32_t nTimerIndex = MicroProfileGetTimerIndex(nToken); + uint32_t nGroupIndex = MicroProfileGetGroupIndex(nToken); + float fToMs = MicroProfileTickToMsMultiplier(S.GroupInfo[nGroupIndex].Type == MicroProfileTokenTypeGpu ? MicroProfileTicksPerSecondGpu() : MicroProfileTicksPerSecondCpu()); + return S.Frame[nTimerIndex].nTicks * fToMs; +} + + +void MicroProfileContextSwitchSearch(uint32_t* pContextSwitchStart, uint32_t* pContextSwitchEnd, uint64_t nBaseTicksCpu, uint64_t nBaseTicksEndCpu) +{ + MICROPROFILE_SCOPE(g_MicroProfileContextSwitchSearch); + uint32_t nContextSwitchPut = S.nContextSwitchPut; + uint64_t nContextSwitchStart, nContextSwitchEnd; + nContextSwitchStart = nContextSwitchEnd = (nContextSwitchPut + MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE - 1) % MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE; + int64_t nSearchEnd = nBaseTicksEndCpu + MicroProfileMsToTick(30.f, MicroProfileTicksPerSecondCpu()); + int64_t nSearchBegin = nBaseTicksCpu - MicroProfileMsToTick(30.f, MicroProfileTicksPerSecondCpu()); + for(uint32_t i = 0; i < MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE; ++i) + { + uint32_t nIndex = (nContextSwitchPut + MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE - (i+1)) % MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE; + MicroProfileContextSwitch& CS = S.ContextSwitch[nIndex]; + if(CS.nTicks > nSearchEnd) + { + nContextSwitchEnd = nIndex; + } + if(CS.nTicks > nSearchBegin) + { + nContextSwitchStart = nIndex; + } + } + *pContextSwitchStart = nContextSwitchStart; + *pContextSwitchEnd = nContextSwitchEnd; +} + + + +#if MICROPROFILE_WEBSERVER + +#define MICROPROFILE_EMBED_HTML + +extern const char* g_MicroProfileHtml_begin[]; +extern size_t g_MicroProfileHtml_begin_sizes[]; +extern size_t 
g_MicroProfileHtml_begin_count; +extern const char* g_MicroProfileHtml_end[]; +extern size_t g_MicroProfileHtml_end_sizes[]; +extern size_t g_MicroProfileHtml_end_count; + +typedef void MicroProfileWriteCallback(void* Handle, size_t size, const char* pData); + +uint32_t MicroProfileWebServerPort() +{ + return S.nWebServerPort; +} + +void MicroProfileDumpFile(const char* pHtml, const char* pCsv) +{ + S.nDumpFileNextFrame = 0; + if(pHtml) + { + uint32_t nLen = strlen(pHtml); + if(nLen > sizeof(S.HtmlDumpPath)-1) + { + return; + } + memcpy(S.HtmlDumpPath, pHtml, nLen+1); + S.nDumpFileNextFrame |= 1; + } + if(pCsv) + { + uint32_t nLen = strlen(pCsv); + if(nLen > sizeof(S.CsvDumpPath)-1) + { + return; + } + memcpy(S.CsvDumpPath, pCsv, nLen+1); + S.nDumpFileNextFrame |= 2; + } +} + +void MicroProfilePrintf(MicroProfileWriteCallback CB, void* Handle, const char* pFmt, ...) +{ + char buffer[32*1024]; + va_list args; + va_start (args, pFmt); +#ifdef _WIN32 + size_t size = vsprintf_s(buffer, pFmt, args); +#else + size_t size = vsnprintf(buffer, sizeof(buffer)-1, pFmt, args); +#endif + CB(Handle, size, &buffer[0]); + va_end (args); +} + +#define printf(...) MicroProfilePrintf(CB, Handle, __VA_ARGS__) +void MicroProfileDumpCsv(MicroProfileWriteCallback CB, void* Handle, int nMaxFrames) +{ + uint32_t nAggregateFrames = S.nAggregateFrames ? 
S.nAggregateFrames : 1; + float fToMsCPU = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu()); + float fToMsGPU = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondGpu()); + + printf("frames,%d\n", nAggregateFrames); + printf("group,name,average,max,callaverage\n"); + + uint32_t nNumTimers = S.nTotalTimers; + uint32_t nBlockSize = 2 * nNumTimers; + float* pTimers = (float*)alloca(nBlockSize * 8 * sizeof(float)); + float* pAverage = pTimers + nBlockSize; + float* pMax = pTimers + 2 * nBlockSize; + float* pCallAverage = pTimers + 3 * nBlockSize; + float* pTimersExclusive = pTimers + 4 * nBlockSize; + float* pAverageExclusive = pTimers + 5 * nBlockSize; + float* pMaxExclusive = pTimers + 6 * nBlockSize; + float* pTotal = pTimers + 7 * nBlockSize; + + MicroProfileCalcAllTimers(pTimers, pAverage, pMax, pCallAverage, pTimersExclusive, pAverageExclusive, pMaxExclusive, pTotal, nNumTimers); + + for(uint32_t i = 0; i < S.nTotalTimers; ++i) + { + uint32_t nIdx = i * 2; + printf("\"%s\",\"%s\",%f,%f,%f\n", S.TimerInfo[i].pName, S.GroupInfo[S.TimerInfo[i].nGroupIndex].pName, pAverage[nIdx], pMax[nIdx], pCallAverage[nIdx]); + } + + printf("\n\n"); + + printf("group,average,max,total\n"); + for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j) + { + const char* pGroupName = S.GroupInfo[j].pName; + float fToMs = S.GroupInfo[j].Type == MicroProfileTokenTypeGpu ? fToMsGPU : fToMsCPU; + if(pGroupName[0] != '\0') + { + printf("\"%s\",%.3f,%.3f,%.3f\n", pGroupName, fToMs * S.AggregateGroup[j] / nAggregateFrames, fToMs * S.AggregateGroup[j] / nAggregateFrames, fToMs * S.AggregateGroup[j]); + } + } + + printf("\n\n"); + printf("group,thread,average,total\n"); + for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j) + { + for(uint32_t i = 0; i < S.nNumLogs; ++i) + { + if(S.Pool[i]) + { + const char* pThreadName = &S.Pool[i]->ThreadName[0]; + // MicroProfilePrintf(CB, Handle, "var ThreadGroupTime%d = [", i); + float fToMs = S.Pool[i]->nGpu ? 
fToMsGPU : fToMsCPU; + { + uint64_t nTicks = S.Pool[i]->nAggregateGroupTicks[j]; + float fTime = nTicks / nAggregateFrames * fToMs; + float fTimeTotal = nTicks * fToMs; + if(fTimeTotal > 0.01f) + { + const char* pGroupName = S.GroupInfo[j].pName; + printf("\"%s\",\"%s\",%.3f,%.3f\n", pGroupName, pThreadName, fTime, fTimeTotal); + } + } + } + } + } + + printf("\n\n"); + printf("frametimecpu\n"); + + const uint32_t nCount = MICROPROFILE_MAX_FRAME_HISTORY - MICROPROFILE_GPU_FRAME_DELAY - 3; + const uint32_t nStart = S.nFrameCurrent; + for(uint32_t i = nCount; i > 0; i--) + { + uint32_t nFrame = (nStart + MICROPROFILE_MAX_FRAME_HISTORY - i) % MICROPROFILE_MAX_FRAME_HISTORY; + uint32_t nFrameNext = (nStart + MICROPROFILE_MAX_FRAME_HISTORY - i + 1) % MICROPROFILE_MAX_FRAME_HISTORY; + uint64_t nTicks = S.Frames[nFrameNext].nFrameStartCpu - S.Frames[nFrame].nFrameStartCpu; + printf("%f,", nTicks * fToMsCPU); + } + printf("\n"); + + printf("\n\n"); + printf("frametimegpu\n"); + + for(uint32_t i = nCount; i > 0; i--) + { + uint32_t nFrame = (nStart + MICROPROFILE_MAX_FRAME_HISTORY - i) % MICROPROFILE_MAX_FRAME_HISTORY; + uint32_t nFrameNext = (nStart + MICROPROFILE_MAX_FRAME_HISTORY - i + 1) % MICROPROFILE_MAX_FRAME_HISTORY; + uint64_t nTicks = S.Frames[nFrameNext].nFrameStartGpu - S.Frames[nFrame].nFrameStartGpu; + printf("%f,", nTicks * fToMsGPU); + } + printf("\n\n"); + printf("Meta\n");//only single frame snapshot + printf("name,average,max,total\n"); + for(int j = 0; j < MICROPROFILE_META_MAX; ++j) + { + if(S.MetaCounters[j].pName) + { + printf("\"%s\",%f,%lld,%lld\n",S.MetaCounters[j].pName, S.MetaCounters[j].nSumAggregate / (float)nAggregateFrames, S.MetaCounters[j].nSumAggregateMax,S.MetaCounters[j].nSumAggregate); + } + } +} +#undef printf + +void MicroProfileDumpHtml(MicroProfileWriteCallback CB, void* Handle, int nMaxFrames, const char* pHost) +{ + uint32_t nRunning = S.nRunning; + S.nRunning = 0; + //stall pushing of timers + uint64_t nActiveGroup = 
S.nActiveGroup; + S.nActiveGroup = 0; + S.nPauseTicks = MP_TICK(); + + + for(size_t i = 0; i < g_MicroProfileHtml_begin_count; ++i) + { + CB(Handle, g_MicroProfileHtml_begin_sizes[i]-1, g_MicroProfileHtml_begin[i]); + } + //dump info + uint64_t nTicks = MP_TICK(); + + float fToMsCPU = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu()); + float fToMsGPU = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondGpu()); + float fAggregateMs = fToMsCPU * (nTicks - S.nAggregateFlipTick); + MicroProfilePrintf(CB, Handle, "var DumpHost = '%s';\n", pHost ? pHost : ""); + time_t CaptureTime; + time(&CaptureTime); + MicroProfilePrintf(CB, Handle, "var DumpUtcCaptureTime = %ld;\n", CaptureTime); + MicroProfilePrintf(CB, Handle, "var AggregateInfo = {'Frames':%d, 'Time':%f};\n", S.nAggregateFrames, fAggregateMs); + + //categories + MicroProfilePrintf(CB, Handle, "var CategoryInfo = Array(%d);\n",S.nCategoryCount); + for(uint32_t i = 0; i < S.nCategoryCount; ++i) + { + MicroProfilePrintf(CB, Handle, "CategoryInfo[%d] = \"%s\";\n", i, S.CategoryInfo[i].pName); + } + + //groups + MicroProfilePrintf(CB, Handle, "var GroupInfo = Array(%d);\n\n",S.nGroupCount); + uint32_t nAggregateFrames = S.nAggregateFrames ? S.nAggregateFrames : 1; + float fRcpAggregateFrames = 1.f / nAggregateFrames; + for(uint32_t i = 0; i < S.nGroupCount; ++i) + { + MP_ASSERT(i == S.GroupInfo[i].nGroupIndex); + float fToMs = S.GroupInfo[i].Type == MicroProfileTokenTypeCpu ? 
fToMsCPU : fToMsGPU; + MicroProfilePrintf(CB, Handle, "GroupInfo[%d] = MakeGroup(%d, \"%s\", %d, %d, %d, %f, %f, %f, '#%02x%02x%02x');\n", + S.GroupInfo[i].nGroupIndex, + S.GroupInfo[i].nGroupIndex, + S.GroupInfo[i].pName, + S.GroupInfo[i].nCategory, + S.GroupInfo[i].nNumTimers, + S.GroupInfo[i].Type == MicroProfileTokenTypeGpu?1:0, + fToMs * S.AggregateGroup[i], + fToMs * S.AggregateGroup[i] / nAggregateFrames, + fToMs * S.AggregateGroupMax[i], + MICROPROFILE_UNPACK_RED(S.GroupInfo[i].nColor) & 0xff, + MICROPROFILE_UNPACK_GREEN(S.GroupInfo[i].nColor) & 0xff, + MICROPROFILE_UNPACK_BLUE(S.GroupInfo[i].nColor) & 0xff); + } + //timers + + uint32_t nNumTimers = S.nTotalTimers; + uint32_t nBlockSize = 2 * nNumTimers; + float* pTimers = (float*)alloca(nBlockSize * 8 * sizeof(float)); + float* pAverage = pTimers + nBlockSize; + float* pMax = pTimers + 2 * nBlockSize; + float* pCallAverage = pTimers + 3 * nBlockSize; + float* pTimersExclusive = pTimers + 4 * nBlockSize; + float* pAverageExclusive = pTimers + 5 * nBlockSize; + float* pMaxExclusive = pTimers + 6 * nBlockSize; + float* pTotal = pTimers + 7 * nBlockSize; + + MicroProfileCalcAllTimers(pTimers, pAverage, pMax, pCallAverage, pTimersExclusive, pAverageExclusive, pMaxExclusive, pTotal, nNumTimers); + + MicroProfilePrintf(CB, Handle, "\nvar TimerInfo = Array(%d);\n\n", S.nTotalTimers); + for(uint32_t i = 0; i < S.nTotalTimers; ++i) + { + uint32_t nIdx = i * 2; + MP_ASSERT(i == S.TimerInfo[i].nTimerIndex); + MicroProfilePrintf(CB, Handle, "var Meta%d = [", i); + bool bOnce = true; + for(int j = 0; j < MICROPROFILE_META_MAX; ++j) + { + if(S.MetaCounters[j].pName) + { + uint32_t lala = S.MetaCounters[j].nCounters[i]; + MicroProfilePrintf(CB, Handle, bOnce ? 
"%d" : ",%d", lala); + bOnce = false; + } + } + MicroProfilePrintf(CB, Handle, "];\n"); + MicroProfilePrintf(CB, Handle, "var MetaAvg%d = [", i); + bOnce = true; + for(int j = 0; j < MICROPROFILE_META_MAX; ++j) + { + if(S.MetaCounters[j].pName) + { + MicroProfilePrintf(CB, Handle, bOnce ? "%f" : ",%f", fRcpAggregateFrames * S.MetaCounters[j].nAggregate[i]); + bOnce = false; + } + } + MicroProfilePrintf(CB, Handle, "];\n"); + MicroProfilePrintf(CB, Handle, "var MetaMax%d = [", i); + bOnce = true; + for(int j = 0; j < MICROPROFILE_META_MAX; ++j) + { + if(S.MetaCounters[j].pName) + { + MicroProfilePrintf(CB, Handle, bOnce ? "%d" : ",%d", S.MetaCounters[j].nAggregateMax[i]); + bOnce = false; + } + } + MicroProfilePrintf(CB, Handle, "];\n"); + + + uint32_t nColor = S.TimerInfo[i].nColor; + uint32_t nColorDark = (nColor >> 1) & ~0x80808080; + MicroProfilePrintf(CB, Handle, "TimerInfo[%d] = MakeTimer(%d, \"%s\", %d, '#%02x%02x%02x','#%02x%02x%02x', %f, %f, %f, %f, %f, %d, %f, Meta%d, MetaAvg%d, MetaMax%d);\n", S.TimerInfo[i].nTimerIndex, S.TimerInfo[i].nTimerIndex, S.TimerInfo[i].pName, S.TimerInfo[i].nGroupIndex, + MICROPROFILE_UNPACK_RED(nColor) & 0xff, + MICROPROFILE_UNPACK_GREEN(nColor) & 0xff, + MICROPROFILE_UNPACK_BLUE(nColor) & 0xff, + MICROPROFILE_UNPACK_RED(nColorDark) & 0xff, + MICROPROFILE_UNPACK_GREEN(nColorDark) & 0xff, + MICROPROFILE_UNPACK_BLUE(nColorDark) & 0xff, + pAverage[nIdx], + pMax[nIdx], + pAverageExclusive[nIdx], + pMaxExclusive[nIdx], + pCallAverage[nIdx], + S.Aggregate[i].nCount, + pTotal[nIdx], + i,i,i); + + } + + MicroProfilePrintf(CB, Handle, "\nvar ThreadNames = ["); + for(uint32_t i = 0; i < S.nNumLogs; ++i) + { + if(S.Pool[i]) + { + MicroProfilePrintf(CB, Handle, "'%s',", S.Pool[i]->ThreadName); + } + else + { + MicroProfilePrintf(CB, Handle, "'Thread %d',", i); + } + } + MicroProfilePrintf(CB, Handle, "];\n\n"); + + + for(uint32_t i = 0; i < S.nNumLogs; ++i) + { + if(S.Pool[i]) + { + MicroProfilePrintf(CB, Handle, "var ThreadGroupTime%d = 
[", i); + float fToMs = S.Pool[i]->nGpu ? fToMsGPU : fToMsCPU; + for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j) + { + MicroProfilePrintf(CB, Handle, "%f,", S.Pool[i]->nAggregateGroupTicks[j]/nAggregateFrames * fToMs); + } + MicroProfilePrintf(CB, Handle, "];\n"); + } + } + MicroProfilePrintf(CB, Handle, "\nvar ThreadGroupTimeArray = ["); + for(uint32_t i = 0; i < S.nNumLogs; ++i) + { + if(S.Pool[i]) + { + MicroProfilePrintf(CB, Handle, "ThreadGroupTime%d,", i); + } + } + MicroProfilePrintf(CB, Handle, "];\n"); + + + for(uint32_t i = 0; i < S.nNumLogs; ++i) + { + if(S.Pool[i]) + { + MicroProfilePrintf(CB, Handle, "var ThreadGroupTimeTotal%d = [", i); + float fToMs = S.Pool[i]->nGpu ? fToMsGPU : fToMsCPU; + for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j) + { + MicroProfilePrintf(CB, Handle, "%f,", S.Pool[i]->nAggregateGroupTicks[j] * fToMs); + } + MicroProfilePrintf(CB, Handle, "];\n"); + } + } + MicroProfilePrintf(CB, Handle, "\nvar ThreadGroupTimeTotalArray = ["); + for(uint32_t i = 0; i < S.nNumLogs; ++i) + { + if(S.Pool[i]) + { + MicroProfilePrintf(CB, Handle, "ThreadGroupTimeTotal%d,", i); + } + } + MicroProfilePrintf(CB, Handle, "];"); + + + + + MicroProfilePrintf(CB, Handle, "\nvar ThreadIds = ["); + for(uint32_t i = 0; i < S.nNumLogs; ++i) + { + if(S.Pool[i]) + { + ThreadIdType ThreadId = S.Pool[i]->nThreadId; + if(!ThreadId) + { + ThreadId = (ThreadIdType)-1; + } + MicroProfilePrintf(CB, Handle, "%d,", ThreadId); + } + else + { + MicroProfilePrintf(CB, Handle, "-1,", i); + } + } + MicroProfilePrintf(CB, Handle, "];\n\n"); + + MicroProfilePrintf(CB, Handle, "\nvar MetaNames = ["); + for(int i = 0; i < MICROPROFILE_META_MAX; ++i) + { + if(S.MetaCounters[i].pName) + { + MicroProfilePrintf(CB, Handle, "'%s',", S.MetaCounters[i].pName); + } + } + + + MicroProfilePrintf(CB, Handle, "];\n\n"); + + + + uint32_t nNumFrames = (MICROPROFILE_MAX_FRAME_HISTORY - MICROPROFILE_GPU_FRAME_DELAY - 3); //leave a few to not overwrite + nNumFrames = 
MicroProfileMin(nNumFrames, (uint32_t)nMaxFrames); + + + uint32_t nFirstFrame = (S.nFrameCurrent + MICROPROFILE_MAX_FRAME_HISTORY - nNumFrames) % MICROPROFILE_MAX_FRAME_HISTORY; + uint32_t nLastFrame = (nFirstFrame + nNumFrames) % MICROPROFILE_MAX_FRAME_HISTORY; + MP_ASSERT(nLastFrame == (S.nFrameCurrent % MICROPROFILE_MAX_FRAME_HISTORY)); + MP_ASSERT(nFirstFrame < MICROPROFILE_MAX_FRAME_HISTORY); + MP_ASSERT(nLastFrame < MICROPROFILE_MAX_FRAME_HISTORY); + const int64_t nTickStart = S.Frames[nFirstFrame].nFrameStartCpu; + const int64_t nTickEnd = S.Frames[nLastFrame].nFrameStartCpu; + int64_t nTickStartGpu = S.Frames[nFirstFrame].nFrameStartGpu; + + int64_t nTickReferenceCpu, nTickReferenceGpu; + int64_t nTicksPerSecondCpu = MicroProfileTicksPerSecondCpu(); + int64_t nTicksPerSecondGpu = MicroProfileTicksPerSecondGpu(); + int nTickReference = 0; + if(MicroProfileGetGpuTickReference(&nTickReferenceCpu, &nTickReferenceGpu)) + { + nTickStartGpu = (nTickStart - nTickReferenceCpu) * nTicksPerSecondGpu / nTicksPerSecondCpu + nTickReferenceGpu; + nTickReference = 1; + } + + +#if MICROPROFILE_DEBUG + printf("dumping %d frames\n", nNumFrames); + printf("dumping frame %d to %d\n", nFirstFrame, nLastFrame); +#endif + + + uint32_t* nTimerCounter = (uint32_t*)alloca(sizeof(uint32_t)* S.nTotalTimers); + memset(nTimerCounter, 0, sizeof(uint32_t) * S.nTotalTimers); + + MicroProfilePrintf(CB, Handle, "var Frames = Array(%d);\n", nNumFrames); + for(uint32_t i = 0; i < nNumFrames; ++i) + { + uint32_t nFrameIndex = (nFirstFrame + i) % MICROPROFILE_MAX_FRAME_HISTORY; + uint32_t nFrameIndexNext = (nFrameIndex + 1) % MICROPROFILE_MAX_FRAME_HISTORY; + + for(uint32_t j = 0; j < S.nNumLogs; ++j) + { + MicroProfileThreadLog* pLog = S.Pool[j]; + int64_t nStartTickBase = pLog->nGpu ? 
nTickStartGpu : nTickStart; + uint32_t nLogStart = S.Frames[nFrameIndex].nLogStart[j]; + uint32_t nLogEnd = S.Frames[nFrameIndexNext].nLogStart[j]; + + float fToMsCpu = MicroProfileTickToMsMultiplier(nTicksPerSecondCpu); + float fToMsBase = MicroProfileTickToMsMultiplier(pLog->nGpu ? nTicksPerSecondGpu : nTicksPerSecondCpu); + MicroProfilePrintf(CB, Handle, "var ts_%d_%d = [", i, j); + if(nLogStart != nLogEnd) + { + uint32_t k = nLogStart; + uint32_t nLogType = MicroProfileLogType(pLog->Log[k]); + float fToMs = nLogType == MP_LOG_GPU_EXTRA ? fToMsCpu : fToMsBase; + int64_t nStartTick = nLogType == MP_LOG_GPU_EXTRA ? nTickStart : nStartTickBase; + float fTime = nLogType == MP_LOG_META ? 0.f : MicroProfileLogTickDifference(nStartTick, pLog->Log[k]) * fToMs; + MicroProfilePrintf(CB, Handle, "%f", fTime); + for(k = (k+1) % MICROPROFILE_BUFFER_SIZE; k != nLogEnd; k = (k+1) % MICROPROFILE_BUFFER_SIZE) + { + uint32_t nLogType = MicroProfileLogType(pLog->Log[k]); + float fToMs = nLogType == MP_LOG_GPU_EXTRA ? fToMsCpu : fToMsBase; + nStartTick = nLogType == MP_LOG_GPU_EXTRA ? nTickStart : nStartTickBase; + float fTime = nLogType == MP_LOG_META ? 
0.f : MicroProfileLogTickDifference(nStartTick, pLog->Log[k]) * fToMs; + MicroProfilePrintf(CB, Handle, ",%f", fTime); + } + } + MicroProfilePrintf(CB, Handle, "];\n"); + MicroProfilePrintf(CB, Handle, "var tt_%d_%d = [", i, j); + if(nLogStart != nLogEnd) + { + uint32_t k = nLogStart; + MicroProfilePrintf(CB, Handle, "%d", MicroProfileLogType(pLog->Log[k])); + for(k = (k+1) % MICROPROFILE_BUFFER_SIZE; k != nLogEnd; k = (k+1) % MICROPROFILE_BUFFER_SIZE) + { + uint32_t nLogType = MicroProfileLogType(pLog->Log[k]); + if(nLogType == MP_LOG_META) + { + //for meta, store the count + 3, which is the tick part + nLogType = 3 + MicroProfileLogGetTick(pLog->Log[k]); + } + MicroProfilePrintf(CB, Handle, ",%d", nLogType); + } + } + MicroProfilePrintf(CB, Handle, "];\n"); + + MicroProfilePrintf(CB, Handle, "var ti_%d_%d = [", i, j); + if(nLogStart != nLogEnd) + { + uint32_t k = nLogStart; + MicroProfilePrintf(CB, Handle, "%d", (uint32_t)MicroProfileLogTimerIndex(pLog->Log[k])); + for(k = (k+1) % MICROPROFILE_BUFFER_SIZE; k != nLogEnd; k = (k+1) % MICROPROFILE_BUFFER_SIZE) + { + uint32_t nTimerIndex = (uint32_t)MicroProfileLogTimerIndex(pLog->Log[k]); + MicroProfilePrintf(CB, Handle, ",%d", nTimerIndex); + nTimerCounter[nTimerIndex]++; + } + } + MicroProfilePrintf(CB, Handle, "];\n"); + + } + + MicroProfilePrintf(CB, Handle, "var ts%d = [", i); + for(uint32_t j = 0; j < S.nNumLogs; ++j) + { + MicroProfilePrintf(CB, Handle, "ts_%d_%d,", i, j); + } + MicroProfilePrintf(CB, Handle, "];\n"); + MicroProfilePrintf(CB, Handle, "var tt%d = [", i); + for(uint32_t j = 0; j < S.nNumLogs; ++j) + { + MicroProfilePrintf(CB, Handle, "tt_%d_%d,", i, j); + } + MicroProfilePrintf(CB, Handle, "];\n"); + + MicroProfilePrintf(CB, Handle, "var ti%d = [", i); + for(uint32_t j = 0; j < S.nNumLogs; ++j) + { + MicroProfilePrintf(CB, Handle, "ti_%d_%d,", i, j); + } + MicroProfilePrintf(CB, Handle, "];\n"); + + + int64_t nFrameStart = S.Frames[nFrameIndex].nFrameStartCpu; + int64_t nFrameEnd = 
S.Frames[nFrameIndexNext].nFrameStartCpu; + + float fToMs = MicroProfileTickToMsMultiplier(nTicksPerSecondCpu); + float fFrameMs = MicroProfileLogTickDifference(nTickStart, nFrameStart) * fToMs; + float fFrameEndMs = MicroProfileLogTickDifference(nTickStart, nFrameEnd) * fToMs; + float fFrameGpuMs = 0; + float fFrameGpuEndMs = 0; + if(nTickReference) + { + fFrameGpuMs = MicroProfileLogTickDifference(nTickStartGpu, S.Frames[nFrameIndex].nFrameStartGpu) * fToMsGPU; + fFrameGpuEndMs = MicroProfileLogTickDifference(nTickStartGpu, S.Frames[nFrameIndexNext].nFrameStartGpu) * fToMsGPU; + } + MicroProfilePrintf(CB, Handle, "Frames[%d] = MakeFrame(%d, %f, %f, %f, %f, ts%d, tt%d, ti%d);\n", i, 0, fFrameMs, fFrameEndMs, fFrameGpuMs, fFrameGpuEndMs, i, i, i); + } + + uint32_t nContextSwitchStart = 0; + uint32_t nContextSwitchEnd = 0; + MicroProfileContextSwitchSearch(&nContextSwitchStart, &nContextSwitchEnd, nTickStart, nTickEnd); + + uint32_t nWrittenBefore = S.nWebServerDataSent; + MicroProfilePrintf(CB, Handle, "var CSwitchThreadInOutCpu = ["); + for(uint32_t j = nContextSwitchStart; j != nContextSwitchEnd; j = (j+1) % MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE) + { + MicroProfileContextSwitch CS = S.ContextSwitch[j]; + int nCpu = CS.nCpu; + MicroProfilePrintf(CB, Handle, "%d,%d,%d,", CS.nThreadIn, CS.nThreadOut, nCpu); + } + MicroProfilePrintf(CB, Handle, "];\n"); + MicroProfilePrintf(CB, Handle, "var CSwitchTime = ["); + float fToMsCpu = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu()); + for(uint32_t j = nContextSwitchStart; j != nContextSwitchEnd; j = (j+1) % MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE) + { + MicroProfileContextSwitch CS = S.ContextSwitch[j]; + float fTime = MicroProfileLogTickDifference(nTickStart, CS.nTicks) * fToMsCpu; + MicroProfilePrintf(CB, Handle, "%f,", fTime); + } + MicroProfilePrintf(CB, Handle, "];\n"); + uint32_t nWrittenAfter = S.nWebServerDataSent; + MicroProfilePrintf(CB, Handle, "//CSwitch Size %d\n", nWrittenAfter - 
nWrittenBefore); + + + for(size_t i = 0; i < g_MicroProfileHtml_end_count; ++i) + { + CB(Handle, g_MicroProfileHtml_end_sizes[i]-1, g_MicroProfileHtml_end[i]); + } + + uint32_t* nGroupCounter = (uint32_t*)alloca(sizeof(uint32_t)* S.nGroupCount); + + memset(nGroupCounter, 0, sizeof(uint32_t) * S.nGroupCount); + for(uint32_t i = 0; i < S.nTotalTimers; ++i) + { + uint32_t nGroupIndex = S.TimerInfo[i].nGroupIndex; + nGroupCounter[nGroupIndex] += nTimerCounter[i]; + } + + uint32_t* nGroupCounterSort = (uint32_t*)alloca(sizeof(uint32_t)* S.nGroupCount); + uint32_t* nTimerCounterSort = (uint32_t*)alloca(sizeof(uint32_t)* S.nTotalTimers); + for(uint32_t i = 0; i < S.nGroupCount; ++i) + { + nGroupCounterSort[i] = i; + } + for(uint32_t i = 0; i < S.nTotalTimers; ++i) + { + nTimerCounterSort[i] = i; + } + std::sort(nGroupCounterSort, nGroupCounterSort + S.nGroupCount, + [nGroupCounter](const uint32_t l, const uint32_t r) + { + return nGroupCounter[l] > nGroupCounter[r]; + } + ); + + std::sort(nTimerCounterSort, nTimerCounterSort + S.nTotalTimers, + [nTimerCounter](const uint32_t l, const uint32_t r) + { + return nTimerCounter[l] > nTimerCounter[r]; + } + ); + + MicroProfilePrintf(CB, Handle, "\n\n"); + + S.nActiveGroup = nActiveGroup; + S.nRunning = nRunning; + +#if MICROPROFILE_DEBUG + int64_t nTicksEnd = MP_TICK(); + float fMs = fToMsCpu * (nTicksEnd - S.nPauseTicks); + printf("html dump took %6.2fms\n", fMs); +#endif + + +} + +void MicroProfileWriteFile(void* Handle, size_t nSize, const char* pData) +{ + fwrite(pData, nSize, 1, (FILE*)Handle); +} + +void MicroProfileDumpToFile() +{ + std::lock_guard Lock(MicroProfileMutex()); + if(S.nDumpFileNextFrame&1) + { + FILE* F = fopen(S.HtmlDumpPath, "w"); + if(F) + { + MicroProfileDumpHtml(MicroProfileWriteFile, F, MICROPROFILE_WEBSERVER_MAXFRAMES, S.HtmlDumpPath); + fclose(F); + } + } + if(S.nDumpFileNextFrame&2) + { + FILE* F = fopen(S.CsvDumpPath, "w"); + if(F) + { + MicroProfileDumpCsv(MicroProfileWriteFile, F, 
MICROPROFILE_WEBSERVER_MAXFRAMES); + fclose(F); + } + } +} + +void MicroProfileFlushSocket(MpSocket Socket) +{ + send(Socket, &S.WebServerBuffer[0], S.WebServerPut, 0); + S.WebServerPut = 0; + +} + +void MicroProfileWriteSocket(void* Handle, size_t nSize, const char* pData) +{ + S.nWebServerDataSent += nSize; + MpSocket Socket = *(MpSocket*)Handle; + if(nSize > MICROPROFILE_WEBSERVER_SOCKET_BUFFER_SIZE / 2) + { + MicroProfileFlushSocket(Socket); + send(Socket, pData, nSize, 0); + + } + else + { + memcpy(&S.WebServerBuffer[S.WebServerPut], pData, nSize); + S.WebServerPut += nSize; + if(S.WebServerPut > MICROPROFILE_WEBSERVER_SOCKET_BUFFER_SIZE/2) + { + MicroProfileFlushSocket(Socket); + } + } +} + +#if MICROPROFILE_MINIZ +#ifndef MICROPROFILE_COMPRESS_BUFFER_SIZE +#define MICROPROFILE_COMPRESS_BUFFER_SIZE (256<<10) +#endif + +#define MICROPROFILE_COMPRESS_CHUNK (MICROPROFILE_COMPRESS_BUFFER_SIZE/2) +struct MicroProfileCompressedSocketState +{ + unsigned char DeflateOut[MICROPROFILE_COMPRESS_CHUNK]; + unsigned char DeflateIn[MICROPROFILE_COMPRESS_CHUNK]; + mz_stream Stream; + MpSocket Socket; + uint32_t nSize; + uint32_t nCompressedSize; + uint32_t nFlushes; + uint32_t nMemmoveBytes; +}; + +void MicroProfileCompressedSocketFlush(MicroProfileCompressedSocketState* pState) +{ + mz_stream& Stream = pState->Stream; + unsigned char* pSendStart = &pState->DeflateOut[0]; + unsigned char* pSendEnd = &pState->DeflateOut[MICROPROFILE_COMPRESS_CHUNK - Stream.avail_out]; + if(pSendStart != pSendEnd) + { + send(pState->Socket, (const char*)pSendStart, pSendEnd - pSendStart, 0); + pState->nCompressedSize += pSendEnd - pSendStart; + } + Stream.next_out = &pState->DeflateOut[0]; + Stream.avail_out = MICROPROFILE_COMPRESS_CHUNK; + +} +void MicroProfileCompressedSocketStart(MicroProfileCompressedSocketState* pState, MpSocket Socket) +{ + mz_stream& Stream = pState->Stream; + memset(&Stream, 0, sizeof(Stream)); + Stream.next_out = &pState->DeflateOut[0]; + Stream.avail_out = 
MICROPROFILE_COMPRESS_CHUNK; + Stream.next_in = &pState->DeflateIn[0]; + Stream.avail_in = 0; + mz_deflateInit(&Stream, Z_DEFAULT_COMPRESSION); + pState->Socket = Socket; + pState->nSize = 0; + pState->nCompressedSize = 0; + pState->nFlushes = 0; + pState->nMemmoveBytes = 0; + +} +void MicroProfileCompressedSocketFinish(MicroProfileCompressedSocketState* pState) +{ + mz_stream& Stream = pState->Stream; + MicroProfileCompressedSocketFlush(pState); + int r = mz_deflate(&Stream, MZ_FINISH); + MP_ASSERT(r == MZ_STREAM_END); + MicroProfileCompressedSocketFlush(pState); + r = mz_deflateEnd(&Stream); + MP_ASSERT(r == MZ_OK); +} + +void MicroProfileCompressedWriteSocket(void* Handle, size_t nSize, const char* pData) +{ + MicroProfileCompressedSocketState* pState = (MicroProfileCompressedSocketState*)Handle; + mz_stream& Stream = pState->Stream; + const unsigned char* pDeflateInEnd = Stream.next_in + Stream.avail_in; + const unsigned char* pDeflateInStart = &pState->DeflateIn[0]; + const unsigned char* pDeflateInRealEnd = &pState->DeflateIn[MICROPROFILE_COMPRESS_CHUNK]; + pState->nSize += nSize; + if(nSize <= pDeflateInRealEnd - pDeflateInEnd) + { + memcpy((void*)pDeflateInEnd, pData, nSize); + Stream.avail_in += nSize; + MP_ASSERT(Stream.next_in + Stream.avail_in <= pDeflateInRealEnd); + return; + } + int Flush = 0; + while(nSize) + { + pDeflateInEnd = Stream.next_in + Stream.avail_in; + if(Flush) + { + pState->nFlushes++; + MicroProfileCompressedSocketFlush(pState); + pDeflateInRealEnd = &pState->DeflateIn[MICROPROFILE_COMPRESS_CHUNK]; + if(pDeflateInEnd == pDeflateInRealEnd) + { + if(Stream.avail_in) + { + MP_ASSERT(pDeflateInStart != Stream.next_in); + memmove((void*)pDeflateInStart, Stream.next_in, Stream.avail_in); + pState->nMemmoveBytes += Stream.avail_in; + } + Stream.next_in = pDeflateInStart; + pDeflateInEnd = Stream.next_in + Stream.avail_in; + } + } + size_t nSpace = pDeflateInRealEnd - pDeflateInEnd; + size_t nBytes = MicroProfileMin(nSpace, nSize); + 
MP_ASSERT(nBytes + pDeflateInEnd <= pDeflateInRealEnd); + memcpy((void*)pDeflateInEnd, pData, nBytes); + Stream.avail_in += nBytes; + nSize -= nBytes; + pData += nBytes; + int r = mz_deflate(&Stream, MZ_NO_FLUSH); + Flush = r == MZ_BUF_ERROR || nBytes == 0 || Stream.avail_out == 0 ? 1 : 0; + MP_ASSERT(r == MZ_BUF_ERROR || r == MZ_OK); + if(r == MZ_BUF_ERROR) + { + r = mz_deflate(&Stream, MZ_SYNC_FLUSH); + } + } +} +#endif + + +#ifndef MicroProfileSetNonBlocking //fcntl doesnt work on a some unix like platforms.. +void MicroProfileSetNonBlocking(MpSocket Socket, int NonBlocking) +{ +#ifdef _WIN32 + u_long nonBlocking = NonBlocking ? 1 : 0; + ioctlsocket(Socket, FIONBIO, &nonBlocking); +#else + int Options = fcntl(Socket, F_GETFL); + if(NonBlocking) + { + fcntl(Socket, F_SETFL, Options|O_NONBLOCK); + } + else + { + fcntl(Socket, F_SETFL, Options&(~O_NONBLOCK)); + } +#endif +} +#endif + +void MicroProfileWebServerStart() +{ +#ifdef _WIN32 + WSADATA wsa; + if(WSAStartup(MAKEWORD(2, 2), &wsa)) + { + S.ListenerSocket = -1; + return; + } +#endif + + S.ListenerSocket = socket(PF_INET, SOCK_STREAM, 6); + MP_ASSERT(!MP_INVALID_SOCKET(S.ListenerSocket)); + MicroProfileSetNonBlocking(S.ListenerSocket, 1); + + S.nWebServerPort = (uint32_t)-1; + struct sockaddr_in Addr; + Addr.sin_family = AF_INET; + Addr.sin_addr.s_addr = INADDR_ANY; + for(int i = 0; i < 20; ++i) + { + Addr.sin_port = htons(MICROPROFILE_WEBSERVER_PORT+i); + if(0 == bind(S.ListenerSocket, (sockaddr*)&Addr, sizeof(Addr))) + { + S.nWebServerPort = MICROPROFILE_WEBSERVER_PORT+i; + break; + } + } + listen(S.ListenerSocket, 8); +} + +void MicroProfileWebServerStop() +{ +#ifdef _WIN32 + closesocket(S.ListenerSocket); + WSACleanup(); +#else + close(S.ListenerSocket); +#endif +} + +int MicroProfileParseGet(const char* pGet) +{ + const char* pStart = pGet; + while(*pGet != '\0') + { + if(*pGet < '0' || *pGet > '9') + return 0; + pGet++; + } + int nFrames = atoi(pStart); + if(nFrames) + { + return nFrames; + } + else + { 
+ return MICROPROFILE_WEBSERVER_MAXFRAMES; + } +} +bool MicroProfileWebServerUpdate() +{ + MICROPROFILE_SCOPEI("MicroProfile", "Webserver-update", -1); + MpSocket Connection = accept(S.ListenerSocket, 0, 0); + bool bServed = false; + if(!MP_INVALID_SOCKET(Connection)) + { + std::lock_guard Lock(MicroProfileMutex()); + char Req[8192]; + MicroProfileSetNonBlocking(Connection, 0); + int nReceived = recv(Connection, Req, sizeof(Req)-1, 0); + if(nReceived > 0) + { + Req[nReceived] = '\0'; +#if MICROPROFILE_MINIZ +#define MICROPROFILE_HTML_HEADER "HTTP/1.0 200 OK\r\nContent-Type: text/html\r\nContent-Encoding: deflate\r\nExpires: Tue, 01 Jan 2199 16:00:00 GMT\r\n\r\n" +#else +#define MICROPROFILE_HTML_HEADER "HTTP/1.0 200 OK\r\nContent-Type: text/html\r\nExpires: Tue, 01 Jan 2199 16:00:00 GMT\r\n\r\n" +#endif + char* pHttp = strstr(Req, "HTTP/"); + char* pGet = strstr(Req, "GET /"); + char* pHost = strstr(Req, "Host: "); + auto Terminate = [](char* pString) + { + char* pEnd = pString; + while(*pEnd != '\0') + { + if(*pEnd == '\r' || *pEnd == '\n' || *pEnd == ' ') + { + *pEnd = '\0'; + return; + } + pEnd++; + } + }; + if(pHost) + { + pHost += sizeof("Host: ")-1; + Terminate(pHost); + } + + if(pHttp && pGet) + { + *pHttp = '\0'; + pGet += sizeof("GET /")-1; + Terminate(pGet); + int nFrames = MicroProfileParseGet(pGet); + if(nFrames) + { + uint64_t nTickStart = MP_TICK(); + send(Connection, MICROPROFILE_HTML_HEADER, sizeof(MICROPROFILE_HTML_HEADER)-1, 0); + uint64_t nDataStart = S.nWebServerDataSent; + S.WebServerPut = 0; + #if 0 == MICROPROFILE_MINIZ + MicroProfileDumpHtml(MicroProfileWriteSocket, &Connection, nFrames, pHost); + uint64_t nDataEnd = S.nWebServerDataSent; + uint64_t nTickEnd = MP_TICK(); + uint64_t nDiff = (nTickEnd - nTickStart); + float fMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu()) * nDiff; + int nKb = ((nDataEnd-nDataStart)>>10) + 1; + int nCompressedKb = nKb; + MicroProfilePrintf(MicroProfileWriteSocket, &Connection, "\n\n\n",nKb, 
fMs); + MicroProfileFlushSocket(Connection); + #else + MicroProfileCompressedSocketState CompressState; + MicroProfileCompressedSocketStart(&CompressState, Connection); + MicroProfileDumpHtml(MicroProfileCompressedWriteSocket, &CompressState, nFrames, pHost); + S.nWebServerDataSent += CompressState.nSize; + uint64_t nDataEnd = S.nWebServerDataSent; + uint64_t nTickEnd = MP_TICK(); + uint64_t nDiff = (nTickEnd - nTickStart); + float fMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu()) * nDiff; + int nKb = ((nDataEnd-nDataStart)>>10) + 1; + int nCompressedKb = ((CompressState.nCompressedSize)>>10) + 1; + MicroProfilePrintf(MicroProfileCompressedWriteSocket, &CompressState, "\n\n\n", nKb, nCompressedKb, fMs); + MicroProfileCompressedSocketFinish(&CompressState); + MicroProfileFlushSocket(Connection); + #endif + + #if MICROPROFILE_DEBUG + printf("\n\n\n", nKb, nCompressedKb, fMs); + #endif + } + } + } +#ifdef _WIN32 + closesocket(Connection); +#else + close(Connection); +#endif + } + return bServed; +} +#endif + + + + +#if MICROPROFILE_CONTEXT_SWITCH_TRACE +//functions that need to be implemented per platform. 
+void* MicroProfileTraceThread(void* unused); +bool MicroProfileIsLocalThread(uint32_t nThreadId); + + +void MicroProfileStartContextSwitchTrace() +{ + if(!S.bContextSwitchRunning) + { + S.bContextSwitchRunning = true; + S.bContextSwitchStop = false; + MicroProfileThreadStart(&S.ContextSwitchThread, MicroProfileTraceThread); + } +} + +void MicroProfileStopContextSwitchTrace() +{ + if(S.bContextSwitchRunning) + { + S.bContextSwitchStop = true; + MicroProfileThreadJoin(&S.ContextSwitchThread); + } +} + + +#ifdef _WIN32 +#define INITGUID +#include +#include +#include + + +static GUID g_MicroProfileThreadClassGuid = { 0x3d6fa8d1, 0xfe05, 0x11d0, 0x9d, 0xda, 0x00, 0xc0, 0x4f, 0xd7, 0xba, 0x7c }; + +struct MicroProfileSCSwitch +{ + uint32_t NewThreadId; + uint32_t OldThreadId; + int8_t NewThreadPriority; + int8_t OldThreadPriority; + uint8_t PreviousCState; + int8_t SpareByte; + int8_t OldThreadWaitReason; + int8_t OldThreadWaitMode; + int8_t OldThreadState; + int8_t OldThreadWaitIdealProcessor; + uint32_t NewThreadWaitTime; + uint32_t Reserved; +}; + + +VOID WINAPI MicroProfileContextSwitchCallback(PEVENT_TRACE pEvent) +{ + if (pEvent->Header.Guid == g_MicroProfileThreadClassGuid) + { + if (pEvent->Header.Class.Type == 36) + { + MicroProfileSCSwitch* pCSwitch = (MicroProfileSCSwitch*) pEvent->MofData; + if ((pCSwitch->NewThreadId != 0) || (pCSwitch->OldThreadId != 0)) + { + MicroProfileContextSwitch Switch; + Switch.nThreadOut = pCSwitch->OldThreadId; + Switch.nThreadIn = pCSwitch->NewThreadId; + Switch.nCpu = pEvent->BufferContext.ProcessorNumber; + Switch.nTicks = pEvent->Header.TimeStamp.QuadPart; + MicroProfileContextSwitchPut(&Switch); + } + } + } +} + +ULONG WINAPI MicroProfileBufferCallback(PEVENT_TRACE_LOGFILE Buffer) +{ + return (S.bContextSwitchStop || !S.bContextSwitchRunning) ? 
FALSE : TRUE; +} + + +struct MicroProfileKernelTraceProperties : public EVENT_TRACE_PROPERTIES +{ + char dummy[sizeof(KERNEL_LOGGER_NAME)]; +}; + +void MicroProfileContextSwitchShutdownTrace() +{ + TRACEHANDLE SessionHandle = 0; + MicroProfileKernelTraceProperties sessionProperties; + + ZeroMemory(&sessionProperties, sizeof(sessionProperties)); + sessionProperties.Wnode.BufferSize = sizeof(sessionProperties); + sessionProperties.Wnode.Flags = WNODE_FLAG_TRACED_GUID; + sessionProperties.Wnode.ClientContext = 1; //QPC clock resolution + sessionProperties.Wnode.Guid = SystemTraceControlGuid; + sessionProperties.BufferSize = 1; + sessionProperties.NumberOfBuffers = 128; + sessionProperties.EnableFlags = EVENT_TRACE_FLAG_CSWITCH; + sessionProperties.LogFileMode = EVENT_TRACE_REAL_TIME_MODE; + sessionProperties.MaximumFileSize = 0; + sessionProperties.LoggerNameOffset = sizeof(EVENT_TRACE_PROPERTIES); + sessionProperties.LogFileNameOffset = 0; + + EVENT_TRACE_LOGFILE log; + ZeroMemory(&log, sizeof(log)); + log.LoggerName = KERNEL_LOGGER_NAME; + log.ProcessTraceMode = 0; + TRACEHANDLE hLog = OpenTrace(&log); + if (hLog) + { + ControlTrace(SessionHandle, KERNEL_LOGGER_NAME, &sessionProperties, EVENT_TRACE_CONTROL_STOP); + } + CloseTrace(hLog); + + +} + +void* MicroProfileTraceThread(void* unused) +{ + + MicroProfileContextSwitchShutdownTrace(); + ULONG status = ERROR_SUCCESS; + TRACEHANDLE SessionHandle = 0; + MicroProfileKernelTraceProperties sessionProperties; + + ZeroMemory(&sessionProperties, sizeof(sessionProperties)); + sessionProperties.Wnode.BufferSize = sizeof(sessionProperties); + sessionProperties.Wnode.Flags = WNODE_FLAG_TRACED_GUID; + sessionProperties.Wnode.ClientContext = 1; //QPC clock resolution + sessionProperties.Wnode.Guid = SystemTraceControlGuid; + sessionProperties.BufferSize = 1; + sessionProperties.NumberOfBuffers = 128; + sessionProperties.EnableFlags = EVENT_TRACE_FLAG_CSWITCH|EVENT_TRACE_FLAG_PROCESS; + sessionProperties.LogFileMode = 
EVENT_TRACE_REAL_TIME_MODE; + sessionProperties.MaximumFileSize = 0; + sessionProperties.LoggerNameOffset = sizeof(EVENT_TRACE_PROPERTIES); + sessionProperties.LogFileNameOffset = 0; + + + status = StartTrace((PTRACEHANDLE) &SessionHandle, KERNEL_LOGGER_NAME, &sessionProperties); + + if (ERROR_SUCCESS != status) + { + S.bContextSwitchRunning = false; + return 0; + } + + EVENT_TRACE_LOGFILE log; + ZeroMemory(&log, sizeof(log)); + + log.LoggerName = KERNEL_LOGGER_NAME; + log.ProcessTraceMode = PROCESS_TRACE_MODE_REAL_TIME | PROCESS_TRACE_MODE_RAW_TIMESTAMP; + log.EventCallback = MicroProfileContextSwitchCallback; + log.BufferCallback = MicroProfileBufferCallback; + + TRACEHANDLE hLog = OpenTrace(&log); + ProcessTrace(&hLog, 1, 0, 0); + CloseTrace(hLog); + MicroProfileContextSwitchShutdownTrace(); + + S.bContextSwitchRunning = false; + return 0; +} + +bool MicroProfileIsLocalThread(uint32_t nThreadId) +{ + HANDLE h = OpenThread(THREAD_QUERY_LIMITED_INFORMATION, FALSE, nThreadId); + if(h == NULL) + return false; + DWORD hProcess = GetProcessIdOfThread(h); + CloseHandle(h); + return GetCurrentProcessId() == hProcess; +} + +#elif defined(__APPLE__) +#include +void* MicroProfileTraceThread(void* unused) +{ + FILE* pFile = fopen("mypipe", "r"); + if(!pFile) + { + printf("CONTEXT SWITCH FAILED TO OPEN FILE: make sure to run dtrace script\n"); + S.bContextSwitchRunning = false; + return 0; + } + printf("STARTING TRACE THREAD\n"); + char* pLine = 0; + size_t cap = 0; + size_t len = 0; + struct timeval tv; + + gettimeofday(&tv, NULL); + + uint64_t nsSinceEpoch = ((uint64_t)(tv.tv_sec) * 1000000 + (uint64_t)(tv.tv_usec)) * 1000; + uint64_t nTickEpoch = MP_TICK(); + uint32_t nLastThread[MICROPROFILE_MAX_CONTEXT_SWITCH_THREADS] = {0}; + mach_timebase_info_data_t sTimebaseInfo; + mach_timebase_info(&sTimebaseInfo); + S.bContextSwitchRunning = true; + + uint64_t nProcessed = 0; + uint64_t nProcessedLast = 0; + while((len = getline(&pLine, &cap, pFile))>0 && !S.bContextSwitchStop) + 
{ + nProcessed += len; + if(nProcessed - nProcessedLast > 10<<10) + { + nProcessedLast = nProcessed; + printf("processed %llukb %llukb\n", (nProcessed-nProcessedLast)>>10,nProcessed >>10); + } + + char* pX = strchr(pLine, 'X'); + if(pX) + { + int cpu = atoi(pX+1); + char* pX2 = strchr(pX + 1, 'X'); + char* pX3 = strchr(pX2 + 1, 'X'); + int thread = atoi(pX2+1); + char* lala; + int64_t timestamp = strtoll(pX3 + 1, &lala, 10); + MicroProfileContextSwitch Switch; + + //convert to ticks. + uint64_t nDeltaNsSinceEpoch = timestamp - nsSinceEpoch; + uint64_t nDeltaTickSinceEpoch = sTimebaseInfo.numer * nDeltaNsSinceEpoch / sTimebaseInfo.denom; + uint64_t nTicks = nDeltaTickSinceEpoch + nTickEpoch; + if(cpu < MICROPROFILE_MAX_CONTEXT_SWITCH_THREADS) + { + Switch.nThreadOut = nLastThread[cpu]; + Switch.nThreadIn = thread; + nLastThread[cpu] = thread; + Switch.nCpu = cpu; + Switch.nTicks = nTicks; + MicroProfileContextSwitchPut(&Switch); + } + } + } + printf("EXITING TRACE THREAD\n"); + S.bContextSwitchRunning = false; + return 0; +} + +bool MicroProfileIsLocalThread(uint32_t nThreadId) +{ + return false; +} + +#endif +#else + +bool MicroProfileIsLocalThread(uint32_t nThreadId){return false;} +void MicroProfileStopContextSwitchTrace(){} +void MicroProfileStartContextSwitchTrace(){} + +#endif + + + + +#if MICROPROFILE_GPU_TIMERS_D3D11 +uint32_t MicroProfileGpuInsertTimeStamp() +{ + MicroProfileD3D11Frame& Frame = S.GPU.m_QueryFrames[S.GPU.m_nQueryFrame]; + if(Frame.m_nRateQueryStarted) + { + uint32_t nCurrent = (Frame.m_nQueryStart + Frame.m_nQueryCount) % MICROPROFILE_D3D_MAX_QUERIES; + uint32_t nNext = (nCurrent + 1) % MICROPROFILE_D3D_MAX_QUERIES; + if(nNext != S.GPU.m_nQueryGet) + { + Frame.m_nQueryCount++; + ID3D11Query* pQuery = (ID3D11Query*)S.GPU.m_pQueries[nCurrent]; + ID3D11DeviceContext* pContext = (ID3D11DeviceContext*)S.GPU.m_pDeviceContext; + pContext->End(pQuery); + S.GPU.m_nQueryPut = nNext; + return nCurrent; + } + } + return (uint32_t)-1; +} + +uint64_t 
MicroProfileGpuGetTimeStamp(uint32_t nIndex) +{ + if(nIndex == (uint32_t)-1) + { + return (uint64_t)-1; + } + int64_t nResult = S.GPU.m_nQueryResults[nIndex]; + MP_ASSERT(nResult != -1); + return nResult; +} + +bool MicroProfileGpuGetData(void* pQuery, void* pData, uint32_t nDataSize) +{ + HRESULT hr; + do + { + hr = ((ID3D11DeviceContext*)S.GPU.m_pDeviceContext)->GetData((ID3D11Query*)pQuery, pData, nDataSize, 0); + }while(hr == S_FALSE); + switch(hr) + { + case DXGI_ERROR_DEVICE_REMOVED: + case DXGI_ERROR_INVALID_CALL: + case E_INVALIDARG: + MP_BREAK(); + return false; + + } + return true; +} + +uint64_t MicroProfileTicksPerSecondGpu() +{ + return S.GPU.m_nQueryFrequency; +} + +void MicroProfileGpuFlip() +{ + MicroProfileD3D11Frame& CurrentFrame = S.GPU.m_QueryFrames[S.GPU.m_nQueryFrame]; + ID3D11DeviceContext* pContext = (ID3D11DeviceContext*)S.GPU.m_pDeviceContext; + if(CurrentFrame.m_nRateQueryStarted) + { + pContext->End((ID3D11Query*)CurrentFrame.m_pRateQuery); + } + uint32_t nNextFrame = (S.GPU.m_nQueryFrame + 1) % MICROPROFILE_GPU_FRAME_DELAY; + MicroProfileD3D11Frame& OldFrame = S.GPU.m_QueryFrames[nNextFrame]; + if(OldFrame.m_nRateQueryStarted) + { + struct RateQueryResult + { + uint64_t nFrequency; + BOOL bDisjoint; + }; + RateQueryResult Result; + if(MicroProfileGpuGetData(OldFrame.m_pRateQuery, &Result, sizeof(Result))) + { + if(S.GPU.m_nQueryFrequency != (int64_t)Result.nFrequency) + { + if(S.GPU.m_nQueryFrequency) + { + OutputDebugString("Query freq changing"); + } + S.GPU.m_nQueryFrequency = Result.nFrequency; + } + uint32_t nStart = OldFrame.m_nQueryStart; + uint32_t nCount = OldFrame.m_nQueryCount; + for(uint32_t i = 0; i < nCount; ++i) + { + uint32_t nIndex = (i + nStart) % MICROPROFILE_D3D_MAX_QUERIES; + + + + if(!MicroProfileGpuGetData(S.GPU.m_pQueries[nIndex], &S.GPU.m_nQueryResults[nIndex], sizeof(uint64_t))) + { + S.GPU.m_nQueryResults[nIndex] = -1; + } + } + } + else + { + uint32_t nStart = OldFrame.m_nQueryStart; + uint32_t nCount = 
OldFrame.m_nQueryCount; + for(uint32_t i = 0; i < nCount; ++i) + { + uint32_t nIndex = (i + nStart) % MICROPROFILE_D3D_MAX_QUERIES; + S.GPU.m_nQueryResults[nIndex] = -1; + } + } + S.GPU.m_nQueryGet = (OldFrame.m_nQueryStart + OldFrame.m_nQueryCount) % MICROPROFILE_D3D_MAX_QUERIES; + } + + S.GPU.m_nQueryFrame = nNextFrame; + MicroProfileD3D11Frame& NextFrame = S.GPU.m_QueryFrames[nNextFrame]; + pContext->Begin((ID3D11Query*)NextFrame.m_pRateQuery); + NextFrame.m_nQueryStart = S.GPU.m_nQueryPut; + NextFrame.m_nQueryCount = 0; + + NextFrame.m_nRateQueryStarted = 1; +} + +void MicroProfileGpuInitD3D11(void* pDevice_, void* pDeviceContext_) +{ + ID3D11Device* pDevice = (ID3D11Device*)pDevice_; + ID3D11DeviceContext* pDeviceContext = (ID3D11DeviceContext*)pDeviceContext_; + S.GPU.m_pDeviceContext = pDeviceContext_; + + D3D11_QUERY_DESC Desc; + Desc.MiscFlags = 0; + Desc.Query = D3D11_QUERY_TIMESTAMP; + for(uint32_t i = 0; i < MICROPROFILE_D3D_MAX_QUERIES; ++i) + { + HRESULT hr = pDevice->CreateQuery(&Desc, (ID3D11Query**)&S.GPU.m_pQueries[i]); + MP_ASSERT(hr == S_OK); + S.GPU.m_nQueryResults[i] = -1; + } + S.GPU.m_nQueryPut = 0; + S.GPU.m_nQueryGet = 0; + S.GPU.m_nQueryFrame = 0; + S.GPU.m_nQueryFrequency = 0; + Desc.Query = D3D11_QUERY_TIMESTAMP_DISJOINT; + for(uint32_t i = 0; i < MICROPROFILE_GPU_FRAME_DELAY; ++i) + { + S.GPU.m_QueryFrames[i].m_nQueryStart = 0; + S.GPU.m_QueryFrames[i].m_nQueryCount = 0; + S.GPU.m_QueryFrames[i].m_nRateQueryStarted = 0; + HRESULT hr = pDevice->CreateQuery(&Desc, (ID3D11Query**)&S.GPU.m_QueryFrames[i].m_pRateQuery); + MP_ASSERT(hr == S_OK); + } +} + + +void MicroProfileGpuShutdown() +{ + for(uint32_t i = 0; i < MICROPROFILE_D3D_MAX_QUERIES; ++i) + { + ((ID3D11Query*)&S.GPU.m_pQueries[i])->Release(); + S.GPU.m_pQueries[i] = 0; + } + for(uint32_t i = 0; i < MICROPROFILE_GPU_FRAME_DELAY; ++i) + { + ((ID3D11Query*)S.GPU.m_QueryFrames[i].m_pRateQuery)->Release(); + S.GPU.m_QueryFrames[i].m_pRateQuery = 0; + } +} + +int 
MicroProfileGetGpuTickReference(int64_t* pOutCPU, int64_t* pOutGpu) +{ + return 0; +} + + +#elif MICROPROFILE_GPU_TIMERS_GL +void MicroProfileGpuInitGL() +{ + S.GPU.GLTimerPos = 0; + glGenQueries(MICROPROFILE_GL_MAX_QUERIES, &S.GPU.GLTimers[0]); +} + +uint32_t MicroProfileGpuInsertTimeStamp() +{ + uint32_t nIndex = (S.GPU.GLTimerPos+1)%MICROPROFILE_GL_MAX_QUERIES; + glQueryCounter(S.GPU.GLTimers[nIndex], GL_TIMESTAMP); + S.GPU.GLTimerPos = nIndex; + return nIndex; +} +uint64_t MicroProfileGpuGetTimeStamp(uint32_t nKey) +{ + uint64_t result; + glGetQueryObjectui64v(S.GPU.GLTimers[nKey], GL_QUERY_RESULT, &result); + return result; +} + +uint64_t MicroProfileTicksPerSecondGpu() +{ + return 1000000000ll; +} + +int MicroProfileGetGpuTickReference(int64_t* pOutCpu, int64_t* pOutGpu) +{ + int64_t nGpuTimeStamp; + glGetInteger64v(GL_TIMESTAMP, &nGpuTimeStamp); + if(nGpuTimeStamp) + { + *pOutCpu = MP_TICK(); + *pOutGpu = nGpuTimeStamp; + #if 0 //debug test if timestamp diverges + static int64_t nTicksPerSecondCpu = MicroProfileTicksPerSecondCpu(); + static int64_t nTicksPerSecondGpu = MicroProfileTicksPerSecondGpu(); + static int64_t nGpuStart = 0; + static int64_t nCpuStart = 0; + if(!nCpuStart) + { + nCpuStart = *pOutCpu; + nGpuStart = *pOutGpu; + } + static int nCountDown = 100; + if(0 == nCountDown--) + { + int64_t nCurCpu = *pOutCpu; + int64_t nCurGpu = *pOutGpu; + double fDistanceCpu = (nCurCpu - nCpuStart) / (double)nTicksPerSecondCpu; + double fDistanceGpu = (nCurGpu - nGpuStart) / (double)nTicksPerSecondGpu; + + char buf[254]; + snprintf(buf, sizeof(buf)-1,"Distance %f %f diff %f\n", fDistanceCpu, fDistanceGpu, fDistanceCpu-fDistanceGpu); + OutputDebugString(buf); + nCountDown = 100; + } + #endif + return 1; + } + return 0; +} + + +#endif + +#undef S + +#ifdef _WIN32 +#pragma warning(pop) +#endif + + + + + +#endif +#endif +#ifdef MICROPROFILE_EMBED_HTML +#include "microprofile_html.h" +#endif -- cgit v1.2.3