diff --git a/UI/ProfilerDraw.cpp b/UI/ProfilerDraw.cpp index a4ffb8bbb3..8d7cf74021 100644 --- a/UI/ProfilerDraw.cpp +++ b/UI/ProfilerDraw.cpp @@ -55,6 +55,7 @@ void DrawProfile(UIContext &ui) { #ifdef USE_PROFILER PROFILE_THIS_SCOPE("timing"); int numCategories = Profiler_GetNumCategories(); + int numThreads = Profiler_GetNumThreads(); int historyLength = Profiler_GetHistoryLength(); ui.SetFontStyle(ui.theme->uiFont); @@ -63,8 +64,11 @@ void DrawProfile(UIContext &ui) { float legendMinVal = lastMaxVal * (1.0f / 120.0f); std::vector history(historyLength); + std::vector slowestThread(historyLength); std::vector catStatus(numCategories); + Profiler_GetSlowestThreads(&slowestThread[0], historyLength); + float rowH = 30.0f; float legendHeight = 0.0f; float legendWidth = 80.0f; @@ -75,7 +79,7 @@ void DrawProfile(UIContext &ui) { continue; } - Profiler_GetHistory(i, &history[0], historyLength); + Profiler_GetSlowestHistory(i, &slowestThread[0], &history[0], historyLength); catStatus[i] = PROFILE_CAT_NOLEGEND; for (int j = 0; j < historyLength; ++j) { if (history[j] > legendMinVal) { @@ -153,7 +157,7 @@ void DrawProfile(UIContext &ui) { if (catStatus[i] == PROFILE_CAT_IGNORE) { continue; } - Profiler_GetHistory(i, &history[0], historyLength); + Profiler_GetSlowestHistory(i, &slowestThread[0], &history[0], historyLength); float x = 10; uint32_t col = nice_colors[i % ARRAY_SIZE(nice_colors)]; diff --git a/ext/native/profiler/profiler.cpp b/ext/native/profiler/profiler.cpp index d48ad7ebbf..35e270b477 100644 --- a/ext/native/profiler/profiler.cpp +++ b/ext/native/profiler/profiler.cpp @@ -1,8 +1,10 @@ // Ultra-lightweight category profiler with history. -#include -#include +#include +#include #include +#include +#include #include @@ -11,8 +13,9 @@ #include "gfx_es2/draw_buffer.h" #include "profiler/profiler.h" -#define MAX_CATEGORIES 32 // Can be any number -#define MAX_DEPTH 16 // Can be any number +#define MAX_CATEGORIES 64 // Can be any number, represents max profiled names. +#define MAX_DEPTH 16 // Can be any number, represents max nesting depth of profiled names. +#define MAX_THREADS 4 // Can be any number, represents concurrent threads calling the profiler. #define HISTORY_SIZE 128 // Must be power of 2 #ifndef _DEBUG @@ -27,33 +30,63 @@ struct Category { struct CategoryFrame { CategoryFrame() { memset(time_taken, 0, sizeof(time_taken)); + memset(count, 0, sizeof(count)); } float time_taken[MAX_CATEGORIES]; int count[MAX_CATEGORIES]; }; struct Profiler { - int frameCount; int historyPos; - int depth; - int parentCategory[MAX_DEPTH]; - double eventStart[MAX_CATEGORIES]; + int depth[MAX_THREADS]; + int parentCategory[MAX_THREADS][MAX_DEPTH]; + double eventStart[MAX_THREADS][MAX_CATEGORIES]; double curFrameStart; }; static Profiler profiler; static Category categories[MAX_CATEGORIES]; +static std::mutex categoriesLock; +static int threadIdAfterLast = 0; +static std::mutex threadsLock; static CategoryFrame *history; +#if MAX_THREADS > 1 +thread_local int profilerThreadId = -1; +#else +static int profilerThreadId = 0; +#endif void internal_profiler_init() { memset(&profiler, 0, sizeof(profiler)); - for (int i = 0; i < MAX_DEPTH; i++) { - profiler.parentCategory[i] = -1; +#if MAX_THREADS == 1 + threadIdAfterLast = 1; +#endif + for (int i = 0; i < MAX_THREADS; i++) { + for (int j = 0; j < MAX_DEPTH; j++) { + profiler.parentCategory[i][j] = -1; + } } - history = new CategoryFrame[HISTORY_SIZE]; + history = new CategoryFrame[HISTORY_SIZE * MAX_THREADS]; } -int internal_profiler_find_cat(const char *category_name) { +static int internal_profiler_find_thread() { + int thread_id = profilerThreadId; + if (thread_id != -1) { + return thread_id; + } + + std::lock_guard guard(threadsLock); + if (threadIdAfterLast < MAX_THREADS) { + thread_id = threadIdAfterLast++; + profilerThreadId = thread_id; + return thread_id; + } + + // Just keep reusing the last one. + return threadIdAfterLast - 1; +} + +int internal_profiler_find_cat(const char *category_name, bool create_missing) { int i; for (i = 0; i < MAX_CATEGORIES; i++) { const char *catname = categories[i].name; @@ -68,8 +101,11 @@ int internal_profiler_find_cat(const char *category_name) { } } - if (i < MAX_CATEGORIES && category_name) { - categories[i].name = category_name; + if (i < MAX_CATEGORIES && category_name && create_missing) { + std::lock_guard guard(categoriesLock); + int race_check = internal_profiler_find_cat(category_name, false); + if (race_check == -1) + categories[i].name = category_name; return i; } @@ -77,79 +113,86 @@ int internal_profiler_find_cat(const char *category_name) { } // Suspend, also used to prepare for leaving. -static void internal_profiler_suspend(int category, double now) { - double diff = now - profiler.eventStart[category]; - history[profiler.historyPos].time_taken[category] += (float)diff; - profiler.eventStart[category] = 0.0; +static void internal_profiler_suspend(int thread_id, int category, double now) { + double diff = now - profiler.eventStart[thread_id][category]; + history[MAX_THREADS * profiler.historyPos + thread_id].time_taken[category] += (float)diff; + profiler.eventStart[thread_id][category] = 0.0; } // Resume, also used as part of entering. -static void internal_profiler_resume(int category, double now) { - profiler.eventStart[category] = now; +static void internal_profiler_resume(int thread_id, int category, double now) { + profiler.eventStart[thread_id][category] = now; } -int internal_profiler_enter(const char *category_name) { - int category = internal_profiler_find_cat(category_name); +int internal_profiler_enter(const char *category_name, int *out_thread_id) { + int category = internal_profiler_find_cat(category_name, true); + int thread_id = internal_profiler_find_thread(); if (category == -1 || !history) { return category; } - if (profiler.eventStart[category] == 0.0f) { + int &depth = profiler.depth[thread_id]; + if (profiler.eventStart[thread_id][category] == 0.0f) { double now = real_time_now(); - int parent = profiler.parentCategory[profiler.depth]; + int parent = profiler.parentCategory[thread_id][depth]; // Temporarily suspend the parent on entering a child. if (parent != -1) { - internal_profiler_suspend(parent, now); + internal_profiler_suspend(thread_id, parent, now); } - internal_profiler_resume(category, now); + internal_profiler_resume(thread_id, category, now); } else { DLOG("profiler: recursive enter (%i - %s)", category, category_name); } - profiler.depth++; - profiler.parentCategory[profiler.depth] = category; + depth++; + profiler.parentCategory[thread_id][depth] = category; + *out_thread_id = thread_id; return category; } -void internal_profiler_leave(int category) { +void internal_profiler_leave(int thread_id, int category) { if (category == -1 || !history) { return; } + + int &depth = profiler.depth[thread_id]; if (category < 0 || category >= MAX_CATEGORIES) { ELOG("Bad category index %d", category); - profiler.depth--; + depth--; return; } double now = real_time_now(); - profiler.depth--; - if (profiler.depth < 0) { + depth--; + if (depth < 0) { FLOG("Profiler enter/leave mismatch!"); } - int parent = profiler.parentCategory[profiler.depth]; + int parent = profiler.parentCategory[thread_id][depth]; // When there's recursion, we don't suspend or resume. if (parent != category) { - internal_profiler_suspend(category, now); - history[profiler.historyPos].count[category]++; + internal_profiler_suspend(thread_id, category, now); + history[MAX_THREADS * profiler.historyPos + thread_id].count[category]++; if (parent != -1) { // Resume tracking the parent. - internal_profiler_resume(parent, now); + internal_profiler_resume(thread_id, parent, now); } } } void internal_profiler_end_frame() { - if (profiler.depth != 0) { + int thread_id = internal_profiler_find_thread(); + if (profiler.depth[thread_id] != 0) { + // Threads may be off, but they'll fall into another frame. FLOG("Can't be inside a profiler scope at end of frame!"); } profiler.curFrameStart = real_time_now(); profiler.historyPos++; profiler.historyPos &= (HISTORY_SIZE - 1); - memset(&history[profiler.historyPos], 0, sizeof(history[profiler.historyPos])); + memset(&history[MAX_THREADS * profiler.historyPos], 0, sizeof(CategoryFrame) * MAX_THREADS); } const char *Profiler_GetCategoryName(int i) { @@ -168,13 +211,54 @@ int Profiler_GetNumCategories() { return 0; } -void Profiler_GetHistory(int category, float *data, int count) { +int Profiler_GetNumThreads() { + return threadIdAfterLast; +} + +void Profiler_GetSlowestThreads(int *data, int count) { + int numCategories = Profiler_GetNumCategories(); for (int i = 0; i < HISTORY_SIZE; i++) { int x = i - count + profiler.historyPos + 1; while (x < 0) x += HISTORY_SIZE; while (x >= HISTORY_SIZE) x -= HISTORY_SIZE; - data[i] = history[x].time_taken[category]; + + float slowestTime = 0.0f; + data[i] = 0; + for (int thread = 0; thread < threadIdAfterLast; ++thread) { + float sum = 0.0f; + for (int c = 0; c < numCategories; ++c) { + sum += history[MAX_THREADS * x + thread].time_taken[c]; + } + if (sum > slowestTime) { + slowestTime = sum; + data[i] = thread; + } + } + } +} + +void Profiler_GetSlowestHistory(int category, int *slowestThreads, float *data, int count) { + for (int i = 0; i < HISTORY_SIZE; i++) { + int x = i - count + profiler.historyPos + 1; + while (x < 0) + x += HISTORY_SIZE; + while (x >= HISTORY_SIZE) + x -= HISTORY_SIZE; + + int thread = slowestThreads[i]; + data[i] = history[MAX_THREADS * x + thread].time_taken[category]; + } +} + +void Profiler_GetHistory(int category, int thread, float *data, int count) { + for (int i = 0; i < HISTORY_SIZE; i++) { + int x = i - count + profiler.historyPos + 1; + while (x < 0) + x += HISTORY_SIZE; + while (x >= HISTORY_SIZE) + x -= HISTORY_SIZE; + data[i] = history[MAX_THREADS * x + thread].time_taken[category]; } } diff --git a/ext/native/profiler/profiler.h b/ext/native/profiler/profiler.h index f615e2e88c..e5addb1ac4 100644 --- a/ext/native/profiler/profiler.h +++ b/ext/native/profiler/profiler.h @@ -2,8 +2,6 @@ #include -// NOTE : This profiler is very single-threaded. Cannot be used from multiple threads yet. - // #define USE_PROFILER #ifdef USE_PROFILER @@ -13,24 +11,28 @@ class DrawBuffer; void internal_profiler_init(); void internal_profiler_end_frame(); -int internal_profiler_enter(const char *category_name); // Returns the category number. -void internal_profiler_leave(int category); +int internal_profiler_enter(const char *category_name, int *thread_id); // Returns the category number. +void internal_profiler_leave(int thread_id, int category); const char *Profiler_GetCategoryName(int i); int Profiler_GetNumCategories(); int Profiler_GetHistoryLength(); -void Profiler_GetHistory(int i, float *data, int count); +int Profiler_GetNumThreads(); +void Profiler_GetSlowestThreads(int *data, int count); +void Profiler_GetSlowestHistory(int category, int *slowestThreads, float *data, int count); +void Profiler_GetHistory(int category, int thread, float *data, int count); class ProfileThis { public: ProfileThis(const char *category) { - cat_ = internal_profiler_enter(category); + cat_ = internal_profiler_enter(category, &thread_); } ~ProfileThis() { - internal_profiler_leave(cat_); + internal_profiler_leave(thread_, cat_); } private: int cat_; + int thread_; }; #define PROFILE_INIT() internal_profiler_init();