diff --git a/README.md b/README.md index 8c8b727e76..9126d13b35 100644 --- a/README.md +++ b/README.md @@ -464,6 +464,7 @@ Parameters that are enabled by default have to be explicitly disabled. These (cu | `winesync` | Show wine sync method in use | | `present_mode` | Shows current vulkan [present mode](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPresentModeKHR.html) or vsync status in opengl | | `network` | Show network interfaces tx and rx kb/s. You can specify interface with `network=eth0` | +| `fex_stats` | Show FEX-Emu statistics. Default = `status+apptype+hotthreads+jitload+sigbus+smc+softfloat` | Example: `MANGOHUD_CONFIG=cpu_temp,gpu_temp,position=top-right,height=500,font_size=32` Because comma is also used as option delimiter and needs to be escaped for values with a backslash, you can use `+` like `MANGOHUD_CONFIG=fps_limit=60+30+0` instead. diff --git a/data/MangoHud.conf b/data/MangoHud.conf index 7244ff9b4a..064128d664 100644 --- a/data/MangoHud.conf +++ b/data/MangoHud.conf @@ -276,6 +276,10 @@ text_outline ### Disable / hide the hud by default # no_display +### Show FEX-Emu statistics +## Only useful for Arm64 devices running applications under emulation +# fex_stats + ### Hud position offset # offset_x=0 # offset_y=0 diff --git a/meson_options.txt b/meson_options.txt index 818c3b9079..2bdaed1c5e 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -13,3 +13,4 @@ option('mangohudctl', type: 'boolean', value : false) option('tests', type: 'feature', value: 'auto', description: 'Run tests') option('mangoplot', type: 'feature', value: 'enabled') option('dynamic_string_tokens', type: 'boolean', value: true, description: 'Use dynamic string tokens in LD_PRELOAD') +option('with_fex', type : 'boolean', value : false) diff --git a/src/fex.cpp b/src/fex.cpp new file mode 100644 index 0000000000..ffd4a14c47 --- /dev/null +++ b/src/fex.cpp @@ -0,0 +1,400 @@ +#include +#include +#include +#include +#include + +#include "fex.h" 
+#include "hud_elements.h" +#include "mesa/util/macros.h" + +namespace fex { +const char* fex_status = "Not Found!"; +std::string fex_version; +std::vector fex_load_data(200,0.f); + +fex_event_counts sigbus_counts; +fex_event_counts smc_counts; +fex_event_counts softfloat_counts; + +std::vector fex_max_thread_loads; + +constexpr static uint32_t MAXIMUM_THREAD_WAIT_TIME = 10; + +// FEX-Emu stats definitions +// Semantically these match upstream FEX-Emu. +constexpr uint32_t FEX_STATS_VERSION = 2; +enum class AppType : uint8_t { + LINUX_32, + LINUX_64, + WIN_ARM64EC, + WIN_WOW64, +}; + +// The profile statistics header that is at the base of the shared memory mapped from FEX. +// The version member is guaranteed to be first, to ensure that any version changes can be picked up immediately. +struct fex_stats_header { + uint8_t Version; + AppType app_type; + uint8_t _pad[2]; + char fex_version[48]; + // Atomic variables. std::atomic_ref isn't available until C++20, so need to use GCC builtin atomics to access. + uint32_t Head; + uint32_t Size; + uint32_t Pad; +}; + +// The thread-specific datapoints. If TID is zero then it is deallocated and happens to still be in the linked list. +struct fex_thread_stats { + // Atomic variables. + uint32_t Next; + uint32_t TID; + // Thread-specific stats. 
+ uint64_t AccumulatedJITTime; + uint64_t AccumulatedSignalTime; + uint64_t AccumulatedSIGBUSCount; + uint64_t AccumulatedSMCEvents; + uint64_t AccumulatedFloatFallbackCount; +}; + +// Sampled stats information +struct fex_stats { + int pid {-1}; + int shm_fd {-1}; + bool first_sample = true; + uint32_t shm_size{}; + uint64_t cycle_counter_frequency{}; + size_t hardware_concurrency{}; + size_t page_size{}; + + void* shm_base{}; + fex_stats_header* head{}; + fex_thread_stats* stats{}; + + struct retained_stats { + std::chrono::time_point last_seen{}; + fex_thread_stats previous{}; + fex_thread_stats current{}; + }; + std::chrono::time_point previous_sample_period; + std::map sampled_stats; +}; + +fex_stats g_stats {}; + +const char* get_fex_app_type() { + if (!g_stats.head) { + return "Unknown"; + } + + // These are the only application types that FEX-Emu supports today. + // Linux32: A 32-bit x86 Linux application + // Linux64: A 64-bit x86_64 Linux application + // arm64ec: A 64-bit x86_64 WINE application + // wow64: A 32-bit x86 WINE application + switch (g_stats.head->app_type) { + case AppType::LINUX_32: return "Linux32"; + case AppType::LINUX_64: return "Linux64"; + case AppType::WIN_ARM64EC: return "arm64ec"; + case AppType::WIN_WOW64: return "wow64"; + default: return "Unknown"; + } +} + +static fex_thread_stats *offset_to_stats(void* shm_base, uint32_t *offset) { + const auto ld = __atomic_load_n(offset, __ATOMIC_RELAXED); + if (ld == 0) return nullptr; + return reinterpret_cast(reinterpret_cast(shm_base) + ld); +} + +static fex_thread_stats *offset_to_stats(void* shm_base, uint32_t offset) { + if (offset == 0) return nullptr; + return reinterpret_cast(reinterpret_cast(shm_base) + offset); +} + +#ifdef __aarch64__ +static void memory_barrier() { + asm volatile("dmb ishst" ::: "memory"); +} +static uint64_t get_cycle_counter_frequency() { + uint64_t result; + asm ("mrs %[res], CNTFRQ_EL0;" + : [res] "=r" (result)); + return result; +} +bool is_fex_capable() 
{ + // All aarch64 systems are fex capable. + return true; +} + +#elif defined(__x86_64__) || defined(__i386__) +static void memory_barrier() { + // Intentionally empty. +} +static void cpuid(uint32_t leaf, uint32_t &eax, uint32_t &ebx, uint32_t &ecx, uint32_t &edx) { + asm volatile("cpuid" + : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) + : "a"(leaf), "c"(0)); +} + +bool is_fex_capable() { + // FEX-Emu CPUID documentation: https://github.com/FEX-Emu/FEX/blob/main/docs/CPUID.md + const uint32_t HYPERVISOR_BIT = 1U << 31; + const char FEXHypervisorString[] = "FEXIFEXIEMU"; + char HypervisorString[4 * 3]; + + uint32_t eax, ebx, ecx, edx; + // Check that the hypervisor bit is set first. Not required, but good to do. + cpuid(1, eax, ebx, ecx, edx); + if ((ecx & HYPERVISOR_BIT) != HYPERVISOR_BIT) return false; + + // Once the hypervisor bit is set, query the hypervisor leaf. + cpuid(0x4000'0000U, eax, ebx, ecx, edx); + if (eax == 0) return false; + + // If the hypervisor description matches FEX then we're good. + memcpy(&HypervisorString[0], &ebx, sizeof(uint32_t)); + memcpy(&HypervisorString[4], &ecx, sizeof(uint32_t)); + memcpy(&HypervisorString[8], &edx, sizeof(uint32_t)); + if (strncmp(HypervisorString, FEXHypervisorString, sizeof(HypervisorString)) != 0) return false; + + return true; +} + +static uint64_t get_cycle_counter_frequency() { + // In a FEX-Emu environment, the cycle counter frequency is exposed in CPUID leaf 0x15. + // This matches x86 Intel semantics on latest CPUs, see their documentation for the exact implementation details. + uint32_t eax, ebx, ecx, edx; + cpuid(0, eax, ebx, ecx, edx); + if (eax < 0x15) return 0; + + cpuid(0x15U, eax, ebx, ecx, edx); + + // Ignore scale in ebx + // Baseline clock is provided in ecx. 
+    return ecx;
+}
+#endif
+
+// Releases the currently mapped FEX stats SHM and resets every cached handle.
+// Only called while a mapping is live (shm_fd != -1).
+static void destroy_shm() {
+    munmap(g_stats.shm_base, g_stats.shm_size);
+    close(g_stats.shm_fd);
+    // Forget the PID as well. Otherwise a failed re-init leaves a stale PID next to a null
+    // header, update_fex_stats() skips its "pid == -1" early-out and dereferences that header.
+    g_stats.pid = -1;
+    g_stats.shm_fd = -1;
+    g_stats.shm_size = 0;
+    g_stats.shm_base = nullptr;
+    g_stats.head = nullptr;
+    g_stats.stats = nullptr;
+    g_stats.sampled_stats.clear();
+    // Nothing is attached any more, so fall back to the initial status text instead of
+    // continuing to show the previous process' FEX version.
+    fex_status = "Not Found!";
+}
+
+// Opens and maps the FEX-Emu stats SHM ("fex-<pid>-stats") for `pid`.
+// Any previously mapped region is destroyed first. Failure is non-fatal: the function
+// cleans up whatever it opened and leaves g_stats without a mapping.
+static void init_shm(int pid) {
+    if (g_stats.shm_fd != -1) {
+        // Destroy first if the FD changed.
+        destroy_shm();
+    }
+
+    // Initialize global hardware stats.
+    g_stats.cycle_counter_frequency = get_cycle_counter_frequency();
+    g_stats.hardware_concurrency = std::thread::hardware_concurrency();
+    // hardware_concurrency() may return 0 when the value is not computable; treat that as a
+    // single core so it cannot zero out the per-core load scaling.
+    if (g_stats.hardware_concurrency == 0) g_stats.hardware_concurrency = 1;
+    // sysconf() reports failure as -1. Test the signed result before storing it in the
+    // unsigned page_size field, where -1 would wrap to a huge alignment.
+    const long reported_page_size = sysconf(_SC_PAGESIZE);
+    g_stats.page_size = reported_page_size > 0 ? static_cast<size_t>(reported_page_size) : 4096;
+
+    // Try and open a FEX stats file that relates to the PID in focus.
+    // If this fails then it is non-fatal, just means FEX isn't creating stats for that process.
+    std::string f = "fex-";
+    f += std::to_string(pid);
+    f += "-stats";
+    int fd {-1};
+    struct stat buf{};
+    uint64_t shm_size{};
+    void* shm_base{MAP_FAILED};
+    fex_stats_header *header{};
+
+    fd = shm_open(f.c_str(), O_RDONLY, 0);
+    if (fd == -1) {
+        goto err;
+    }
+
+    if (fstat(fd, &buf) == -1) {
+        goto err;
+    }
+
+    // The region must at least hold the header we are about to read.
+    if (buf.st_size < static_cast<off_t>(sizeof(fex_stats_header))) {
+        goto err;
+    }
+
+    shm_size = ALIGN_POT(buf.st_size, g_stats.page_size);
+
+    shm_base = mmap(nullptr, shm_size, PROT_READ, MAP_SHARED, fd, 0);
+    if (shm_base == MAP_FAILED) {
+        goto err;
+    }
+
+    memory_barrier();
+    header = reinterpret_cast<fex_stats_header*>(shm_base);
+    if (header->Version != FEX_STATS_VERSION) {
+        // If the version read doesn't match the implementation then we can't read.
+        fex_status = "version mismatch";
+        goto err;
+    }
+
+    // Cache off the information, we have successfully loaded the stats SHM.
+ g_stats.pid = pid; + g_stats.shm_fd = fd; + g_stats.shm_size = shm_size; + g_stats.shm_base = shm_base; + g_stats.head = header; + g_stats.stats = offset_to_stats(shm_base, &header->Head); + g_stats.previous_sample_period = std::chrono::steady_clock::now(); + g_stats.first_sample = true; + g_stats.sampled_stats.clear(); + fex_version = std::string {header->fex_version, strnlen(header->fex_version, sizeof(header->fex_version))}; + sigbus_counts.account_time(g_stats.previous_sample_period); + smc_counts.account_time(g_stats.previous_sample_period); + softfloat_counts.account_time(g_stats.previous_sample_period); + std::fill(fex_load_data.begin(), fex_load_data.end(), 0.0); + fex_max_thread_loads.clear(); + return; +err: + if (fd != -1) { + close(fd); + } + + if (shm_base != MAP_FAILED) { + munmap(shm_base, shm_size); + } +} + +static void check_shm_update_necessary() { + // If the SHM has changed size then we need to unmap and remap with the new size. + // Required since FEX may grow the SHM region to fit more threads, although previous thread data won't be invalidated. + memory_barrier(); + auto new_shm_size = ALIGN_POT(__atomic_load_n(&g_stats.head->Size, __ATOMIC_RELAXED), g_stats.page_size); + if (g_stats.shm_size == new_shm_size) { + return; + } + + munmap(g_stats.shm_base, g_stats.shm_size); + g_stats.shm_size = new_shm_size; + g_stats.shm_base = mmap(nullptr, new_shm_size, PROT_READ, MAP_SHARED, g_stats.shm_fd, 0); + g_stats.head = reinterpret_cast(g_stats.shm_base); + g_stats.stats = offset_to_stats(g_stats.shm_base, &g_stats.head->Head); +} + +bool is_fex_pid_found() { + return g_stats.pid != -1; +} + +void update_fex_stats() { + auto gs_pid = HUDElements.g_gamescopePid > 0 ? HUDElements.g_gamescopePid : ::getpid(); + if (gs_pid < 1) { + // No PID yet. + return; + } + + if (g_stats.pid != gs_pid) { + // PID changed, likely gamescope changed focus. + init_shm(gs_pid); + } + + if (g_stats.pid == -1) { + // PID became invalid. Likely due to error reading SHM. 
+        return;
+    }
+
+    // Check if SHM changed first.
+    check_shm_update_necessary();
+
+    // Before reading stats, a memory barrier needs to be done.
+    // This ensures visibility of the stats before reading, as they use weak atomics for writes.
+    memory_barrier();
+
+    // Sample the stats and store them off.
+    // Sampling these quickly lets us become a loose sampling profiler, since FEX updates these constantly.
+    uint32_t *header_offset_atomic = &g_stats.head->Head;
+    auto now = std::chrono::steady_clock::now();
+    // The list lives in memory written by another process. The mapping cannot hold more
+    // nodes than this, so a longer walk means the chain loops back on itself.
+    const size_t maximum_nodes = g_stats.shm_size / sizeof(fex_thread_stats);
+    size_t nodes_visited = 0;
+    for (auto header_offset = __atomic_load_n(header_offset_atomic, __ATOMIC_RELAXED);
+         header_offset != 0;
+         header_offset = __atomic_load_n(header_offset_atomic, __ATOMIC_RELAXED)) {
+        // The whole node must lie inside the mapping, not just its first byte.
+        if (static_cast<uint64_t>(header_offset) + sizeof(fex_thread_stats) > g_stats.shm_size) break;
+        if (++nodes_visited > maximum_nodes) break;
+
+        fex_thread_stats *stat = offset_to_stats(g_stats.shm_base, header_offset);
+        const auto TID = __atomic_load_n(&stat->TID, __ATOMIC_RELAXED);
+        // A zero TID marks a deallocated entry that is still linked; skip it.
+        if (TID != 0) {
+            fex_stats::retained_stats &sampled_stats = g_stats.sampled_stats[TID];
+            memcpy(&sampled_stats.current, stat, sizeof(fex_thread_stats));
+            sampled_stats.last_seen = now;
+        }
+
+        header_offset_atomic = &stat->Next;
+    }
+
+    if (g_stats.first_sample) {
+        // Skip first sample, it'll look crazy.
+        g_stats.first_sample = false;
+        fex_status = "Accumulating";
+        return;
+    }
+
+    // Update the status with the FEX version.
+ fex_status = fex_version.c_str(); + + // Accumulate full JIT time + uint64_t total_jit_time{}; + uint64_t total_sigbus_events{}; + uint64_t total_smc_events{}; + uint64_t total_softfloat_events{}; + size_t threads_sampled{}; +#define accumulate(dest, name) dest += it->second.current.name - it->second.previous.name + std::vector hottest_threads{}; + for (auto it = g_stats.sampled_stats.begin(); it != g_stats.sampled_stats.end();) { + ++threads_sampled; + uint64_t total_time{}; + accumulate(total_time, AccumulatedJITTime); + accumulate(total_time, AccumulatedSignalTime); + accumulate(total_sigbus_events, AccumulatedSIGBUSCount); + accumulate(total_smc_events, AccumulatedSMCEvents); + accumulate(total_softfloat_events, AccumulatedFloatFallbackCount); + + memcpy(&it->second.previous, &it->second.current, sizeof(fex_thread_stats)); + + total_jit_time += total_time; + if ((now - it->second.last_seen) >= std::chrono::seconds(MAXIMUM_THREAD_WAIT_TIME)) { + it = g_stats.sampled_stats.erase(it); + continue; + } + hottest_threads.emplace_back(total_time); + ++it; + } + + std::sort(hottest_threads.begin(), hottest_threads.end(), std::greater()); + + // Calculate loads based on the sample period that occurred. + // FEX-Emu only counts cycles for the amount of time, so we need to calculate load based on the number of cycles that the sample period has. + const auto sample_period = now - g_stats.previous_sample_period; + + const double NanosecondsInSeconds = 1'000'000'000.0; + const double SamplePeriodNanoseconds = std::chrono::duration_cast(sample_period).count(); + const double MaximumCyclesInSecond = (double)g_stats.cycle_counter_frequency; + const double MaximumCyclesInSamplePeriod = MaximumCyclesInSecond * (SamplePeriodNanoseconds / NanosecondsInSeconds); + const double MaximumCoresThreadsPossible = std::min(g_stats.hardware_concurrency, threads_sampled); + + // Calculate the percentage of JIT time that could possibly exist inside the sample period. 
+ double fex_load = ((double)total_jit_time / (MaximumCyclesInSamplePeriod * MaximumCoresThreadsPossible)) * 100.0; + size_t minimum_hot_threads = std::min(g_stats.hardware_concurrency, hottest_threads.size()); + // For the top thread-loads, we are only ever showing up to how many hardware threads are available. + fex_max_thread_loads.resize(minimum_hot_threads); + for (size_t i = 0; i < minimum_hot_threads; ++i) { + fex_max_thread_loads[i] = ((double)hottest_threads[i] / MaximumCyclesInSamplePeriod) * 100.0; + } + + sigbus_counts.account(total_sigbus_events, now); + smc_counts.account(total_smc_events, now); + softfloat_counts.account(total_softfloat_events, now); + + g_stats.previous_sample_period = now; + + fex_load_data.push_back(fex_load); + fex_load_data.erase(fex_load_data.begin()); +} +} diff --git a/src/fex.h b/src/fex.h new file mode 100644 index 0000000000..213f284ffd --- /dev/null +++ b/src/fex.h @@ -0,0 +1,57 @@ +#pragma once +#ifdef HAVE_FEX +#ifndef MANGOHUD_FEX_H +#define MANGOHUD_FEX_H +#include +#include +#include + +namespace fex { +bool is_fex_capable(); +bool is_fex_pid_found(); +const char* get_fex_app_type(); + +extern const char* fex_status; +extern std::string fex_version; + +extern std::vector fex_load_data; + +struct fex_event_counts { + public: + void account(uint64_t total, std::chrono::time_point now) { + count = total; + last_sample_count += total; + + const auto diff = now - last_chrono; + if (diff >= std::chrono::seconds(1)) { + // Calculate the average over the last second. 
+ const double NanosecondsInSeconds = 1'000'000'000.0; + const auto diff_ns = std::chrono::duration_cast(diff).count(); + const double Percentage = (double)diff_ns / NanosecondsInSeconds; + average_sec = double(last_sample_count) * Percentage; + last_sample_count = 0; + last_chrono = now; + } + } + + void account_time(std::chrono::time_point now) { + last_chrono = now; + } + uint64_t Count() const { return count; } + double Avg() const { return average_sec; } + private: + uint64_t count{}; + uint64_t last_sample_count{}; + double average_sec{}; + std::chrono::time_point last_chrono{}; +}; +extern fex_event_counts sigbus_counts; +extern fex_event_counts smc_counts; +extern fex_event_counts softfloat_counts; + +extern std::vector fex_max_thread_loads; +void update_fex_stats(); +} + +#endif //MANGOHUD_FEX_H +#endif //HAVE_FEX diff --git a/src/hud_elements.cpp b/src/hud_elements.cpp index 47bc807739..e8209088e3 100644 --- a/src/hud_elements.cpp +++ b/src/hud_elements.cpp @@ -25,6 +25,7 @@ #endif #include "amdgpu.h" #include "fps_metrics.h" +#include "fex.h" #define CHAR_CELSIUS "\xe2\x84\x83" #define CHAR_FAHRENHEIT "\xe2\x84\x89" @@ -1534,6 +1535,118 @@ void HudElements::_display_session() { ImGui::PopFont(); } +void HudElements::fex_stats() +{ +#ifdef HAVE_FEX + if (!HUDElements.params->fex_stats.enabled) { + return; + } + + ImGui::PushFont(HUDElements.sw_stats->font1); + + if (HUDElements.params->fex_stats.status) { + ImguiNextColumnFirstItem(); + HUDElements.TextColored(HUDElements.colors.engine, "%s", "FEX"); + ImguiNextColumnOrNewRow(); + ImGui::Dummy(ImVec2(0.0f, real_font_size.y)); + ImguiNextColumnOrNewRow(); + right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "%s", fex::fex_status); + } + + if (!fex::is_fex_pid_found()) { + ImGui::PopFont(); + return; + } + + if (HUDElements.params->fex_stats.app_type) { + ImguiNextColumnFirstItem(); + HUDElements.TextColored(HUDElements.colors.engine, "%s", "Type"); + ImguiNextColumnOrNewRow(); + 
ImGui::Dummy(ImVec2(0.0f, real_font_size.y)); + ImguiNextColumnOrNewRow(); + right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "%s", fex::get_fex_app_type()); + } + + if (HUDElements.params->fex_stats.sigbus_counts) { + ImguiNextColumnFirstItem(); + HUDElements.TextColored(HUDElements.colors.engine, "%s", "SIGBUS"); + ImguiNextColumnOrNewRow(); + ImGui::Dummy(ImVec2(0.0f, real_font_size.y)); + ImguiNextColumnOrNewRow(); + right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "%d - %.0f avg/s", fex::sigbus_counts.Count(), fex::sigbus_counts.Avg()); + } + + if (HUDElements.params->fex_stats.smc_counts) { + ImguiNextColumnFirstItem(); + HUDElements.TextColored(HUDElements.colors.engine, "%s", "SMC"); + ImguiNextColumnOrNewRow(); + ImGui::Dummy(ImVec2(0.0f, real_font_size.y)); + ImguiNextColumnOrNewRow(); + right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "%d - %.0f avg/s", fex::smc_counts.Count(), fex::smc_counts.Avg()); + } + + if (HUDElements.params->fex_stats.softfloat_counts) { + ImguiNextColumnFirstItem(); + HUDElements.TextColored(HUDElements.colors.engine, "%s", "Softfloat"); + ImguiNextColumnOrNewRow(); + ImGui::Dummy(ImVec2(0.0f, real_font_size.y)); + ImguiNextColumnOrNewRow(); + right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "%d - %.0f avg/s", fex::softfloat_counts.Count(), fex::softfloat_counts.Avg()); + } + + ImGui::PopFont(); + + ImguiNextColumnFirstItem(); + ImGui::Dummy(ImVec2(0.0f, real_font_size.y)); + + if (HUDElements.params->fex_stats.hot_threads) { + // Draw hot threads + bool Warning = false; + ImVec4 WarningColor; + + for (auto it : fex::fex_max_thread_loads){ + if (it >= 75.0) { + Warning = true; + WarningColor = ImVec4(1.0f, 0.0f, 0.0f, 1.0f); + } + else if (it >= 50.0) { + Warning = true; + WarningColor = ImVec4(1.0f, 1.0f, 0.0f, 1.0f); + } + } + + ImGui::PushFont(HUDElements.sw_stats->font1); + HUDElements.TextColored(HUDElements.colors.engine, "%s", "FEX JIT top 
loaded threads"); + ImGui::PopFont(); + + ImGui::PushStyleColor(ImGuiCol_FrameBg, ImVec4(0.0f, 0.0f, 0.0f, 0.0f)); + if (Warning) { + ImGui::PushStyleColor(ImGuiCol_PlotHistogram, WarningColor); + } + + ImGui::PlotHistogram("", fex::fex_max_thread_loads.data(), + fex::fex_max_thread_loads.size(), 0, + NULL, 0, 100, + ImVec2(ImGui::GetWindowContentRegionWidth(), 50)); + ImGui::PopStyleColor(1 + (Warning ? 1 : 0)); + } + + if (HUDElements.params->fex_stats.jit_load) { + ImGui::PushFont(HUDElements.sw_stats->font1); + HUDElements.TextColored(HUDElements.colors.engine, "%s", "FEX JIT Load"); + ImGui::PopFont(); + + ImGui::PushStyleColor(ImGuiCol_FrameBg, ImVec4(0.0f, 0.0f, 0.0f, 0.0f)); + + ImGui::PlotLines("", fex::fex_load_data.data(), + fex::fex_load_data.size(), 0, + NULL, 0, 100, + ImVec2(ImGui::GetWindowContentRegionWidth(), 50)); + ImGui::PopStyleColor(1); + } +#endif //HAVE_FEX +} + void HudElements::sort_elements(const std::pair& option) { const auto& param = option.first; const auto& value = option.second; @@ -1582,8 +1695,8 @@ void HudElements::sort_elements(const std::pair& optio {"winesync", {winesync}}, {"present_mode", {present_mode}}, {"network", {network}}, - {"display_server", {_display_session}} - + {"display_server", {_display_session}}, + {"fex_stats", {fex_stats}}, }; auto check_param = display_params.find(param); @@ -1712,6 +1825,8 @@ void HudElements::legacy_elements(){ ordered_functions.push_back({refresh_rate, "refresh_rate", value}); if (params->enabled[OVERLAY_PARAM_ENABLED_display_server]) ordered_functions.push_back({_display_session, "display_session", value}); + if (params->fex_stats.enabled) + ordered_functions.push_back({fex_stats, "fex_stats", value}); } void HudElements::update_exec(){ diff --git a/src/hud_elements.h b/src/hud_elements.h index 6da8df1aa1..2fa512cbb4 100644 --- a/src/hud_elements.h +++ b/src/hud_elements.h @@ -117,6 +117,7 @@ class HudElements{ static void present_mode(); static void network(); static void 
_display_session(); + static void fex_stats(); void convert_colors(const struct overlay_params& params); void convert_colors(bool do_conv, const struct overlay_params& params); diff --git a/src/meson.build b/src/meson.build index a05207910a..5713539bdf 100644 --- a/src/meson.build +++ b/src/meson.build @@ -93,9 +93,16 @@ if is_unixy 'control.cpp', 'device.cpp', 'net.cpp', - 'shell.cpp' + 'shell.cpp', ) + if get_option('with_fex') + pre_args += '-DHAVE_FEX' + vklayer_files += files( + 'fex.cpp', + ) + endif + opengl_files = files( 'gl/glad.c', 'gl/gl_renderer.cpp', diff --git a/src/overlay.cpp b/src/overlay.cpp index c9aa4f802a..a2b741fd10 100644 --- a/src/overlay.cpp +++ b/src/overlay.cpp @@ -25,6 +25,7 @@ #include "amdgpu.h" #include "fps_metrics.h" #include "net.h" +#include "fex.h" #ifdef __linux__ #include @@ -247,6 +248,9 @@ void update_hud_info_with_frametime(struct swapchain_stats& sw_stats, const stru #ifdef __linux__ if (gpus) gpus->update_throttling(); +#endif +#ifdef HAVE_FEX + fex::update_fex_stats(); #endif frametime = frametime_ms; fps = double(1000 / frametime_ms); diff --git a/src/overlay_params.cpp b/src/overlay_params.cpp index cf4446a87f..7863e2c5ef 100644 --- a/src/overlay_params.cpp +++ b/src/overlay_params.cpp @@ -29,6 +29,7 @@ #include "blacklist.h" #include "mesa/util/os_socket.h" #include "file_utils.h" +#include "fex.h" #if defined(HAVE_X11) || defined(HAVE_WAYLAND) #include @@ -418,6 +419,43 @@ parse_fps_metrics(const char *str){ return metrics; } +static overlay_params::fex_stats_options +parse_fex_stats(const char *str) { + overlay_params::fex_stats_options options { +#ifdef HAVE_FEX + .enabled = fex::is_fex_capable(), +#endif + }; + + auto tokens = str_tokenize(str); +#define option_check(str, option) do { \ + if (token == #str) options.option = true; \ + } while (0) + + // If we have any tokens then default disable. 
+ if (!tokens.empty()) { + options.status = false; + options.app_type = false; + options.hot_threads = false; + options.jit_load = false; + options.sigbus_counts = false; + options.smc_counts = false; + options.softfloat_counts = false; + } + + for (auto& token : tokens) { + option_check(status, status); + option_check(apptype, app_type); + option_check(hotthreads, hot_threads); + option_check(jitload, jit_load); + option_check(sigbus, sigbus_counts); + option_check(smc, smc_counts); + option_check(softfloat, softfloat_counts); + } + + return options; +} + #define parse_width(s) parse_unsigned(s) #define parse_height(s) parse_unsigned(s) #define parse_vsync(s) parse_unsigned(s) diff --git a/src/overlay_params.h b/src/overlay_params.h index 5ad56a8d5b..b6c59ea69a 100644 --- a/src/overlay_params.h +++ b/src/overlay_params.h @@ -200,6 +200,7 @@ typedef unsigned long KeySym; OVERLAY_PARAM_CUSTOM(fps_metrics) \ OVERLAY_PARAM_CUSTOM(network) \ OVERLAY_PARAM_CUSTOM(gpu_list) \ + OVERLAY_PARAM_CUSTOM(fex_stats) \ enum overlay_param_position { LAYER_POSITION_TOP_LEFT, @@ -327,6 +328,24 @@ struct overlay_params { std::vector fps_metrics; std::vector network; std::vector gpu_list; + + struct fex_stats_options { + bool enabled {false}; + + // Enabled Texts + bool status {true}; + bool app_type {true}; + + // Graphs + bool hot_threads {true}; + bool jit_load {true}; + + // Counts + bool sigbus_counts {true}; + bool smc_counts {true}; + bool softfloat_counts {true}; + }; + fex_stats_options fex_stats{}; }; const extern char *overlay_param_names[];