From 9b2ed6910ef456057a75316ae98e07cecbfc145c Mon Sep 17 00:00:00 2001 From: icex2 Date: Sun, 8 Oct 2023 15:03:53 +0200 Subject: [PATCH] feat: ddrio-async wrapper/shim driver for async IO Wrapper/shim library to drive another ddrio in a dedicated IO thread. Depending on the other ddrio backend used, this can significantly improve performance while staying compatible with the existing ddrio API interface. This turned out to be a good solution to the performance problems when running MAME with ddrio-p3io, which is (currently) implemented with synchronous IO calls that are very costly: ~12 ms for a write over the ACIO protocol, ~4 ms for a read using an IOCTL. As this already adds up to nearly a full frame (60 fps) regarding latency, there isn't a lot of time left to do other stuff in a synchronous update loop. MAME's performance was unstable and constantly dropped below 100%. The result was a choppy gameplay experience. Combining ddrio-async with ddrio-p3io, the combined backend is able to drive inputs/outputs at a rate of ~250 Hz = ~4 updates per frame. This results in an average input latency of ~4 ms, which is as good as it can get with the p3io hardware's performance limitations that I measured (see the 4 ms for the IOCTL mentioned above). This is more than good enough, as the update frequency of the 573 hardware is slightly lower than that (I was told something like ~180 Hz?). tl;dr: Gameplay on MAME is great, smooth frame rate, IO feels amazing and responsive. 
--- Module.mk | 3 + src/main/ddrio-async/Module.mk | 9 + src/main/ddrio-async/ddrio-async.def | 11 ++ src/main/ddrio-async/ddrio.c | 268 +++++++++++++++++++++++++++ 4 files changed, 291 insertions(+) create mode 100644 src/main/ddrio-async/Module.mk create mode 100644 src/main/ddrio-async/ddrio-async.def create mode 100644 src/main/ddrio-async/ddrio.c diff --git a/Module.mk b/Module.mk index 9480207..4178a42 100644 --- a/Module.mk +++ b/Module.mk @@ -104,6 +104,7 @@ include src/main/d3d9exhook/Module.mk include src/main/ddrhook-util/Module.mk include src/main/ddrhook1/Module.mk include src/main/ddrhook2/Module.mk +include src/main/ddrio-async/Module.mk include src/main/ddrio-p3io/Module.mk include src/main/ddrio-mm/Module.mk include src/main/ddrio-smx/Module.mk @@ -730,6 +731,7 @@ $(zipdir)/ddr-16-x64.zip: \ $(V)zip -j $@ $^ $(zipdir)/ddr-hwio-x86.zip: \ + build/bin/indep-32/ddrio-async.dll \ build/bin/indep-32/ddrio-p3io.dll \ build/bin/indep-32/ddrio-mm.dll \ build/bin/indep-32/ddrio-smx.dll \ @@ -741,6 +743,7 @@ $(zipdir)/ddr-hwio-x86.zip: \ $(V)zip -j $@ $^ $(zipdir)/ddr-hwio-x64.zip: \ + build/bin/indep-64/ddrio-async.dll \ build/bin/indep-64/ddrio-p3io.dll \ build/bin/indep-64/ddrio-mm.dll \ build/bin/indep-64/ddrio-smx.dll \ diff --git a/src/main/ddrio-async/Module.mk b/src/main/ddrio-async/Module.mk new file mode 100644 index 0000000..62006cf --- /dev/null +++ b/src/main/ddrio-async/Module.mk @@ -0,0 +1,9 @@ +dlls += ddrio-async + +ldflags_ddrio-async:= \ + +libs_ddrio-async := \ + util \ + +src_ddrio-async := \ + ddrio.c \ diff --git a/src/main/ddrio-async/ddrio-async.def b/src/main/ddrio-async/ddrio-async.def new file mode 100644 index 0000000..9a399dd --- /dev/null +++ b/src/main/ddrio-async/ddrio-async.def @@ -0,0 +1,11 @@ +LIBRARY ddrio-async + +EXPORTS + ddr_io_set_loggers + ddr_io_fini + ddr_io_init + ddr_io_read_pad + ddr_io_set_lights_extio + ddr_io_set_lights_p3io + ddr_io_set_lights_hdxs_panel + ddr_io_set_lights_hdxs_rgb diff --git 
a/src/main/ddrio-async/ddrio.c b/src/main/ddrio-async/ddrio.c new file mode 100644 index 0000000..9b857fd --- /dev/null +++ b/src/main/ddrio-async/ddrio.c @@ -0,0 +1,268 @@ +#define LOG_MODULE "ddrio-async" + +#include + +#include +#include +#include +#include + +#include + +#include "bemanitools/ddrio.h" + +#include "util/log.h" +#include "util/thread.h" +#include "util/time.h" + +typedef void (*ddr_io_set_loggers_t)( + log_formatter_t misc, + log_formatter_t info, + log_formatter_t warning, + log_formatter_t fatal); +typedef bool (*ddr_io_init_t)( + thread_create_t thread_create, + thread_join_t thread_join, + thread_destroy_t thread_destroy); +typedef uint32_t (*ddr_io_read_pad_t)(void); +typedef void (*ddr_io_set_lights_extio_t)(uint32_t extio_lights); +typedef void (*ddr_io_set_lights_p3io_t)(uint32_t p3io_lights); +typedef void (*ddr_io_set_lights_hdxs_panel_t)(uint32_t hdxs_lights); +typedef void (*ddr_io_set_lights_hdxs_rgb_t)(uint8_t idx, uint8_t r, uint8_t g, uint8_t b); +typedef void (*ddr_io_fini_t)(void); + +static HMODULE _child_ddr_io_module; + +static ddr_io_set_loggers_t _child_ddr_io_set_loggers; +static ddr_io_init_t _child_ddr_io_init; +static ddr_io_read_pad_t _child_ddr_io_read_pad; +static ddr_io_set_lights_extio_t _child_ddr_io_set_lights_extio; +static ddr_io_set_lights_p3io_t _child_ddr_io_set_lights_p3io; +static ddr_io_set_lights_hdxs_panel_t _child_ddr_io_set_lights_hdxs_panel; +static ddr_io_set_lights_hdxs_rgb_t _child_ddr_io_set_lights_hdxs_rgb; +static ddr_io_fini_t _child_ddr_io_fini; + +static log_formatter_t _log_formatter_misc; +static log_formatter_t _log_formatter_info; +static log_formatter_t _log_formatter_warning; +static log_formatter_t _log_formatter_fatal; + +static _Atomic(bool) _io_thread_proc_loop; +static _Atomic(bool) _io_thread_proc_running; + +static _Atomic(uint32_t) _child_ddr_io_data_pad; +static _Atomic(uint32_t) _child_ddr_io_data_extio_lights; +static _Atomic(uint32_t) _child_ddr_io_data_p3io_lights; + 
+static int _io_thread_proc(void *ctx) +{ + uint64_t time_start; + uint64_t time_end; + uint64_t loop_counter; + uint64_t total_time; + + uint32_t prev_child_ddr_io_data_extio_lights; + uint32_t prev_child_ddr_io_data_p3io_lights; + + uint32_t local_tmp; + + atomic_store_explicit( + &_io_thread_proc_running, + true, + memory_order_seq_cst); + + log_info("IO thread running"); + + prev_child_ddr_io_data_extio_lights = atomic_load_explicit( + &_child_ddr_io_data_extio_lights, memory_order_seq_cst); + prev_child_ddr_io_data_p3io_lights = atomic_load_explicit( + &_child_ddr_io_data_p3io_lights, memory_order_seq_cst); + + time_start = time_get_counter(); + loop_counter = 0; + + while (atomic_load_explicit(&_io_thread_proc_loop, memory_order_seq_cst)) { + local_tmp = _child_ddr_io_read_pad(); + + atomic_store_explicit( + &_child_ddr_io_data_pad, + local_tmp, + memory_order_relaxed); + + // Only update outputs when they change gives this loop a major performance boost + // The write calls on a p3io for the outputs are very expensive (~12 ms) as they + // are executed over the ACIO protocol compared to only the input read + // calls (~4 ms) which have a dedicated IOCTL call/endpoint + + local_tmp = atomic_load_explicit( + &_child_ddr_io_data_extio_lights, + memory_order_relaxed); + + if (local_tmp != prev_child_ddr_io_data_extio_lights) { + _child_ddr_io_set_lights_extio(local_tmp); + prev_child_ddr_io_data_extio_lights = local_tmp; + } + + local_tmp = atomic_load_explicit( + &_child_ddr_io_data_p3io_lights, + memory_order_relaxed); + + if (local_tmp != prev_child_ddr_io_data_p3io_lights) { + _child_ddr_io_set_lights_p3io(local_tmp); + prev_child_ddr_io_data_p3io_lights = local_tmp; + } + + // Don't hog the CPU + SwitchToThread(); + + loop_counter++; + } + + time_end = time_get_counter(); + total_time = time_get_elapsed_us(time_end - time_start); + + log_info("IO thread performance: total iterations %lld, avg. 
loop cycle time %f us", + loop_counter, ((double) total_time) / loop_counter); + + atomic_store_explicit( + &_io_thread_proc_running, + false, + memory_order_seq_cst); + + log_info("IO thread shut down"); + + return 0; +} + +static void* _load_function(HMODULE module, const char* name) +{ + void* ptr; + + ptr = GetProcAddress(module, name); + + if (ptr == NULL) { + log_fatal("Could not find function %s in ddr3io child library", name); + } + + return ptr; +} + +void ddr_io_set_loggers( + log_formatter_t misc, + log_formatter_t info, + log_formatter_t warning, + log_formatter_t fatal) +{ + _log_formatter_misc = misc; + _log_formatter_info = info; + _log_formatter_warning = warning; + _log_formatter_fatal = fatal; + + log_to_external(misc, info, warning, fatal); +} + +bool ddr_io_init( + thread_create_t thread_create, + thread_join_t thread_join, + thread_destroy_t thread_destroy) +{ + log_info("Loading ddrio-async-child.dll as child ddrio library..."); + + _child_ddr_io_module = LoadLibraryA("ddrio-async-child.dll"); + + if (_child_ddr_io_module == NULL) { + log_warning("Loading ddrio-async-child.dll failed"); + return false; + } + + _child_ddr_io_set_loggers = _load_function(_child_ddr_io_module, "ddr_io_set_loggers"); + _child_ddr_io_init = _load_function(_child_ddr_io_module, "ddr_io_init"); + _child_ddr_io_read_pad = _load_function(_child_ddr_io_module, "ddr_io_read_pad"); + _child_ddr_io_set_lights_extio = _load_function(_child_ddr_io_module, "ddr_io_set_lights_extio"); + _child_ddr_io_set_lights_p3io = _load_function(_child_ddr_io_module, "ddr_io_set_lights_p3io"); + _child_ddr_io_set_lights_hdxs_panel = _load_function(_child_ddr_io_module, "ddr_io_set_lights_hdxs_panel"); + _child_ddr_io_set_lights_hdxs_rgb = _load_function(_child_ddr_io_module, "ddr_io_set_lights_hdxs_rgb"); + _child_ddr_io_fini = _load_function(_child_ddr_io_module, "ddr_io_fini"); + + _child_ddr_io_set_loggers( + _log_formatter_misc, + _log_formatter_info, + _log_formatter_warning, + 
_log_formatter_fatal); + + log_info("Calling child ddr_io_init..."); + + if (!_child_ddr_io_init(thread_create, thread_join, thread_destroy)) { + log_warning("Child ddr_io_init failed"); + FreeLibrary(_child_ddr_io_module); + + return false; + } + + atomic_store_explicit( + &_io_thread_proc_loop, + true, + memory_order_seq_cst); + + if (!thread_create(_io_thread_proc, NULL, 16384, 0)) { + log_warning("Creating IO thread failed"); + + _child_ddr_io_fini(); + FreeLibrary(_child_ddr_io_module); + + return false; + } + + return true; +} + +uint32_t ddr_io_read_pad(void) +{ + return atomic_load_explicit(&_child_ddr_io_data_pad, memory_order_relaxed); +} + +void ddr_io_set_lights_extio(uint32_t extio_lights) +{ + atomic_store_explicit( + &_child_ddr_io_data_extio_lights, + extio_lights, + memory_order_relaxed); +} + +void ddr_io_set_lights_p3io(uint32_t p3io_lights) +{ + atomic_store_explicit( + &_child_ddr_io_data_p3io_lights, + p3io_lights, + memory_order_relaxed); +} + +void ddr_io_set_lights_hdxs_panel(uint32_t lights) +{ + // Not implemented for now +} + +void ddr_io_set_lights_hdxs_rgb(uint8_t idx, uint8_t r, uint8_t g, uint8_t b) +{ + // Not implemented for now +} + +void ddr_io_fini(void) +{ + atomic_store_explicit( + &_io_thread_proc_loop, + false, + memory_order_seq_cst); + + log_info("Shutting down IO thread and waiting for it to finish..."); + + while (atomic_load_explicit(&_io_thread_proc_running, memory_order_seq_cst)) { + Sleep(1); + } + + log_info("IO thread finished"); + + _child_ddr_io_fini(); + + FreeLibrary(_child_ddr_io_module); +}