From 7eeb1964d07346a7ba111473b37af608218a329f Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Wed, 1 Mar 2023 02:07:40 +0100 Subject: [PATCH] Render from main thread rather than input plugin thread by default Motivation: - Some vis plugins turned out to not be prepared to receive window messages from multiple threads concurrently. - When then input plugin thread drives the vis plugin, visdriver has no control over vis plugin render rate. Assumptions: - Vis plugins want to present the latest data available. The second latest data may be better than no data, but the latest data should win over older data wherever possible, forwarding all data is not the goal. - Allowing to both read and write a block of vis data at the same time could make the vis plugin render inconsistent data, so that should be avoided. Access to each bucket's state needs to be atomic. - There is at most one reader (copying to the vis plugin) and at most one writer (copying from the input plugin) at any point in time. - If one of reader and writer are ever way slower than the other part, two buckets will not be enough. E.g. if bucket 0 is locked by the reader and the writer just finished writing to bucket 1 but hits again while the reader is still blocking bucket 1 the writer would then need to (a) not write the most recent data at all or (b) overwrite bucket 2 and force the reader to serve data from bucket 1 again that is less recent than ideal. - Use of pessimistic locking would not be ideal in the input plugin thread. --- src/config.c | 8 ++ src/config.h | 3 + src/main.c | 6 +- src/visualization.c | 186 ++++++++++++++++++++++++++++++++++++++++++-- src/visualization.h | 4 + 5 files changed, 200 insertions(+), 7 deletions(-) diff --git a/src/config.c b/src/config.c index d6f5c93..e65af99 100644 --- a/src/config.c +++ b/src/config.c @@ -100,6 +100,14 @@ void parse_command_line(visdriver_config_t *config, int argc, OPT_STRING('W', "vis", &config->vis_plugin_filename, "vis plug-in to use", NULL, 0, 0), + OPT_GROUP("Advanced configuration:"), + OPT_BOOLEAN(0, "render-from-input-plugin-thread", + &config->render_from_input_plugin_thread, + "render from input plugin thread" + " (reduces lag but will crash with some plugins)" + " [default: render from main thread]", + NULL, 0, OPT_NONEG), + OPT_END(), }; diff --git a/src/config.h b/src/config.h index 35e2480..d2993a2 100644 --- a/src/config.h +++ b/src/config.h @@ -18,12 +18,15 @@ #ifndef CONFIG_H #define CONFIG_H +#include + typedef struct _visdriver_config_t { const char *input_plugin_filename; const char *output_plugin_filename; const char *vis_plugin_filename; const char *const *tracks; int track_count; + bool render_from_input_plugin_thread; } visdriver_config_t; void parse_command_line(visdriver_config_t *config, int argc, diff --git a/src/main.c b/src/main.c index a4327a5..329d504 100644 --- a/src/main.c +++ b/src/main.c @@ -107,13 +107,15 @@ void self_identify() { } int main(int argc, char **argv) { - visdriver_config_t config = {NULL}; + visdriver_config_t config = {.render_from_input_plugin_thread = false}; parse_command_line(&config, argc, argv); // may exit log_auto_configure_indent(); self_identify(); + vis_configure(config.render_from_input_plugin_thread); + int current_track_index = -1; bool playing = false; @@ -229,6 +231,8 @@ int main(int argc, char **argv) { } } + vis_render(); + sleep_milliseconds(1); // to avoid 100% CPU usage } diff --git a/src/visualization.c b/src/visualization.c index cd1266b..4989ea4 100644 --- a/src/visualization.c +++ b/src/visualization.c @@ -21,7 +21,9 @@ #define _GNU_SOURCE // for M_PI from math.h #endif +#include #include +#include #include #include // memset @@ -32,9 +34,25 @@ #define VIS_FRAMES 576 // dictated by vis.h +typedef enum _vis_frame_state_t { + NOT_LOCKED = '_', + LOCKED_BY_READER = 'R', + LOCKED_BY_WRITER = 'W', +} vis_frame_state_t; + +typedef struct _vis_bucket_t { + volatile LONG *p_state; // only a pointer because InterlockedCompareExchange + // needs aligned memory + unsigned char spectrumData[2][VIS_FRAMES]; + unsigned char waveformData[2][VIS_FRAMES]; +} vis_bucket_t; + winampVisModule *g_active_vis_module = NULL; static kiss_fftr_cfg g_kiss_fft_cfg = NULL; static int16_t g_prev_interleaved[VIS_FRAMES * 2]; +static vis_bucket_t g_buckets[3] = {0}; +static volatile int g_last_bucket_written = -1; +static bool g_render_from_input_plugin_thread = false; static kiss_fft_scalar g_hann_factors[VIS_FRAMES * 2]; static kiss_fft_scalar hann_factor(size_t index, size_t samples); @@ -45,6 +63,15 @@ static void compute_hann_factors() { } } +static LONG compare_and_swap_long(LONG volatile *target, LONG expected, + LONG new_value) { +#if defined(_MSC_VER) + return InterlockedCompareExchange(target, new_value, expected); +#else + return __sync_val_compare_and_swap(target, expected, new_value); +#endif +} + void __cdecl SAVSAInit(int maxlatency_in_ms, int srate) { log_debug("Input plugin announced: Maximum latency %dms, sampling rate %d " "(SAVSAInit).", @@ -52,12 +79,26 @@ void __cdecl SAVSAInit(int maxlatency_in_ms, int srate) { g_active_vis_module->sRate = srate; memset(g_prev_interleaved, 0, sizeof(g_prev_interleaved)); + const size_t vis_bucket_count = sizeof(g_buckets) / sizeof(g_buckets[0]); + for (size_t index = 0; index < vis_bucket_count; index++) { + g_buckets[index].p_state = malloc(sizeof(LONG)); + if (g_buckets[index].p_state) { + *g_buckets[index].p_state = NOT_LOCKED; + } + } + g_kiss_fft_cfg = kiss_fftr_alloc(VIS_FRAMES * 2, 0, NULL, NULL); compute_hann_factors(); } void __cdecl SAVSADeInit() { + const size_t vis_bucket_count = sizeof(g_buckets) / sizeof(g_buckets[0]); + for (size_t index = 0; index < vis_bucket_count; index++) { + free((LONG *)g_buckets[index].p_state); + g_buckets[index].p_state = NULL; + } + kiss_fftr_free(g_kiss_fft_cfg); g_kiss_fft_cfg = NULL; } @@ -68,6 +109,106 @@ static kiss_fft_scalar hann_factor(size_t index, size_t samples) { (1 + cosf(2.0f * (float)M_PI * (index - samples / 2) / samples)); } +static int vis_lock_for_writing() { + const size_t count = sizeof(g_buckets) / sizeof(g_buckets[0]); + const size_t start_index = g_last_bucket_written + 1; + for (size_t distance = 0; distance < count; distance++) { + const size_t index = (start_index + distance) % count; + if (g_buckets[index].p_state && + compare_and_swap_long(g_buckets[index].p_state, NOT_LOCKED, + LOCKED_BY_WRITER) == NOT_LOCKED) { + return index; + } + } + return -1; +} + +static void vis_unlock_after_writing(int index) { +#if !defined(NDEBUG) + const int count = sizeof(g_buckets) / sizeof(g_buckets[0]); + assert(index <= count); +#endif + + *g_buckets[index].p_state = NOT_LOCKED; + g_last_bucket_written = index; +} + +static int vis_try_lock_for_reading() { + if (g_last_bucket_written < 0) { + return -1; + } + const size_t start_index = g_last_bucket_written; + const size_t count = sizeof(g_buckets) / sizeof(g_buckets[0]); + for (size_t distance = 0; distance < count; distance++) { + // NOTE: The reader tries most recent first and least recent last + // (i.e. backwards) + const size_t index = (start_index + count - distance) % count; + if (g_buckets[index].p_state && + compare_and_swap_long(g_buckets[index].p_state, NOT_LOCKED, + LOCKED_BY_READER) == NOT_LOCKED) { + return index; + } + } + return -1; +} + +static void vis_unlock_after_reading(int index) { + if (index < 0) { + return; + } + +#if !defined(NDEBUG) + const int count = sizeof(g_buckets) / sizeof(g_buckets[0]); + assert(index <= count); +#endif + + *g_buckets[index].p_state = NOT_LOCKED; +} + +static void vis_read(int index, unsigned char spectrumData[2][VIS_FRAMES], + unsigned char waveformData[2][VIS_FRAMES]) { + if (index < 0) { + memset(spectrumData, 0, sizeof(g_buckets[index].spectrumData)); + memset(waveformData, 0, sizeof(g_buckets[index].waveformData)); + return; + } + +#if !defined(NDEBUG) + const int count = sizeof(g_buckets) / sizeof(g_buckets[0]); + assert(index <= count); +#endif + + memcpy(spectrumData, g_buckets[index].spectrumData, + sizeof(g_buckets[index].spectrumData)); + memcpy(waveformData, g_buckets[index].waveformData, + sizeof(g_buckets[index].waveformData)); +} + +void vis_configure(bool render_from_input_plugin_thread) { + g_render_from_input_plugin_thread = render_from_input_plugin_thread; + if (render_from_input_plugin_thread) { + log_info("Will render from: input plugin thread."); + } else { + log_info("Will render from: main thread."); + } +} + +void vis_render() { + if (g_render_from_input_plugin_thread) { + return; + } + + const int index = vis_try_lock_for_reading(); + + vis_read(index, g_active_vis_module->spectrumData, + g_active_vis_module->waveformData); + + g_active_vis_module->nCh = 2; + g_active_vis_module->Render(g_active_vis_module); + + vis_unlock_after_reading(index); +} + void __cdecl SAAddPCMData(void *PCMData, int nch, int bps, int timestamp) { if (nch != 2 || bps != 16) { log_error("Need 16 bit stereo samples at the moment, " @@ -78,10 +219,39 @@ void __cdecl SAAddPCMData(void *PCMData, int nch, int bps, int timestamp) { const uint16_t *const interleaved = (uint16_t *)PCMData; + int bucket_index = -1; + unsigned char *target_spectrumData[2]; + unsigned char *target_waveformData[2]; + if (g_render_from_input_plugin_thread) { + target_spectrumData[0] = + (unsigned char *)&g_active_vis_module->spectrumData[0]; + target_spectrumData[1] = + (unsigned char *)&g_active_vis_module->spectrumData[1]; + target_waveformData[0] = + (unsigned char *)&g_active_vis_module->waveformData[0]; + target_waveformData[1] = + (unsigned char *)&g_active_vis_module->waveformData[1]; + } else { + bucket_index = vis_lock_for_writing(); + if (bucket_index < 0) { // not expected to ever happen + const int count = sizeof(g_buckets) / sizeof(g_buckets[0]); + log_error("All %d vis buckets are currently locked.", count); + return; + } + target_spectrumData[0] = + (unsigned char *)&g_buckets[bucket_index].spectrumData[0]; + target_spectrumData[1] = + (unsigned char *)&g_buckets[bucket_index].spectrumData[1]; + target_waveformData[0] = + (unsigned char *)&g_buckets[bucket_index].waveformData[0]; + target_waveformData[1] = + (unsigned char *)&g_buckets[bucket_index].waveformData[1]; + } + // For waveform: De-interleave and scale to 8bit for (int i = 0; i < VIS_FRAMES; i++) { - g_active_vis_module->waveformData[0][i] = interleaved[2 * i] / 256; - g_active_vis_module->waveformData[1][i] = interleaved[2 * i + 1] / 256; + target_waveformData[0][i] = interleaved[2 * i] / 256; + target_waveformData[1][i] = interleaved[2 * i + 1] / 256; } // For spectrum: De-interleave, do spectral analysis, and scale to 8bit @@ -125,15 +295,19 @@ void __cdecl SAAddPCMData(void *PCMData, int nch, int bps, int timestamp) { ? UINT8_MAX : ((amplitude < 0) ? 0 : (unsigned char)amplitude); - g_active_vis_module->spectrumData[channel][i] = final_amplitude; + target_spectrumData[channel][i] = final_amplitude; } } + if (g_render_from_input_plugin_thread) { + g_active_vis_module->nCh = 2; + g_active_vis_module->Render(g_active_vis_module); + } else { + vis_unlock_after_writing(bucket_index); + } + // Feed future FFT memcpy(g_prev_interleaved, interleaved, sizeof(g_prev_interleaved)); - - g_active_vis_module->nCh = 2; - g_active_vis_module->Render(g_active_vis_module); } int __cdecl SAGetMode() { diff --git a/src/visualization.h b/src/visualization.h index 46db128..cbec749 100644 --- a/src/visualization.h +++ b/src/visualization.h @@ -43,4 +43,8 @@ int __cdecl VSAAdd(void *data, int timestamp); void __cdecl VSASetInfo(int srate, int nch); +void vis_configure(bool render_from_input_plugin_thread); + +void vis_render(); + #endif // ifndef VISUALIZATION_H