From ea26cd733ca7f172785a30755bc85ed45351bca4 Mon Sep 17 00:00:00 2001 From: Christian Aistleitner Date: Sat, 1 Jul 2023 23:01:26 +0200 Subject: [PATCH 1/3] Allow to sleep instead of busy waiting when limiting refresh rate On single core boards, busy waiting renders the system unresponsive. We add the --led-no-busy-waiting flag, to allow the process to sleep instead of busy waiting, thereby freeing up CPU cycles for other processes at the cost of slightly less accurate frame timing. On a Raspberry Pi 3B on two 32x32 panels, busy waiting keeps a core 98% busy. Sleeping reduces it to 14%. On a Raspberry Zero on two 32x32 panels, busy waiting keeps the single core 95% busy and gives an unresponsive system. Sleeping reduces it to 32% and keeps the system responsive. Sleeping degrades the timing accuracy a bit, but is still good enough by a far margin for many uses. The following numbers are for driving two 32x32 panels and averaging the refresh rate across a minute (the 1kHz refresh rate limit is of course not reached, and is only there to make sure that the relevant code path is taken): +--------------+-----------+ | busy waiting | sleeping | +--------------------------------------------------+--------------+-----------+ | Raspberry Pi 3B, limit 70Hz, hardw. pulsing | 70.000Hz | 69.977Hz | | Raspberry Pi 3B, limit 70Hz, no hardw. pulsing | 70.000Hz | 69.977Hz | | Raspberry Pi 3B, limit 1kHz, hardw. pulsing | 430.824Hz | 430.855Hz | | Raspberry Pi 3B, limit 1kHz, no hardw. pulsing | 365.498Hz | 365.456Hz | | Raspberry Pi Zero, limit 70Hz, hardw. pulsing | 69.986Hz | 69.643Hz | | Raspberry Pi Zero, limit 70Hz, no hardw. pulsing | 69.986Hz | 69.638Hz | | Raspberry Pi Zero, limit 1kHz, hardw. pulsing | 263.877Hz | 264.538Hz | | Raspberry Pi Zero, limit 1kHz, no hardw. 
pulsing | 233.166Hz | 231.650Hz | +--------------------------------------------------+--------------+-----------+ --- include/led-matrix-c.h | 6 ++++++ include/led-matrix.h | 4 ++++ lib/Makefile | 7 +++++++ lib/gpio.cc | 5 +++++ lib/gpio.h | 2 ++ lib/led-matrix-c.cc | 2 ++ lib/led-matrix.cc | 26 ++++++++++++++++++++------ lib/options-initialize.cc | 13 +++++++++++-- 8 files changed, 57 insertions(+), 8 deletions(-) diff --git a/include/led-matrix-c.h b/include/led-matrix-c.h index e6907311b..15ad90c9c 100644 --- a/include/led-matrix-c.h +++ b/include/led-matrix-c.h @@ -152,6 +152,12 @@ struct RGBLedMatrixOptions { * to keep a constant refresh rate. <= 0 for no limit. */ int limit_refresh_rate_hz; /* Corresponding flag: --led-limit-refresh */ + + /* Sleep instead of busy waiting when limiting refresh rate. This gives + * slightly less accurate frame timing, but lets the CPU work on other + * processes when waiting and renders single core boards more responsive. + */ + bool disable_busy_waiting; /* Corresponding flag: --led-busy-waiting */ }; /** diff --git a/include/led-matrix.h b/include/led-matrix.h index 5e401fe9c..ed7960efe 100644 --- a/include/led-matrix.h +++ b/include/led-matrix.h @@ -155,6 +155,10 @@ class RGBMatrix : public Canvas { // Limit refresh rate of LED panel. This will help on a loaded system // to keep a constant refresh rate. <= 0 for no limit. int limit_refresh_rate_hz; // Flag: --led-limit-refresh + + // Sleep instead of busy wait to free CPU cycles but get slightly less + // accurate frame timing. + bool disable_busy_waiting; // Flag: --led-busy-waiting }; // Factory to create a matrix. Additional functionality includes dropping diff --git a/lib/Makefile b/lib/Makefile index 335936388..5664b1d31 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -129,6 +129,13 @@ HARDWARE_DESC?=regular # Flag: --led-limit-refresh #DEFINES+=-DFIXED_FRAME_MICROSECONDS=5000 +# When limiting refresh rate, a CPU core is busy waiting to get accurate +# timing. 
On single core systems, this results in an unresponsive system. +# By disabling busy waiting, CPU cycles are freed up, leading to a more +# responsive system at the cost of slightly less accurate frame timing. +# Flag: --led-no-busy-waiting +#DEFINES+=-DDISABLE_BUSY_WAITING + # Enable wide 64 bit GPIO offered with the compute module. # This will use more memory to internally represent the frame buffer, so # caches can't be utilized as much. diff --git a/lib/gpio.cc b/lib/gpio.cc index 0861482a1..d1cb8775c 100644 --- a/lib/gpio.cc +++ b/lib/gpio.cc @@ -814,4 +814,9 @@ uint32_t GetMicrosecondCounter() { return epoch_usec & 0xFFFFFFFF; } +// For external use, e.g. to lessen busy waiting. +void SleepMicroseconds(long t) { + Timers::sleep_nanos(t * 1000); +} + } // namespace rgb_matrix diff --git a/lib/gpio.h b/lib/gpio.h index 2114b1834..b9927bc8b 100644 --- a/lib/gpio.h +++ b/lib/gpio.h @@ -144,6 +144,8 @@ class PinPulser { // if possible and a terrible slow fallback otherwise. uint32_t GetMicrosecondCounter(); +void SleepMicroseconds(long); + } // end namespace rgb_matrix #endif // RPI_GPIO_INGERNALH diff --git a/lib/led-matrix-c.cc b/lib/led-matrix-c.cc index 74df44e03..4e62af913 100644 --- a/lib/led-matrix-c.cc +++ b/lib/led-matrix-c.cc @@ -88,6 +88,7 @@ static struct RGBLedMatrix *led_matrix_create_from_options_optional_edit( OPT_COPY_IF_SET(pixel_mapper_config); OPT_COPY_IF_SET(panel_type); OPT_COPY_IF_SET(limit_refresh_rate_hz); + OPT_COPY_IF_SET(disable_busy_waiting); #undef OPT_COPY_IF_SET } @@ -134,6 +135,7 @@ static struct RGBLedMatrix *led_matrix_create_from_options_optional_edit( ACTUAL_VALUE_BACK_TO_OPT(pixel_mapper_config); ACTUAL_VALUE_BACK_TO_OPT(panel_type); ACTUAL_VALUE_BACK_TO_OPT(limit_refresh_rate_hz); + ACTUAL_VALUE_BACK_TO_OPT(disable_busy_waiting); #undef ACTUAL_VALUE_BACK_TO_OPT } diff --git a/lib/led-matrix.cc b/lib/led-matrix.cc index 3740da7e1..2467e79da 100644 --- a/lib/led-matrix.cc +++ b/lib/led-matrix.cc @@ -122,9 +122,10 @@ class 
RGBMatrix::Impl::UpdateThread : public Thread { public: UpdateThread(GPIO *io, FrameCanvas *initial_frame, int pwm_dither_bits, bool show_refresh, - int limit_refresh_hz) + int limit_refresh_hz, bool allow_busy_waiting) : io_(io), show_refresh_(show_refresh), target_frame_usec_(limit_refresh_hz < 1 ? 0 : 1e6/limit_refresh_hz), + allow_busy_waiting_(allow_busy_waiting), running_(true), current_frame_(initial_frame), next_frame_(NULL), requested_frame_multiple_(1) { @@ -199,8 +200,13 @@ class RGBMatrix::Impl::UpdateThread : public Thread { ++low_bit_sequence; if (target_frame_usec_) { - while ((GetMicrosecondCounter() - start_time_us) < target_frame_usec_) { - // busy wait. We have our dedicated core, so ok to burn cycles. + if (allow_busy_waiting_) { + while ((GetMicrosecondCounter() - start_time_us) < target_frame_usec_) { + // busy wait. We have our dedicated core, so ok to burn cycles. + } + } else { + long spent_us = GetMicrosecondCounter() - start_time_us; + SleepMicroseconds(target_frame_usec_ - spent_us); } } @@ -245,6 +251,7 @@ class RGBMatrix::Impl::UpdateThread : public Thread { GPIO *const io_; const bool show_refresh_; const uint32_t target_frame_usec_; + const bool allow_busy_waiting_; uint32_t start_bit_[4]; Mutex running_mutex_; @@ -314,9 +321,14 @@ RGBMatrix::Options::Options() : pixel_mapper_config(NULL), panel_type(NULL), #ifdef FIXED_FRAME_MICROSECONDS - limit_refresh_rate_hz(1e6 / FIXED_FRAME_MICROSECONDS) + limit_refresh_rate_hz(1e6 / FIXED_FRAME_MICROSECONDS), +#else + limit_refresh_rate_hz(0), +#endif +#ifdef DISABLE_BUSY_WAITING + disable_busy_waiting(true) #else - limit_refresh_rate_hz(0) + disable_busy_waiting(false) #endif { // Nothing to see here. 
@@ -348,6 +360,7 @@ static void PrintOptions(const RGBMatrix::Options &o) { P_STR(pixel_mapper_config); P_STR(panel_type); P_INT(limit_refresh_rate_hz); + P_BOOL(disable_busy_waiting); #undef P_INT #undef P_STR #undef P_BOOL @@ -469,7 +482,8 @@ bool RGBMatrix::Impl::StartRefresh() { if (updater_ == NULL && io_ != NULL) { updater_ = new UpdateThread(io_, active_, params_.pwm_dither_bits, params_.show_refresh_rate, - params_.limit_refresh_rate_hz); + params_.limit_refresh_rate_hz, + !params_.disable_busy_waiting); // If we have multiple processors, the kernel // jumps around between these, creating some global flicker. // So let's tie it to the last CPU available. diff --git a/lib/options-initialize.cc b/lib/options-initialize.cc index 3c2abd625..46c90a319 100644 --- a/lib/options-initialize.cc +++ b/lib/options-initialize.cc @@ -209,6 +209,12 @@ static bool FlagInit(int &argc, char **&argv, continue; } + bool allow_busy_waiting = !mopts->disable_busy_waiting; + if (ConsumeBoolFlag("busy-waiting", it, &allow_busy_waiting)) { + mopts->disable_busy_waiting = !allow_busy_waiting; + continue; + } + bool request_help = false; if (ConsumeBoolFlag("help", it, &request_help) && request_help) { // In that case, we pretend to have failure in parsing, which will @@ -338,7 +344,8 @@ void PrintMatrixFlags(FILE *out, const RGBMatrix::Options &d, "\t--led-pwm-dither-bits=<0..2> : Time dithering of lower bits " "(Default: 0)\n" "\t--led-%shardware-pulse : %sse hardware pin-pulse generation.\n" - "\t--led-panel-type= : Needed to initialize special panels. Supported: 'FM6126A', 'FM6127'\n", + "\t--led-panel-type= : Needed to initialize special panels. 
Supported: 'FM6126A', 'FM6127'\n" + "\t--led-%sbusy-waiting : %sse busy waiting when limiting refresh rate.\n", d.hardware_mapping, d.rows, d.cols, d.chain_length, d.parallel, (int) muxers.size(), CreateAvailableMultiplexString(muxers).c_str(), @@ -350,7 +357,9 @@ void PrintMatrixFlags(FILE *out, const RGBMatrix::Options &d, d.inverse_colors ? "no-" : "", d.inverse_colors ? "off" : "on", d.pwm_lsb_nanoseconds, !d.disable_hardware_pulsing ? "no-" : "", - !d.disable_hardware_pulsing ? "Don't u" : "U"); + !d.disable_hardware_pulsing ? "Don't u" : "U", + !d.disable_busy_waiting ? "no-" : "", + !d.disable_busy_waiting ? "Don't u" : "U"); fprintf(out, "\t--led-slowdown-gpio=<0..4>: " "Slowdown GPIO. Needed for faster Pis/slower panels " From 2986ce6b2fa2865bf97d0083fc8a996a105eb4ff Mon Sep 17 00:00:00 2001 From: Christian Aistleitner Date: Thu, 15 Feb 2024 11:02:05 +0100 Subject: [PATCH 2/3] README: Convert `A` tag from self-closing to explicit close tag GitHub's Markdown renderer underlined the whole text from `--led-no-drop-privs` until the start of the next section, which made it hard to read. By converting the `A` tag from a self-closing to an explicitly closed tag, GitHub renders the file without the extra underlining again, which makes it easier to read. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c85d15ae8..093625062 100644 --- a/README.md +++ b/README.md @@ -445,7 +445,7 @@ to debug if it has something to do with the sound subsystem (see Troubleshooting section). This is really only recommended for debugging; typically you actually want the hardware pulses as it results in a much more stable picture. - + ``` --led-no-drop-privs : Don't drop privileges from 'root' after initializing the hardware. 
From f3c32435c928e8d6f5e3ff1db5273c7b9bbb59bd Mon Sep 17 00:00:00 2001 From: Christian Aistleitner Date: Thu, 15 Feb 2024 11:29:59 +0100 Subject: [PATCH 3/3] README: Add documentation for `--led-no-busy-waiting` --- README.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/README.md b/README.md index 093625062..2e0977285 100644 --- a/README.md +++ b/README.md @@ -380,6 +380,20 @@ Use this also if you want to have a stable baseline refresh rate when using the vsync-multiple flag `-V` in the [led-image-viewer] or [video-viewer] utility programs. +``` +--led-no-busy-waiting : Don't use busy waiting when limiting refresh rate. +``` + +This allows switching from busy waiting to sleep waiting when limiting the +refresh rate (`--led-limit-refresh`). + +By default, refresh rate limiting uses busy waiting, which is CPU intensive but +gives the most accurate timings. This is fine for multi-core boards. + +On single core boards (e.g.: Raspberry Pi Zero) busy waiting makes the system +unresponsive for other/background tasks. There, sleep waiting improves the +system's responsiveness at the cost of slightly less accurate timings. + ``` --led-scan-mode=<0..1> : 0 = progressive; 1 = interlaced (Default: 0). ```