From 5a610f4dbadd3c8f1f667c8ea1e49d2d757a1fa1 Mon Sep 17 00:00:00 2001 From: Micah Elizabeth Scott <micah@scanlime.org> Date: Mon, 22 Jul 2013 20:57:53 -0700 Subject: [PATCH] Optimize and debug dithering and clamping code --- firmware/Makefile | 2 +- firmware/fadecandy.cpp | 38 +++++++++++++++++++++++++++----------- firmware/fc_usb.h | 4 ++-- 3 files changed, 30 insertions(+), 14 deletions(-) diff --git a/firmware/Makefile b/firmware/Makefile index bfe6eb8..9c54d50 100644 --- a/firmware/Makefile +++ b/firmware/Makefile @@ -36,7 +36,7 @@ CPP_FILES = \ INCLUDES = -I. # CPPFLAGS = compiler options for C and C++ -CPPFLAGS = -Wall -g -Os -mcpu=cortex-m4 -mthumb -nostdlib -MMD $(OPTIONS) $(INCLUDES) +CPPFLAGS = -Wall -Wno-sign-compare -Wno-strict-aliasing -g -Os -mcpu=cortex-m4 -mthumb -nostdlib -MMD $(OPTIONS) $(INCLUDES) # compiler options for C++ only CXXFLAGS = -std=gnu++0x -felide-constructors -fno-exceptions -fno-rtti diff --git a/firmware/fadecandy.cpp b/firmware/fadecandy.cpp index c896fbd..7638bbe 100644 --- a/firmware/fadecandy.cpp +++ b/firmware/fadecandy.cpp @@ -24,6 +24,7 @@ #include <math.h> #include <algorithm> #include "OctoWS2811z.h" +#include "arm_math.h" #include "fc_usb.h" #include "fc_defs.h" @@ -38,7 +39,7 @@ static OctoWS2811z leds(LEDS_PER_STRIP, ledBuffer, WS2811_800kHz); static int8_t residual[CHANNELS_TOTAL]; -static uint32_t ALWAYS_INLINE lutInterpolate(const uint16_t *lut, uint32_t arg) +ALWAYS_INLINE static inline uint32_t lutInterpolate(const uint16_t *lut, uint32_t arg) { /* * Using our color LUT for the indicated channel, convert the @@ -53,7 +54,7 @@ static uint32_t ALWAYS_INLINE lutInterpolate(const uint16_t *lut, uint32_t arg) return (lut[index] * invAlpha + lut[index + 1] * alpha) >> 8; } -static uint32_t ALWAYS_INLINE updatePixel(uint32_t icPrev, uint32_t icNext, unsigned n) +ALWAYS_INLINE static inline uint32_t updatePixel(uint32_t icPrev, uint32_t icNext, unsigned n) { /* * Update pipeline for one pixel: @@ -67,9 +68,9 @@ static uint32_t ALWAYS_INLINE updatePixel(uint32_t icPrev, uint32_t icNext, unsi const uint8_t *pixelNext = buffers.fbNext->pixel(n); // Per-channel linear interpolation and conversion to 16-bit color. - uint32_t iR = (pixelPrev[0] * icPrev + pixelNext[0] * icNext) >> 16; - uint32_t iG = (pixelPrev[1] * icPrev + pixelNext[1] * icNext) >> 16; - uint32_t iB = (pixelPrev[2] * icPrev + pixelNext[2] * icNext) >> 16; + int iR = (pixelPrev[0] * icPrev + pixelNext[0] * icNext) >> 16; + int iG = (pixelPrev[1] * icPrev + pixelNext[1] * icNext) >> 16; + int iB = (pixelPrev[2] * icPrev + pixelNext[2] * icNext) >> 16; // Pass through our color LUT iR = lutInterpolate(&buffers.lutCurrent[0 * 256], iR); @@ -84,12 +85,27 @@ static uint32_t ALWAYS_INLINE updatePixel(uint32_t icPrev, uint32_t icNext, unsi iG += pResidual[1]; iB += pResidual[2]; - // Round to the nearest 8-bit value - int r8 = std::min<int>(0xff, std::max<int>(0, (iR + 0x80) >> 8)); - int g8 = std::min<int>(0xff, std::max<int>(0, (iG + 0x80) >> 8)); - int b8 = std::min<int>(0xff, std::max<int>(0, (iB + 0x80) >> 8)); + /* + * Round to the nearest 8-bit value. Clamping is necessary! + * This value might be as low as -128 prior to adding 0x80 + * for rounding. After this addition, the result is guaranteed + * to be >= 0, but it may be over 0xffff. + * + * This rules out clamping using the UQADD16 instruction, + * since the addition itself needs to allow overflow. Instead, + * we clamp using a separate USAT instruction. + */ + + int r8 = __USAT(iR + 0x80, 16) >> 8; + int g8 = __USAT(iG + 0x80, 16) >> 8; + int b8 = __USAT(iB + 0x80, 16) >> 8; + + /* + * Compute the error, after expanding the 8-bit value back to 16-bit. + * Clamping (e.g. via __SSAT) is not necessary, since the error will not + * be greater than +/- 127. + */ - // Compute the error, after expanding the 8-bit value back to 16-bit. pResidual[0] = iR - (r8 * 257); pResidual[1] = iG - (g8 * 257); pResidual[2] = iB - (b8 * 257); @@ -353,7 +369,7 @@ extern "C" int main() while (1) { buffers.handleUSB(); - updateDrawBuffer((millis() << 6) & 0xFFFF); + updateDrawBuffer((millis() << 4) & 0xFFFF); leds.show(); } } diff --git a/firmware/fc_usb.h b/firmware/fc_usb.h index 72bcfcc..ba9a9d3 100644 --- a/firmware/fc_usb.h +++ b/firmware/fc_usb.h @@ -68,7 +68,7 @@ struct fcPacketBuffer struct fcFramebuffer : public fcPacketBuffer<PACKETS_PER_FRAME> { - const uint8_t* ALWAYS_INLINE pixel(unsigned index) + ALWAYS_INLINE const uint8_t* pixel(unsigned index) { return &packets[index / PIXELS_PER_PACKET]->buf[1 + (index % PIXELS_PER_PACKET) * 3]; } @@ -81,7 +81,7 @@ struct fcFramebuffer : public fcPacketBuffer<PACKETS_PER_FRAME> struct fcColorLUT : public fcPacketBuffer<PACKETS_PER_LUT> { - const unsigned ALWAYS_INLINE entry(unsigned index) + ALWAYS_INLINE const unsigned entry(unsigned index) { const uint8_t *p = &packets[index / LUTENTRIES_PER_PACKET]->buf[2 + (index % LUTENTRIES_PER_PACKET) * 2]; return *(uint16_t*)p; -- GitLab