From 7017969f6bd92b8ffecc05fa6f9ec1ed24c78f9b Mon Sep 17 00:00:00 2001 From: Micah Elizabeth Scott <micah@scanlime.org> Date: Tue, 23 Jul 2013 15:30:44 -0700 Subject: [PATCH] Revert "Framebuffer iterator scheme, to avoid division" This reverts commit 07106959f6340912fc47c94ee40cc072479ec44e. --- firmware/fadecandy.cpp | 79 ++++++++++-------------------------------- firmware/fc_usb.h | 40 ++------------------- 2 files changed, 21 insertions(+), 98 deletions(-) diff --git a/firmware/fadecandy.cpp b/firmware/fadecandy.cpp index 7e7df52..3eaa5ec 100644 --- a/firmware/fadecandy.cpp +++ b/firmware/fadecandy.cpp @@ -54,7 +54,7 @@ ALWAYS_INLINE static inline uint32_t lutInterpolate(const uint16_t *lut, uint32_ return (lut[index] * invAlpha + lut[index + 1] * alpha) >> 8; } -static inline uint32_t updatePixel(uint32_t icPrev, uint32_t icNext, fcFramebufferIter iter) +static inline uint32_t updatePixel(uint32_t icPrev, uint32_t icNext, unsigned n) { /* * Update pipeline for one pixel: @@ -64,8 +64,8 @@ static inline uint32_t updatePixel(uint32_t icPrev, uint32_t icNext, fcFramebuff * 3. Dithering */ - const uint8_t *pixelPrev = buffers.fbPrev->pixel(iter); - const uint8_t *pixelNext = buffers.fbNext->pixel(iter); + const uint8_t *pixelPrev = buffers.fbPrev->pixel(n); + const uint8_t *pixelNext = buffers.fbNext->pixel(n); // Per-channel linear interpolation and conversion to 16-bit color. int iR = (pixelPrev[0] * icPrev + pixelNext[0] * icNext) >> 16; @@ -78,7 +78,7 @@ static inline uint32_t updatePixel(uint32_t icPrev, uint32_t icNext, fcFramebuff iB = lutInterpolate(&buffers.lutCurrent[2 * 256], iB); // Pointer to the residual buffer for this pixel - int8_t *pResidual = &residual[iter.component]; + int8_t *pResidual = &residual[n * 3]; // Incorporate the residual from last frame iR += pResidual[0]; @@ -133,30 +133,7 @@ static void updateDrawBuffer(unsigned interpCoefficient) uint32_t icPrev = 257 * (0x10000 - interpCoefficient); uint32_t icNext = 257 * interpCoefficient; - /* - * Iterators for each LED strip. (Avoids division later on). - * - * We set these to one LED prior to the one we actually want to start with, - * since it makes for more efficient register usage later on if we pre-increment - * instead of post-incrementing. - */ - - fcFramebufferIter i0, i1, i2, i3, i4, i5, i6, i7; - - i0.setNegativeOne(); - i1.set(LEDS_PER_STRIP * 1 - 1); - i2.set(LEDS_PER_STRIP * 2 - 1); - i3.set(LEDS_PER_STRIP * 3 - 1); - i4.set(LEDS_PER_STRIP * 4 - 1); - i5.set(LEDS_PER_STRIP * 5 - 1); - i6.set(LEDS_PER_STRIP * 6 - 1); - i7.set(LEDS_PER_STRIP * 7 - 1); - - /* - * Process each LED strip in parallel, and re-pack into a planar DMA buffer. - */ - - for (int i = 0; i < LEDS_PER_STRIP; ++i, out += 6) { + for (int i = 0; i < LEDS_PER_STRIP; ++i) { // Six output words union { @@ -174,8 +151,7 @@ static void updateDrawBuffer(unsigned interpCoefficient) * This generates fairly efficient code using the UBFX and BFI instructions. */ - i0.next(); - uint32_t p0 = updatePixel(icPrev, icNext, i0); + uint32_t p0 = updatePixel(icPrev, icNext, i + LEDS_PER_STRIP * 0); o5.p0d = p0; o5.p0c = p0 >> 1; @@ -202,8 +178,7 @@ static void updateDrawBuffer(unsigned interpCoefficient) o0.p0b = p0 >> 22; o0.p0a = p0 >> 23; - i1.next(); - uint32_t p1 = updatePixel(icPrev, icNext, i1); + uint32_t p1 = updatePixel(icPrev, icNext, i + LEDS_PER_STRIP * 0); o5.p1d = p1; o5.p1c = p1 >> 1; @@ -230,8 +205,7 @@ static void updateDrawBuffer(unsigned interpCoefficient) o0.p1b = p1 >> 22; o0.p1a = p1 >> 23; - i2.next(); - uint32_t p2 = updatePixel(icPrev, icNext, i2); + uint32_t p2 = updatePixel(icPrev, icNext, i + LEDS_PER_STRIP * 0); o5.p2d = p2; o5.p2c = p2 >> 1; @@ -258,8 +232,7 @@ static void updateDrawBuffer(unsigned interpCoefficient) o0.p2b = p2 >> 22; o0.p2a = p2 >> 23; - i3.next(); - uint32_t p3 = updatePixel(icPrev, icNext, i3); + uint32_t p3 = updatePixel(icPrev, icNext, i + LEDS_PER_STRIP * 0); o5.p3d = p3; o5.p3c = p3 >> 1; @@ -286,8 +259,7 @@ static void updateDrawBuffer(unsigned interpCoefficient) o0.p3b = p3 >> 22; o0.p3a = p3 >> 23; - i4.next(); - uint32_t p4 = updatePixel(icPrev, icNext, i4); + uint32_t p4 = updatePixel(icPrev, icNext, i + LEDS_PER_STRIP * 0); o5.p4d = p4; o5.p4c = p4 >> 1; @@ -314,8 +286,7 @@ static void updateDrawBuffer(unsigned interpCoefficient) o0.p4b = p4 >> 22; o0.p4a = p4 >> 23; - i5.next(); - uint32_t p5 = updatePixel(icPrev, icNext, i5); + uint32_t p5 = updatePixel(icPrev, icNext, i + LEDS_PER_STRIP * 0); o5.p5d = p5; o5.p5c = p5 >> 1; @@ -342,8 +313,7 @@ static void updateDrawBuffer(unsigned interpCoefficient) o0.p5b = p5 >> 22; o0.p5a = p5 >> 23; - i6.next(); - uint32_t p6 = updatePixel(icPrev, icNext, i6); + uint32_t p6 = updatePixel(icPrev, icNext, i + LEDS_PER_STRIP * 0); o5.p6d = p6; o5.p6c = p6 >> 1; @@ -370,50 +340,39 @@ static void updateDrawBuffer(unsigned interpCoefficient) o0.p6b = p6 >> 22; o0.p6a = p6 >> 23; - i7.next(); - uint32_t p7 = updatePixel(icPrev, icNext, i7); + uint32_t p7 = updatePixel(icPrev, icNext, i + LEDS_PER_STRIP * 0); o5.p7d = p7; o5.p7c = p7 >> 1; o5.p7b = p7 >> 2; o5.p7a = p7 >> 3; - - out[5] = o5.word; - o4.p7d = p7 >> 4; o4.p7c = p7 >> 5; o4.p7b = p7 >> 6; o4.p7a = p7 >> 7; - - out[4] = o4.word; - o3.p7d = p7 >> 8; o3.p7c = p7 >> 9; o3.p7b = p7 >> 10; o3.p7a = p7 >> 11; - - out[3] = o3.word; - o2.p7d = p7 >> 12; o2.p7c = p7 >> 13; o2.p7b = p7 >> 14; o2.p7a = p7 >> 15; - - out[2] = o2.word; - o1.p7d = p7 >> 16; o1.p7c = p7 >> 17; o1.p7b = p7 >> 18; o1.p7a = p7 >> 19; - - out[1] = o1.word; - o0.p7d = p7 >> 20; o0.p7c = p7 >> 21; o0.p7b = p7 >> 22; o0.p7a = p7 >> 23; - out[0] = o0.word; + *(out++) = o0.word; + *(out++) = o1.word; + *(out++) = o2.word; + *(out++) = o3.word; + *(out++) = o4.word; + *(out++) = o5.word; } } diff --git a/firmware/fc_usb.h b/firmware/fc_usb.h index ffe47f5..ba9a9d3 100644 --- a/firmware/fc_usb.h +++ b/firmware/fc_usb.h @@ -62,51 +62,15 @@ struct fcPacketBuffer }; -/* - * Iterator referring to one pixel in the framebuffer - */ - -struct fcFramebufferIter -{ - unsigned packet; - unsigned index; - unsigned component; - - ALWAYS_INLINE void setNegativeOne() - { - // Special case: After one call to next(), this points to LED #0. - index = -1; - packet = 0; - component = -3; - } - - ALWAYS_INLINE void set(unsigned led) - { - packet = led / PIXELS_PER_PACKET; - index = 1 + (led % PIXELS_PER_PACKET) * 3; - component = led * 3; - } - - ALWAYS_INLINE void next() - { - if (++index >= (1 + PIXELS_PER_PACKET * 3)) { - index = 0; - packet++; - } - component += 3; - } -}; - - /* * Framebuffer */ struct fcFramebuffer : public fcPacketBuffer<PACKETS_PER_FRAME> { - ALWAYS_INLINE const uint8_t* pixel(fcFramebufferIter iter) const + ALWAYS_INLINE const uint8_t* pixel(unsigned index) { - return &packets[iter.packet]->buf[iter.index]; + return &packets[index / PIXELS_PER_PACKET]->buf[1 + (index % PIXELS_PER_PACKET) * 3]; } }; -- GitLab