Skip to content
Snippets Groups Projects
Commit 21d90f5e authored by Micah Elizabeth Scott
Browse files

Shorter instructions for the swizzle, save 0.25ms

Woo!
parent 7d95de5c
No related branches found
No related tags found
No related merge requests found
......@@ -135,17 +135,6 @@ static void updateDrawBuffer(unsigned interpCoefficient)
for (int i = 0; i < LEDS_PER_STRIP; ++i) {
// Eight bit planes
union {
uint32_t word;
struct {
uint32_t x0:1, x1:1, x2:1, x3:1, x4:1, x5:1, x6:1, x7:1,
y0:1, y1:1, y2:1, y3:1, y4:1, y5:1, y6:1, y7:1,
z0:1, z1:1, z2:1, z3:1, z4:1, z5:1, z6:1, z7:1,
spare:8;
};
} p0, p1, p2, p3, p4, p5, p6, p7;
// Six output words
union {
uint32_t word;
......@@ -162,221 +151,221 @@ static void updateDrawBuffer(unsigned interpCoefficient)
* This generates fairly efficient code using the UBFX and BFI instructions.
*/
p0.word = updatePixel(icPrev, icNext, i + LEDS_PER_STRIP * 0);
o5.p0d = p0.x0;
o5.p0c = p0.x1;
o5.p0b = p0.x2;
o5.p0a = p0.x3;
o4.p0d = p0.x4;
o4.p0c = p0.x5;
o4.p0b = p0.x6;
o4.p0a = p0.x7;
o3.p0d = p0.y0;
o3.p0c = p0.y1;
o3.p0b = p0.y2;
o3.p0a = p0.y3;
o2.p0d = p0.y4;
o2.p0c = p0.y5;
o2.p0b = p0.y6;
o2.p0a = p0.y7;
o1.p0d = p0.z0;
o1.p0c = p0.z1;
o1.p0b = p0.z2;
o1.p0a = p0.z3;
o0.p0d = p0.z4;
o0.p0c = p0.z5;
o0.p0b = p0.z6;
o0.p0a = p0.z7;
p1.word = updatePixel(icPrev, icNext, i + LEDS_PER_STRIP * 1);
o5.p1d = p1.x0;
o5.p1c = p1.x1;
o5.p1b = p1.x2;
o5.p1a = p1.x3;
o4.p1d = p1.x4;
o4.p1c = p1.x5;
o4.p1b = p1.x6;
o4.p1a = p1.x7;
o3.p1d = p1.y0;
o3.p1c = p1.y1;
o3.p1b = p1.y2;
o3.p1a = p1.y3;
o2.p1d = p1.y4;
o2.p1c = p1.y5;
o2.p1b = p1.y6;
o2.p1a = p1.y7;
o1.p1d = p1.z0;
o1.p1c = p1.z1;
o1.p1b = p1.z2;
o1.p1a = p1.z3;
o0.p1d = p1.z4;
o0.p1c = p1.z5;
o0.p1b = p1.z6;
o0.p1a = p1.z7;
p2.word = updatePixel(icPrev, icNext, i + LEDS_PER_STRIP * 2);
o5.p2d = p2.x0;
o5.p2c = p2.x1;
o5.p2b = p2.x2;
o5.p2a = p2.x3;
o4.p2d = p2.x4;
o4.p2c = p2.x5;
o4.p2b = p2.x6;
o4.p2a = p2.x7;
o3.p2d = p2.y0;
o3.p2c = p2.y1;
o3.p2b = p2.y2;
o3.p2a = p2.y3;
o2.p2d = p2.y4;
o2.p2c = p2.y5;
o2.p2b = p2.y6;
o2.p2a = p2.y7;
o1.p2d = p2.z0;
o1.p2c = p2.z1;
o1.p2b = p2.z2;
o1.p2a = p2.z3;
o0.p2d = p2.z4;
o0.p2c = p2.z5;
o0.p2b = p2.z6;
o0.p2a = p2.z7;
p3.word = updatePixel(icPrev, icNext, i + LEDS_PER_STRIP * 3);
o5.p3d = p3.x0;
o5.p3c = p3.x1;
o5.p3b = p3.x2;
o5.p3a = p3.x3;
o4.p3d = p3.x4;
o4.p3c = p3.x5;
o4.p3b = p3.x6;
o4.p3a = p3.x7;
o3.p3d = p3.y0;
o3.p3c = p3.y1;
o3.p3b = p3.y2;
o3.p3a = p3.y3;
o2.p3d = p3.y4;
o2.p3c = p3.y5;
o2.p3b = p3.y6;
o2.p3a = p3.y7;
o1.p3d = p3.z0;
o1.p3c = p3.z1;
o1.p3b = p3.z2;
o1.p3a = p3.z3;
o0.p3d = p3.z4;
o0.p3c = p3.z5;
o0.p3b = p3.z6;
o0.p3a = p3.z7;
p4.word = updatePixel(icPrev, icNext, i + LEDS_PER_STRIP * 4);
o5.p4d = p4.x0;
o5.p4c = p4.x1;
o5.p4b = p4.x2;
o5.p4a = p4.x3;
o4.p4d = p4.x4;
o4.p4c = p4.x5;
o4.p4b = p4.x6;
o4.p4a = p4.x7;
o3.p4d = p4.y0;
o3.p4c = p4.y1;
o3.p4b = p4.y2;
o3.p4a = p4.y3;
o2.p4d = p4.y4;
o2.p4c = p4.y5;
o2.p4b = p4.y6;
o2.p4a = p4.y7;
o1.p4d = p4.z0;
o1.p4c = p4.z1;
o1.p4b = p4.z2;
o1.p4a = p4.z3;
o0.p4d = p4.z4;
o0.p4c = p4.z5;
o0.p4b = p4.z6;
o0.p4a = p4.z7;
p5.word = updatePixel(icPrev, icNext, i + LEDS_PER_STRIP * 5);
o5.p5d = p5.x0;
o5.p5c = p5.x1;
o5.p5b = p5.x2;
o5.p5a = p5.x3;
o4.p5d = p5.x4;
o4.p5c = p5.x5;
o4.p5b = p5.x6;
o4.p5a = p5.x7;
o3.p5d = p5.y0;
o3.p5c = p5.y1;
o3.p5b = p5.y2;
o3.p5a = p5.y3;
o2.p5d = p5.y4;
o2.p5c = p5.y5;
o2.p5b = p5.y6;
o2.p5a = p5.y7;
o1.p5d = p5.z0;
o1.p5c = p5.z1;
o1.p5b = p5.z2;
o1.p5a = p5.z3;
o0.p5d = p5.z4;
o0.p5c = p5.z5;
o0.p5b = p5.z6;
o0.p5a = p5.z7;
p6.word = updatePixel(icPrev, icNext, i + LEDS_PER_STRIP * 6);
o5.p6d = p6.x0;
o5.p6c = p6.x1;
o5.p6b = p6.x2;
o5.p6a = p6.x3;
o4.p6d = p6.x4;
o4.p6c = p6.x5;
o4.p6b = p6.x6;
o4.p6a = p6.x7;
o3.p6d = p6.y0;
o3.p6c = p6.y1;
o3.p6b = p6.y2;
o3.p6a = p6.y3;
o2.p6d = p6.y4;
o2.p6c = p6.y5;
o2.p6b = p6.y6;
o2.p6a = p6.y7;
o1.p6d = p6.z0;
o1.p6c = p6.z1;
o1.p6b = p6.z2;
o1.p6a = p6.z3;
o0.p6d = p6.z4;
o0.p6c = p6.z5;
o0.p6b = p6.z6;
o0.p6a = p6.z7;
p7.word = updatePixel(icPrev, icNext, i + LEDS_PER_STRIP * 7);
o5.p7d = p7.x0;
o5.p7c = p7.x1;
o5.p7b = p7.x2;
o5.p7a = p7.x3;
o4.p7d = p7.x4;
o4.p7c = p7.x5;
o4.p7b = p7.x6;
o4.p7a = p7.x7;
o3.p7d = p7.y0;
o3.p7c = p7.y1;
o3.p7b = p7.y2;
o3.p7a = p7.y3;
o2.p7d = p7.y4;
o2.p7c = p7.y5;
o2.p7b = p7.y6;
o2.p7a = p7.y7;
o1.p7d = p7.z0;
o1.p7c = p7.z1;
o1.p7b = p7.z2;
o1.p7a = p7.z3;
o0.p7d = p7.z4;
o0.p7c = p7.z5;
o0.p7b = p7.z6;
o0.p7a = p7.z7;
// Pixel i at offset LEDS_PER_STRIP * 0. Bits 0..23 of the returned word are
// scattered over the six output words, four bits per word: bits 0-3 go to o5,
// 4-7 to o4, 8-11 to o3, 12-15 to o2, 16-19 to o1 and 20-23 to o0. Within each
// group the lowest bit lands in the 'd' field and the highest in the 'a' field.
// NOTE(review): relies on the o*.p0* members being narrow bitfields that keep
// only the low bit(s) of the shifted value; confirm against the union declaration.
uint32_t p0 = updatePixel(icPrev, icNext, i + LEDS_PER_STRIP * 0);
o5.p0d = p0;
o5.p0c = p0 >> 1;
o5.p0b = p0 >> 2;
o5.p0a = p0 >> 3;
o4.p0d = p0 >> 4;
o4.p0c = p0 >> 5;
o4.p0b = p0 >> 6;
o4.p0a = p0 >> 7;
o3.p0d = p0 >> 8;
o3.p0c = p0 >> 9;
o3.p0b = p0 >> 10;
o3.p0a = p0 >> 11;
o2.p0d = p0 >> 12;
o2.p0c = p0 >> 13;
o2.p0b = p0 >> 14;
o2.p0a = p0 >> 15;
o1.p0d = p0 >> 16;
o1.p0c = p0 >> 17;
o1.p0b = p0 >> 18;
o1.p0a = p0 >> 19;
o0.p0d = p0 >> 20;
o0.p0c = p0 >> 21;
o0.p0b = p0 >> 22;
o0.p0a = p0 >> 23;
// Pixel i at offset LEDS_PER_STRIP * 1. The offset multiplier must match the
// pN index; with "* 0" this group would re-read the pixel already used for p0.
// Bits 0..23 of the returned word are scattered four per output word
// (o5 takes bits 0-3 ... o0 takes bits 20-23), lowest bit of each group in 'd'.
// NOTE(review): relies on the o*.p1* members being narrow bitfields that keep
// only the low bit(s) of the shifted value; confirm against the union declaration.
uint32_t p1 = updatePixel(icPrev, icNext, i + LEDS_PER_STRIP * 1);
o5.p1d = p1;
o5.p1c = p1 >> 1;
o5.p1b = p1 >> 2;
o5.p1a = p1 >> 3;
o4.p1d = p1 >> 4;
o4.p1c = p1 >> 5;
o4.p1b = p1 >> 6;
o4.p1a = p1 >> 7;
o3.p1d = p1 >> 8;
o3.p1c = p1 >> 9;
o3.p1b = p1 >> 10;
o3.p1a = p1 >> 11;
o2.p1d = p1 >> 12;
o2.p1c = p1 >> 13;
o2.p1b = p1 >> 14;
o2.p1a = p1 >> 15;
o1.p1d = p1 >> 16;
o1.p1c = p1 >> 17;
o1.p1b = p1 >> 18;
o1.p1a = p1 >> 19;
o0.p1d = p1 >> 20;
o0.p1c = p1 >> 21;
o0.p1b = p1 >> 22;
o0.p1a = p1 >> 23;
// Pixel i at offset LEDS_PER_STRIP * 2. The offset multiplier must match the
// pN index; with "* 0" this group would re-read the pixel already used for p0.
// Bits 0..23 of the returned word are scattered four per output word
// (o5 takes bits 0-3 ... o0 takes bits 20-23), lowest bit of each group in 'd'.
// NOTE(review): relies on the o*.p2* members being narrow bitfields that keep
// only the low bit(s) of the shifted value; confirm against the union declaration.
uint32_t p2 = updatePixel(icPrev, icNext, i + LEDS_PER_STRIP * 2);
o5.p2d = p2;
o5.p2c = p2 >> 1;
o5.p2b = p2 >> 2;
o5.p2a = p2 >> 3;
o4.p2d = p2 >> 4;
o4.p2c = p2 >> 5;
o4.p2b = p2 >> 6;
o4.p2a = p2 >> 7;
o3.p2d = p2 >> 8;
o3.p2c = p2 >> 9;
o3.p2b = p2 >> 10;
o3.p2a = p2 >> 11;
o2.p2d = p2 >> 12;
o2.p2c = p2 >> 13;
o2.p2b = p2 >> 14;
o2.p2a = p2 >> 15;
o1.p2d = p2 >> 16;
o1.p2c = p2 >> 17;
o1.p2b = p2 >> 18;
o1.p2a = p2 >> 19;
o0.p2d = p2 >> 20;
o0.p2c = p2 >> 21;
o0.p2b = p2 >> 22;
o0.p2a = p2 >> 23;
// Pixel i at offset LEDS_PER_STRIP * 3. The offset multiplier must match the
// pN index; with "* 0" this group would re-read the pixel already used for p0.
// Bits 0..23 of the returned word are scattered four per output word
// (o5 takes bits 0-3 ... o0 takes bits 20-23), lowest bit of each group in 'd'.
// NOTE(review): relies on the o*.p3* members being narrow bitfields that keep
// only the low bit(s) of the shifted value; confirm against the union declaration.
uint32_t p3 = updatePixel(icPrev, icNext, i + LEDS_PER_STRIP * 3);
o5.p3d = p3;
o5.p3c = p3 >> 1;
o5.p3b = p3 >> 2;
o5.p3a = p3 >> 3;
o4.p3d = p3 >> 4;
o4.p3c = p3 >> 5;
o4.p3b = p3 >> 6;
o4.p3a = p3 >> 7;
o3.p3d = p3 >> 8;
o3.p3c = p3 >> 9;
o3.p3b = p3 >> 10;
o3.p3a = p3 >> 11;
o2.p3d = p3 >> 12;
o2.p3c = p3 >> 13;
o2.p3b = p3 >> 14;
o2.p3a = p3 >> 15;
o1.p3d = p3 >> 16;
o1.p3c = p3 >> 17;
o1.p3b = p3 >> 18;
o1.p3a = p3 >> 19;
o0.p3d = p3 >> 20;
o0.p3c = p3 >> 21;
o0.p3b = p3 >> 22;
o0.p3a = p3 >> 23;
// Pixel i at offset LEDS_PER_STRIP * 4. The offset multiplier must match the
// pN index; with "* 0" this group would re-read the pixel already used for p0.
// Bits 0..23 of the returned word are scattered four per output word
// (o5 takes bits 0-3 ... o0 takes bits 20-23), lowest bit of each group in 'd'.
// NOTE(review): relies on the o*.p4* members being narrow bitfields that keep
// only the low bit(s) of the shifted value; confirm against the union declaration.
uint32_t p4 = updatePixel(icPrev, icNext, i + LEDS_PER_STRIP * 4);
o5.p4d = p4;
o5.p4c = p4 >> 1;
o5.p4b = p4 >> 2;
o5.p4a = p4 >> 3;
o4.p4d = p4 >> 4;
o4.p4c = p4 >> 5;
o4.p4b = p4 >> 6;
o4.p4a = p4 >> 7;
o3.p4d = p4 >> 8;
o3.p4c = p4 >> 9;
o3.p4b = p4 >> 10;
o3.p4a = p4 >> 11;
o2.p4d = p4 >> 12;
o2.p4c = p4 >> 13;
o2.p4b = p4 >> 14;
o2.p4a = p4 >> 15;
o1.p4d = p4 >> 16;
o1.p4c = p4 >> 17;
o1.p4b = p4 >> 18;
o1.p4a = p4 >> 19;
o0.p4d = p4 >> 20;
o0.p4c = p4 >> 21;
o0.p4b = p4 >> 22;
o0.p4a = p4 >> 23;
// Pixel i at offset LEDS_PER_STRIP * 5. The offset multiplier must match the
// pN index; with "* 0" this group would re-read the pixel already used for p0.
// Bits 0..23 of the returned word are scattered four per output word
// (o5 takes bits 0-3 ... o0 takes bits 20-23), lowest bit of each group in 'd'.
// NOTE(review): relies on the o*.p5* members being narrow bitfields that keep
// only the low bit(s) of the shifted value; confirm against the union declaration.
uint32_t p5 = updatePixel(icPrev, icNext, i + LEDS_PER_STRIP * 5);
o5.p5d = p5;
o5.p5c = p5 >> 1;
o5.p5b = p5 >> 2;
o5.p5a = p5 >> 3;
o4.p5d = p5 >> 4;
o4.p5c = p5 >> 5;
o4.p5b = p5 >> 6;
o4.p5a = p5 >> 7;
o3.p5d = p5 >> 8;
o3.p5c = p5 >> 9;
o3.p5b = p5 >> 10;
o3.p5a = p5 >> 11;
o2.p5d = p5 >> 12;
o2.p5c = p5 >> 13;
o2.p5b = p5 >> 14;
o2.p5a = p5 >> 15;
o1.p5d = p5 >> 16;
o1.p5c = p5 >> 17;
o1.p5b = p5 >> 18;
o1.p5a = p5 >> 19;
o0.p5d = p5 >> 20;
o0.p5c = p5 >> 21;
o0.p5b = p5 >> 22;
o0.p5a = p5 >> 23;
// Pixel i at offset LEDS_PER_STRIP * 6. The offset multiplier must match the
// pN index; with "* 0" this group would re-read the pixel already used for p0.
// Bits 0..23 of the returned word are scattered four per output word
// (o5 takes bits 0-3 ... o0 takes bits 20-23), lowest bit of each group in 'd'.
// NOTE(review): relies on the o*.p6* members being narrow bitfields that keep
// only the low bit(s) of the shifted value; confirm against the union declaration.
uint32_t p6 = updatePixel(icPrev, icNext, i + LEDS_PER_STRIP * 6);
o5.p6d = p6;
o5.p6c = p6 >> 1;
o5.p6b = p6 >> 2;
o5.p6a = p6 >> 3;
o4.p6d = p6 >> 4;
o4.p6c = p6 >> 5;
o4.p6b = p6 >> 6;
o4.p6a = p6 >> 7;
o3.p6d = p6 >> 8;
o3.p6c = p6 >> 9;
o3.p6b = p6 >> 10;
o3.p6a = p6 >> 11;
o2.p6d = p6 >> 12;
o2.p6c = p6 >> 13;
o2.p6b = p6 >> 14;
o2.p6a = p6 >> 15;
o1.p6d = p6 >> 16;
o1.p6c = p6 >> 17;
o1.p6b = p6 >> 18;
o1.p6a = p6 >> 19;
o0.p6d = p6 >> 20;
o0.p6c = p6 >> 21;
o0.p6b = p6 >> 22;
o0.p6a = p6 >> 23;
// Pixel i at offset LEDS_PER_STRIP * 7. The offset multiplier must match the
// pN index; with "* 0" this group would re-read the pixel already used for p0.
// Bits 0..23 of the returned word are scattered four per output word
// (o5 takes bits 0-3 ... o0 takes bits 20-23), lowest bit of each group in 'd'.
// NOTE(review): relies on the o*.p7* members being narrow bitfields that keep
// only the low bit(s) of the shifted value; confirm against the union declaration.
uint32_t p7 = updatePixel(icPrev, icNext, i + LEDS_PER_STRIP * 7);
o5.p7d = p7;
o5.p7c = p7 >> 1;
o5.p7b = p7 >> 2;
o5.p7a = p7 >> 3;
o4.p7d = p7 >> 4;
o4.p7c = p7 >> 5;
o4.p7b = p7 >> 6;
o4.p7a = p7 >> 7;
o3.p7d = p7 >> 8;
o3.p7c = p7 >> 9;
o3.p7b = p7 >> 10;
o3.p7a = p7 >> 11;
o2.p7d = p7 >> 12;
o2.p7c = p7 >> 13;
o2.p7b = p7 >> 14;
o2.p7a = p7 >> 15;
o1.p7d = p7 >> 16;
o1.p7c = p7 >> 17;
o1.p7b = p7 >> 18;
o1.p7a = p7 >> 19;
o0.p7d = p7 >> 20;
o0.p7c = p7 >> 21;
o0.p7b = p7 >> 22;
o0.p7a = p7 >> 23;
*(out++) = o0.word;
*(out++) = o1.word;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment