diff --git a/testjig/production/arm_debug.cpp b/testjig/production/arm_debug.cpp
index 144b040759cba6263c4cfc7d12240621eb2f82da..bb6ea320d436dfa44e77a9b85bd41d575b235db3 100644
--- a/testjig/production/arm_debug.cpp
+++ b/testjig/production/arm_debug.cpp
@@ -31,6 +31,7 @@ bool ARMDebug::begin(unsigned clockPin, unsigned dataPin, LogLevel logLevel)
     this->clockPin = clockPin;
     this->dataPin = dataPin;
     this->logLevel = logLevel;
+    fastPins = dataPin == ARMDEBUG_FAST_DATA_PIN && clockPin == ARMDEBUG_FAST_CLOCK_PIN;  // fast path needs BOTH pins to match the compile-time pair
     pinMode(clockPin, OUTPUT);
     pinMode(dataPin, INPUT_PULLUP);
 
@@ -573,11 +574,25 @@ void ARMDebug::wireWrite(uint32_t data, unsigned nBits)
 {
     log(LOG_TRACE_SWD, "SWD Write %08x (%d)", data, nBits);
 
-    while (nBits--) {
-        digitalWrite(dataPin, data & 1);
-        data >>= 1;
-        digitalWrite(clockPin, LOW);
-        digitalWrite(clockPin, HIGH);
+    if (fastPins) {
+        // Fast path: pin numbers are compile-time constants, so the pin writes can be inlined
+
+        while (nBits--) {
+            digitalWriteFast(ARMDEBUG_FAST_DATA_PIN, data & 1);  // least significant bit goes out first
+            digitalWriteFast(ARMDEBUG_FAST_CLOCK_PIN, LOW);
+            data >>= 1;
+            digitalWriteFast(ARMDEBUG_FAST_CLOCK_PIN, HIGH);
+        }
+
+    } else {
+        // Slow (generic) path: pins are only known at runtime (the dataPin/clockPin members)
+
+        while (nBits--) {
+            digitalWrite(dataPin, data & 1);  // least significant bit goes out first
+            digitalWrite(clockPin, LOW);
+            data >>= 1;
+            digitalWrite(clockPin, HIGH);
+        }
     }
 }
 
@@ -593,13 +608,29 @@ uint32_t ARMDebug::wireRead(unsigned nBits)
     uint32_t mask = 1;
     unsigned count = nBits;
 
-    while (count--) {
-        if (digitalRead(dataPin)) {
-            result |= mask;
+    if (fastPins) {
+        // Fast path: pin numbers are compile-time constants, so the pin accesses can be inlined
+
+        while (count--) {
+            if (digitalReadFast(ARMDEBUG_FAST_DATA_PIN)) {  // sample the data line before pulsing the clock
+                result |= mask;
+            }
+            digitalWriteFast(ARMDEBUG_FAST_CLOCK_PIN, LOW);
+            mask <<= 1;  // next bit position; the first bit read lands in bit 0
+            digitalWriteFast(ARMDEBUG_FAST_CLOCK_PIN, HIGH);
+        }
+
+    } else {
+        // Slow (generic) path: pins are only known at runtime (the dataPin/clockPin members)
+
+        while (count--) {
+            if (digitalRead(dataPin)) {  // sample the data line before pulsing the clock
+                result |= mask;
+            }
+            digitalWrite(clockPin, LOW);
+            mask <<= 1;  // next bit position; the first bit read lands in bit 0
+            digitalWrite(clockPin, HIGH);
         }
-        mask <<= 1;
-        digitalWrite(clockPin, LOW);
-        digitalWrite(clockPin, HIGH);
     }
 
     log(LOG_TRACE_SWD, "SWD Read  %08x (%d)", result, nBits);
@@ -610,21 +641,46 @@ void ARMDebug::wireWriteTurnaround()
 {
     log(LOG_TRACE_SWD, "SWD Write trn");
 
-    digitalWrite(dataPin, HIGH);
-    pinMode(dataPin, INPUT_PULLUP);
-    digitalWrite(clockPin, LOW);
-    digitalWrite(clockPin, HIGH);
-    pinMode(dataPin, OUTPUT);
+    if (fastPins) {
+        // Fast path: pin numbers are compile-time constants, so the pin writes can be inlined
+
+        digitalWriteFast(ARMDEBUG_FAST_DATA_PIN, HIGH);
+        pinMode(ARMDEBUG_FAST_DATA_PIN, INPUT_PULLUP);
+        digitalWriteFast(ARMDEBUG_FAST_CLOCK_PIN, LOW);
+        digitalWriteFast(ARMDEBUG_FAST_CLOCK_PIN, HIGH);  // one clock pulse while the data pin is an input
+        pinMode(ARMDEBUG_FAST_DATA_PIN, OUTPUT);  // data pin is an output again on return
+
+    } else {
+        // Slow (generic) path: pins are only known at runtime (the dataPin/clockPin members)
+
+        digitalWrite(dataPin, HIGH);
+        pinMode(dataPin, INPUT_PULLUP);
+        digitalWrite(clockPin, LOW);
+        digitalWrite(clockPin, HIGH);  // one clock pulse while the data pin is an input
+        pinMode(dataPin, OUTPUT);  // data pin is an output again on return
+    }
 }
 
 void ARMDebug::wireReadTurnaround()
 {
     log(LOG_TRACE_SWD, "SWD Read  trn");
 
-    digitalWrite(dataPin, HIGH);
-    pinMode(dataPin, INPUT_PULLUP);
-    digitalWrite(clockPin, LOW);
-    digitalWrite(clockPin, HIGH);
+    if (fastPins) {
+        // Fast path: pin numbers are compile-time constants, so the pin writes can be inlined
+
+        digitalWriteFast(ARMDEBUG_FAST_DATA_PIN, HIGH);
+        pinMode(ARMDEBUG_FAST_DATA_PIN, INPUT_PULLUP);
+        digitalWriteFast(ARMDEBUG_FAST_CLOCK_PIN, LOW);
+        digitalWriteFast(ARMDEBUG_FAST_CLOCK_PIN, HIGH);  // one clock pulse; the data pin is left as an input
+
+    } else {
+        // Slow (generic) path: pins are only known at runtime (the dataPin/clockPin members)
+
+        digitalWrite(dataPin, HIGH);
+        pinMode(dataPin, INPUT_PULLUP);
+        digitalWrite(clockPin, LOW);
+        digitalWrite(clockPin, HIGH);  // one clock pulse; the data pin is left as an input
+    }
 }
 
 void ARMDebug::log(int level, const char *fmt, ...)
diff --git a/testjig/production/arm_debug.h b/testjig/production/arm_debug.h
index 8dcaeeae5dd870dfb62930f1988c2e75694c4595..63299b75385d76cea4019ed458de989bf1e01299 100644
--- a/testjig/production/arm_debug.h
+++ b/testjig/production/arm_debug.h
@@ -25,6 +25,16 @@
 #include <stdint.h>
 #include <stdbool.h>
 
+/*
+ * Compile-time optimization for one particular clock/data pin pair.
+ *
+ * Any clock/data pins will work, and we can support multiple debug interfaces,
+ * but an interface that uses exactly this pair will be faster due to
+ * compile-time inlining of the bit-banging code.
+ */
+#define ARMDEBUG_FAST_CLOCK_PIN     3
+#define ARMDEBUG_FAST_DATA_PIN      4
+
 
 class ARMDebug
 {
@@ -74,7 +84,7 @@ public:
     void setLogLevel(LogLevel newLevel, LogLevel &prevLevel);
 
 private:
-    uint8_t clockPin, dataPin;
+    uint8_t clockPin, dataPin, fastPins;  // fastPins: nonzero when both pins equal the ARMDEBUG_FAST_* constants
     LogLevel logLevel;
 
     // Cached versions of ARM debug registers
diff --git a/testjig/production/arm_kinetis_debug.cpp b/testjig/production/arm_kinetis_debug.cpp
index 7baef85f5df0c9934084609f4dc051173152df52..8198ce384e07beb6b1e3b62d2fb0a591027ce74f 100644
--- a/testjig/production/arm_kinetis_debug.cpp
+++ b/testjig/production/arm_kinetis_debug.cpp
@@ -117,13 +117,12 @@ bool ARMKinetisDebug::debugHalt()
         setLogLevel(savedLogLevel);
     }
 
-    if (haltRetries) {
-        log(LOG_NORMAL, "CPU halt successful. Now in debug mode.");
-        return true;
+    if (!haltRetries) {
+        log(LOG_ERROR, "ARMKinetisDebug: Failed to put CPU in debug halt state. (DHCSR: %08x)", dhcsr);
+        return false;
     }
 
-    log(LOG_ERROR, "ARMKinetisDebug: Failed to put CPU in debug halt state. (DHCSR: %08x)", dhcsr);
-    return false;
+    return true;
 }
 
 bool ARMKinetisDebug::peripheralInit()
@@ -332,21 +331,64 @@ bool ARMKinetisDebug::flashEraseAndProgram(const uint32_t *image, unsigned numSe
     if (!flashMassErase())
         return false;
 
+    // Reset again after mass erase, for new protection bits to take effect
+    if (!reset())
+        return false;
+    if (!debugHalt())
+        return false;
+
     uint32_t address = 0;
-    while (numSectors) {
+    uint32_t count = numSectors;  // working copies: numSectors and image are reused by the verify pass
+    const uint32_t *ptr = image;
 
-        log(LOG_NORMAL, "FLASH: Programming at %08x, %d sectors left", address, numSectors);
+    while (count) {
+        log(LOG_NORMAL, "FLASH: Programming sector at %08x", address);
 
-        if (!flashSectorBufferWrite(0, image, FLASH_SECTOR_SIZE/4))
+        if (!flashSectorBufferWrite(0, ptr, FLASH_SECTOR_SIZE/4))
             return false;
         if (!flashSectorProgram(address))
             return false;
 
-        numSectors--;
+        count--;
+        address += FLASH_SECTOR_SIZE;
+        ptr += FLASH_SECTOR_SIZE/4;  // ptr steps in 32-bit words, address in bytes
+    }
+
+    // Reset once more, so the new protection flags are loaded before verifying.
+    if (!reset())
+        return false;
+    if (!debugHalt())
+        return false;
+
+    // Verify flash memory: read each sector back and compare it word by word with the image
+
+    uint32_t buffer[FLASH_SECTOR_SIZE/4];  // readback buffer for one sector, kept on the stack
+    address = 0;
+    count = numSectors;
+    ptr = image;
+
+    while (count) {
+        log(LOG_NORMAL, "FLASH: Verifying sector at %08x", address);
+
+        if (!memLoad(address, buffer, FLASH_SECTOR_SIZE/4))
+            return false;
+
+        bool okay = true;
+        for (unsigned i = 0; i < FLASH_SECTOR_SIZE/4; i++) {
+            if (buffer[i] != ptr[i]) {
+                log(LOG_ERROR, "FLASH: Verify error at %08x. Expected %08x, actual %08x",
+                    address + i*4, ptr[i], buffer[i]);
+                okay = false;  // no break: every mismatching word in this sector gets logged
+            }
+        }
+
+        if (!okay)  // give up after the first sector that fails verification
+            return false;
+        count--;
         address += FLASH_SECTOR_SIZE;
-        image += FLASH_SECTOR_SIZE/4;
+        ptr += FLASH_SECTOR_SIZE/4;  // ptr steps in 32-bit words, address in bytes
     }
 
-    log(LOG_NORMAL, "FLASH: Programming complete!");
+    log(LOG_NORMAL, "FLASH: Programming successful!");
     return true;
 }
diff --git a/testjig/production/production.ino b/testjig/production/production.ino
index 7ea262c71ecc15a94c51727875a5bdac7b58aeb3..29386a4efaf4138156fdcf56646cd58a7f2d7f5e 100644
--- a/testjig/production/production.ino
+++ b/testjig/production/production.ino
@@ -48,9 +48,6 @@ void loop()
     if (!target.flashEraseAndProgram(firmwareData, firmwareSectorCount))
         return;
 
-    // Hex dump programmed firmware a little
-    target.hexDump(0, 128);
-
     /*
      * Try blinking an LED on the target!
      */