As a continuation of the post on bubbles, caches and branch predictors, here is a new version of bubble sort à la Duff's device:
/*
 * Bubble-sort passes, unrolled 8x in the style of Duff's device.
 *
 * Pass i compare-swaps the adjacent pairs (a[i], a[i-1]) down to
 * (a[1], a[0]), moving the smaller element of each pair toward index 0.
 *
 * i, N, n, s, src, a and swap() are declared outside this snippet;
 * swap(x, y) is assumed to exchange the two elements -- confirm.
 */
for (i = 1; i < N; i++) {
    n = (i + 7) / 8;        /* number of 8-wide groups in this pass, rounded up */
    s = i % 8;              /* compare-swaps in the first (partial) group; 0 means a full group */
    src = a + (n - 1) * 8;  /* base of the highest group */
    switch (s) {            /* jump into the middle of the unrolled loop body */
    case 0: do { if (src[8] < src[7]) swap(src[8], src[7]);
    case 7:      if (src[7] < src[6]) swap(src[7], src[6]);
    case 6:      if (src[6] < src[5]) swap(src[6], src[5]);
    case 5:      if (src[5] < src[4]) swap(src[5], src[4]);
    case 4:      if (src[4] < src[3]) swap(src[4], src[3]);
    case 3:      if (src[3] < src[2]) swap(src[3], src[2]);
    case 2:      if (src[2] < src[1]) swap(src[2], src[1]);
    case 1:      if (src[1] < src[0]) swap(src[1], src[0]);
                 /*
                  * Test the group counter BEFORE stepping src down: after the
                  * last group src == a, and computing a - 8 would form a
                  * pointer before the start of the array (undefined behaviour).
                  */
                 if (--n <= 0) break;  /* leaves the do-while, not the switch */
                 src -= 8;             /* move to the next lower group of 8 */
            } while (1);
    }
}
Updated code: bubble-duff.zip