Below is a faster implementation of the strncpy function (previous version is here). Here is how the new, the old, and the standard implementations compare on the test:
test1: <new dps_strncpy> 3.00593 test2: <old dps_strncpy> 3.39416 test3: <standard strncpy> 5.06081 ratio(1/2): 0.89 ratio(1/3): 0.59 ratio(2/3): 0.67
The results are for FreeBSD 7.1 running on an Intel Core 2 Duo E8400 at 3 GHz. So the new implementation is about 40% faster than the standard one, and about 10% faster than the previous version.
If you test it, please post the results for your OS and processor in the comments.
/*
 * dps_strncpy - bounded string copy whose main loop is unrolled eight times.
 *
 * Copies characters from src0 to dst0 until either a terminating NUL has
 * been copied or `length` characters have been written, whichever happens
 * first.
 *
 * Differences from the standard strncpy():
 *   - after the terminator is copied, the rest of dst0 is NOT padded with
 *     NULs;
 *   - like strncpy(), dst0 is left without a terminator when no NUL occurs
 *     within the first `length` characters of src0.
 *
 * dst0    destination buffer; up to `length` characters are written to it
 * src0    source string; it is only read
 * length  maximum number of characters to write; 0 writes nothing
 *
 * Returns dst0.
 */
void * dps_strncpy(char *dst0, char *src0, size_t length) {
    char *dst = dst0;
    const char *src = src0;   /* the source is never written through */
    size_t head;
    size_t blocks;
    size_t i;

    if (length == 0) {
        return dst0;
    }

    /*
     * Split the work into a head of 1..8 characters followed by whole
     * 8-character blocks, so that head + 8 * blocks == length.  Computing
     * the block count from length / 8 (rather than (length + 7) / 8) avoids
     * unsigned wrap-around for lengths close to the maximum size_t value.
     */
    head = length % 8;
    blocks = length / 8;
    if (head == 0) {
        /* length is a non-zero multiple of 8, so blocks >= 1 here. */
        head = 8;
        blocks--;
    }

    /* Head: runs once per call, so a plain loop is sufficient. */
    for (i = 0; i < head; i++) {
        if ((dst[i] = src[i]) == '\0') {
            return dst0;
        }
    }
    src += head;
    dst += head;

    /* Body: eight characters per iteration, stopping right after a NUL. */
    while (blocks-- > 0) {
        if ((dst[0] = src[0]) == '\0') break;
        if ((dst[1] = src[1]) == '\0') break;
        if ((dst[2] = src[2]) == '\0') break;
        if ((dst[3] = src[3]) == '\0') break;
        if ((dst[4] = src[4]) == '\0') break;
        if ((dst[5] = src[5]) == '\0') break;
        if ((dst[6] = src[6]) == '\0') break;
        if ((dst[7] = src[7]) == '\0') break;
        src += 8;
        dst += 8;
    }

    return dst0;
}