Skip to content

Even faster strncpy, fixed

Even faster strncpy has a caveat of nonpadding by 0's if the length of source string is less than the length specified by a function parameter. The version below has this issue fixed, but it's still faster than standard version of strncpy, if the code is compiled with optimization on a modern processor.


typedef long long word;  /* up to 32 bytes long */
#define wsize sizeof(word)
#define wmask (wsize - 1)

inline void dps_minibzero(char *dst, size_t t) {
	if (t) { dst[0] = '\0'; 
	if (t > 1) { dst[1] = '\0'; 
	if (t > 2) { dst[2] = '\0'; 
	if (t > 3) { dst[3] = '\0'; 
	if (t > 4) { dst[4] = '\0'; 
	if (t > 5) { dst[5] = '\0'; 
	if (t > 6) { dst[6] = '\0'; 
	if (t > 7) { dst[7] = '\0'; 
	if (t > 8 ) { dst[8] = '\0'; 
	if (t > 9) { dst[9] = '\0'; 
	if (t > 10) { dst[10] = '\0'; 
	if (t > 11) { dst[11] = '\0'; 
	if (t > 12) { dst[12] = '\0'; 
	if (t > 13) { dst[13] = '\0'; 
	if (t > 14) { dst[14] = '\0'; 
	if (t > 15) { dst[15] = '\0'; 
	if (t > 16) { dst[16] = '\0'; 
	if (t > 17) { dst[17] = '\0'; 
	if (t > 18) { dst[18] = '\0'; 
	if (t > 19) { dst[19] = '\0'; 
	if (t > 20) { dst[20] = '\0'; 
	if (t > 21) { dst[21] = '\0'; 
	if (t > 22) { dst[22] = '\0'; 
	if (t > 23) { dst[23] = '\0'; 
	if (t > 24) { dst[24] = '\0'; 
	if (t > 25) { dst[25] = '\0'; 
	if (t > 26) { dst[26] = '\0'; 
	if (t > 27) { dst[27] = '\0'; 
	if (t > 28) { dst[28] = '\0'; 
	if (t > 29) { dst[29] = '\0'; 
	if (t > 30) { dst[30] = '\0'; 
	}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}


void * dps_strncpy(char *dst0, char *src0, size_t length) {
  if (length) {
    register size_t n = length / 8;
    register size_t r = (length % 8);
    register char *dst = dst0, *src = src0;
    if (r == 0) r = 8; else n++;
    if (!(dst[0] = src[0])) { dst++; src++; goto dps_strncpy_second_pas; }
    if (r > 1) { if (!(dst[1] = src[1])) { dst += 2; src += 2; goto dps_strncpy_second_pas; }
    if (r > 2) { if (!(dst[2] = src[2])) { dst += 3; src += 3; goto dps_strncpy_second_pas; }
    if (r > 3) { if (!(dst[3] = src[3])) { dst += 4; src += 4; goto dps_strncpy_second_pas; }
    if (r > 4) { if (!(dst[4] = src[4])) { dst += 5; src += 5; goto dps_strncpy_second_pas; }
    if (r > 5) { if (!(dst[5] = src[5])) { dst += 6; src += 6; goto dps_strncpy_second_pas; }
    if (r > 6) { if (!(dst[6] = src[6])) { dst += 7; src += 7; goto dps_strncpy_second_pas; }
    if (r > 7) { if (!(dst[7] = src[7])) { dst += 8; src += 8; goto dps_strncpy_second_pas; }
    }}}}}}}
    src += r; dst += r;
    while (--n > 0) {
      if (!(dst[0] = src[0])) { dst++; src++; goto dps_strncpy_second_pas; }
      if (!(dst[1] = src[1])) { dst += 2; src += 2; goto dps_strncpy_second_pas; }
      if (!(dst[2] = src[2])) { dst += 3; src += 3; goto dps_strncpy_second_pas; }
      if (!(dst[3] = src[3])) { dst += 4; src += 4; goto dps_strncpy_second_pas; }
      if (!(dst[4] = src[4])) { dst += 5; src += 5; goto dps_strncpy_second_pas; }
      if (!(dst[5] = src[5])) { dst += 6; src += 6; goto dps_strncpy_second_pas; }
      if (!(dst[6] = src[6])) { dst += 7; src += 7; goto dps_strncpy_second_pas; }
      if (!(dst[7] = src[7])) { dst += 8; src += 8; goto dps_strncpy_second_pas; }
      src += 8; dst += 8;
    }
dps_strncpy_second_pas:
    if (dst < dst0 + length) {
      size_t t, restlen = length - (dst - dst0);
      t = (unsigned int)dst & wmask;
      if (t) {
    	if (restlen < wsize) {
		t = restlen;
    	} else {
		t = wsize - t;
    	}
	bzero(dst, t);
	dps_minibzero(dst, t);
	restlen -= t;
	dst += t;
      }
      t = restlen / wsize;
      if (t) {
	n = t / 8;
    	r = (t % 8 );
	register word *wdst = (word*)dst;
    	if (r == 0) r = 8; else n++;
    	wdst[0] = (word)0;
    	if (r > 1) { wdst[1] = (word)0;
    	if (r > 2) { wdst[2] = (word)0;
    	if (r > 3) { wdst[3] = (word)0;
    	if (r > 4) { wdst[4] = (word)0;
    	if (r > 5) { wdst[5] = (word)0;
    	if (r > 6) { wdst[6] = (word)0;
    	if (r > 7) { wdst[7] = (word)0;
	}}}}}}}
    	wdst += r;
    	while (--n > 0) {
    		wdst[0] = (word)0;
    		wdst[1] = (word)0;
    		wdst[2] = (word)0;
    		wdst[3] = (word)0;
    		wdst[4] = (word)0;
    		wdst[5] = (word)0;
    		wdst[6] = (word)0;
    		wdst[7] = (word)0;
    		wdst += 8;
    	}
 	dst = (char*)wdst;
      }
      if ( (t = (restlen & wmask)) ) dps_minibzero(dst, t);
    }
  }
  return dst0;
}

Leave a Reply

Your email address will not be published.