Skip to content

Even faster strncpy

Below is a faster implementation of the strncpy function (the previous version is here). Here is how the new, the old and the standard implementations compare on the test:

test1: <new dps_strncpy>  3.00593
test2: <old dps_strncpy>  3.39416
test3: <standard strncpy> 5.06081
ratio(1/2): 0.89
ratio(1/3): 0.59
ratio(2/3): 0.67


The results are for FreeBSD 7.1 running on an Intel Core 2 Duo E8400 at 3 GHz. So the new implementation is about 40% faster than the standard one, and about 10% faster than the previous version.

If you test it, please post the results for your OS and processor in the comments.


/*
 * dps_strncpy - bounded string copy with a hand-unrolled inner loop.
 *
 * Copies bytes from src0 to dst0 until either a terminating NUL has been
 * copied or `length` bytes have been written, whichever happens first.
 *
 * Differences from the standard strncpy():
 *   - once the NUL has been copied, the rest of dst0 is NOT zero-padded;
 *   - the return type is void * rather than char *.
 * As with strncpy(), dst0 is left unterminated when src0 has no NUL within
 * its first `length` bytes.
 *
 * dst0    destination buffer, at least `length` bytes when src0 is that long
 * src0    NUL-terminated source string (never written through)
 * length  maximum number of bytes to write; 0 means nothing is read or written
 *
 * Returns dst0.
 */
void * dps_strncpy(char *dst0, char *src0, size_t length) {
  if (length) {
    /*
     * Split `length` into a leading partial group of r bytes (1..8) followed
     * by n - 1 full groups of 8.  n is derived from length / 8 instead of
     * (length + 7) / 8 so the arithmetic cannot wrap around when length is
     * within 7 of SIZE_MAX.
     */
    register size_t n = length / 8;
    register size_t r = length % 8;
    register char *dst = dst0, *src = src0;
    if (r == 0) {
      r = 8;
    } else {
      n++;
    }
    /* Leading group: r >= 1 here, so byte 0 is always copied. */
    if (!(dst[0] = src[0])) return dst0;
    if (r > 1 && !(dst[1] = src[1])) return dst0;
    if (r > 2 && !(dst[2] = src[2])) return dst0;
    if (r > 3 && !(dst[3] = src[3])) return dst0;
    if (r > 4 && !(dst[4] = src[4])) return dst0;
    if (r > 5 && !(dst[5] = src[5])) return dst0;
    if (r > 6 && !(dst[6] = src[6])) return dst0;
    if (r > 7 && !(dst[7] = src[7])) return dst0;
    src += r; dst += r;
    /* Remaining full groups; stop as soon as the NUL has been copied. */
    while (--n > 0) {
      if (!(dst[0] = src[0])) break;
      if (!(dst[1] = src[1])) break;
      if (!(dst[2] = src[2])) break;
      if (!(dst[3] = src[3])) break;
      if (!(dst[4] = src[4])) break;
      if (!(dst[5] = src[5])) break;
      if (!(dst[6] = src[6])) break;
      if (!(dst[7] = src[7])) break;
      src += 8; dst += 8;
    }
  }
  return dst0;
}

NB: This post is also available in Russian.

2 thoughts on “Even faster strncpy”

  1. Maxime

    Corrected version that fixes the integer overflow in (length + 7) for length values larger than SIZE_MAX - 7:

    
    /*
     * Bounded string copy, unrolled eight bytes at a time.
     *
     * Writes at most `length` bytes from src0 into dst0 and stops right after
     * the terminating NUL has been copied.  The tail of dst0 is not
     * zero-filled, and dst0 stays unterminated when src0 has no NUL within
     * its first `length` bytes.  Returns dst0.
     */
    void * dps_strncpy(char *dst0, char *src0, size_t length) {
      char *out = dst0;
      char *in = src0;
      size_t groups;
      size_t head;

      if (length == 0) {
        return dst0;
      }

      /* head: size of the leading partial group (1..8); groups counts it too. */
      groups = length / 8;
      head = length % 8;
      if (head != 0) {
        groups++;
      } else {
        head = 8;
      }

      /* Copy the leading group by jumping into the unrolled sequence. */
      switch (head) {
      case 8: if ((*out++ = *in++) == '\0') return dst0; /* fallthrough */
      case 7: if ((*out++ = *in++) == '\0') return dst0; /* fallthrough */
      case 6: if ((*out++ = *in++) == '\0') return dst0; /* fallthrough */
      case 5: if ((*out++ = *in++) == '\0') return dst0; /* fallthrough */
      case 4: if ((*out++ = *in++) == '\0') return dst0; /* fallthrough */
      case 3: if ((*out++ = *in++) == '\0') return dst0; /* fallthrough */
      case 2: if ((*out++ = *in++) == '\0') return dst0; /* fallthrough */
      case 1: if ((*out++ = *in++) == '\0') return dst0;
        break;
      default:
        break;
      }

      /* The remaining groups are all exactly eight bytes long. */
      for (groups--; groups != 0; groups--) {
        if ((*out++ = *in++) == '\0') break;
        if ((*out++ = *in++) == '\0') break;
        if ((*out++ = *in++) == '\0') break;
        if ((*out++ = *in++) == '\0') break;
        if ((*out++ = *in++) == '\0') break;
        if ((*out++ = *in++) == '\0') break;
        if ((*out++ = *in++) == '\0') break;
        if ((*out++ = *in++) == '\0') break;
      }
      return dst0;
    }
    
  2. Pingback: Even faster strncpy, fixed | Founds

Leave a Reply

Your email address will not be published.