Findings

Fast memcpy implementation


/*
 * dps_memcpy - copy `length` bytes from `src0` to `dst0`.
 *
 * The copy direction is picked from the relative addresses of the two
 * buffers: forward when dst0 lies below src0, backward otherwise.  As a
 * result overlapping regions are copied correctly, the way memmove() does.
 *
 * Each direction is a byte loop unrolled eight times and entered through a
 * switch on length % 8 (Duff's device): the switch jumps into the middle of
 * the first pass to consume the remainder, then every later pass copies a
 * full 8 bytes.  All case labels fall through on purpose.
 *
 * Parameters:
 *   dst0   - destination buffer, at least `length` bytes
 *   src0   - source buffer, at least `length` bytes (never written through)
 *   length - number of bytes to copy
 *
 * Returns dst0.  A zero length or dst0 == src0 is a no-op.
 */
void * dps_memcpy(void *dst0, const void *src0, size_t length) {
  size_t n;

  if (length == 0 || dst0 == src0)  /* nothing to do */
    return dst0;

  /* Number of passes through the unrolled loop, i.e. ceil(length / 8),
   * written so that it cannot wrap for lengths close to SIZE_MAX. */
  n = length / 8 + (length % 8 != 0);

  /* Addresses are compared as size_t rather than unsigned long: unsigned
   * long is narrower than a pointer on LLP64 targets (64-bit Windows) and
   * would truncate the addresses before the comparison. */
  if ((size_t)dst0 < (size_t)src0) { /* copy forward */
    unsigned char *dst = dst0;
    const unsigned char *src = src0;

    switch (length % 8) {
    case 0: do {    *dst++ = *src++;
    case 7:         *dst++ = *src++;
    case 6:         *dst++ = *src++;
    case 5:         *dst++ = *src++;
    case 4:         *dst++ = *src++;
    case 3:         *dst++ = *src++;
    case 2:         *dst++ = *src++;
    case 1:         *dst++ = *src++;
            } while (--n > 0);
    }
  } else { /* copy backward */
    /* Start one past the end of each buffer; the pointers are typed as
     * bytes first because arithmetic on void * is not ISO C. */
    unsigned char *dst = (unsigned char *)dst0 + length;
    const unsigned char *src = (const unsigned char *)src0 + length;

    switch (length % 8) {
    case 0: do {    *--dst = *--src;
    case 7:         *--dst = *--src;
    case 6:         *--dst = *--src;
    case 5:         *--dst = *--src;
    case 4:         *--dst = *--src;
    case 3:         *--dst = *--src;
    case 2:         *--dst = *--src;
    case 1:         *--dst = *--src;
            } while (--n > 0);
    }
  }
  return dst0;
}

N.B.: This code is licensed under the GPL.

Addendum: A faster version of memcpy