<?xml version="1.0" encoding="UTF-8"?><rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
		>
<channel>
	<title>Comments on: Even faster memcpy</title>
	<atom:link href="http://blog.dataparksearch.org/208/feed" rel="self" type="application/rss+xml" />
	<link>http://blog.dataparksearch.org/208</link>
	<description>Just DataparkSearch weblog</description>
	<lastBuildDate>Sun, 01 Aug 2010 16:15:20 +0000</lastBuildDate>
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
	<generator>http://wordpress.org/?v=3.0.1</generator>
	<item>
		<title>By: wsrc</title>
		<link>http://blog.dataparksearch.org/208/comment-page-1#comment-21802</link>
		<dc:creator>wsrc</dc:creator>
		<pubDate>Mon, 05 Apr 2010 14:57:21 +0000</pubDate>
		<guid isPermaLink="false">http://blog.dataparksearch.org/?p=208#comment-21802</guid>
		<description>[...]     Even faster memcpy &#124; Founds Below is a faster implementation of the memcpy function (previous version is here). The comparison [...]</description>
		<content:encoded><![CDATA[<p>[...]     Even faster memcpy | Founds Below is a faster implementation of the memcpy function (previous version is here). The comparison [...]</p>
]]></content:encoded>
	</item>
	<item>
		<title>By: Maxime</title>
		<link>http://blog.dataparksearch.org/208/comment-page-1#comment-18969</link>
		<dc:creator>Maxime</dc:creator>
		<pubDate>Sat, 16 Jan 2010 22:52:34 +0000</pubDate>
		<guid isPermaLink="false">http://blog.dataparksearch.org/?p=208#comment-18969</guid>
		<description>Corrected version that solves the problem of integer overflow for length values larger than MAX_SIZE – 1:
&lt;pre&gt;&lt;code&gt;
/* Copy unit for the word-wise phase of dps_memcpy_new. */
typedef long long word;  // may be widened up to 32 bytes: the byte-wise helpers copy at most 31 bytes
/* Size of one word in bytes. */
#define wsize sizeof(word)
/* Mask selecting the byte offset inside a word; assumes wsize is a power of two. */
#define wmask (wsize - 1)

/*
 * Copy the first t bytes of src to dst in ascending index order.
 *
 * The nested tests are an unrolled loop: byte k is stored only when t exceeds k,
 * so at most 31 bytes (indices 0..30) are copied however large t is, and
 * nothing is copied when t is 0.
 */
void dps_minimove_forward(char *dst, const char *src, size_t t) {
	if (t) { dst[0] = src[0]; 
	if (t &gt; 1) { dst[1] = src[1]; 
	if (t &gt; 2) { dst[2] = src[2]; 
	if (t &gt; 3) { dst[3] = src[3]; 
	if (t &gt; 4) { dst[4] = src[4]; 
	if (t &gt; 5) { dst[5] = src[5]; 
	if (t &gt; 6) { dst[6] = src[6]; 
	if (t &gt; 7) { dst[7] = src[7]; 
	if (t &gt; 8 ) { dst[8] = src[8]; 
	if (t &gt; 9) { dst[9] = src[9]; 
	if (t &gt; 10) { dst[10] = src[10]; 
	if (t &gt; 11) { dst[11] = src[11]; 
	if (t &gt; 12) { dst[12] = src[12]; 
	if (t &gt; 13) { dst[13] = src[13]; 
	if (t &gt; 14) { dst[14] = src[14]; 
	if (t &gt; 15) { dst[15] = src[15]; 
	if (t &gt; 16) { dst[16] = src[16]; 
	if (t &gt; 17) { dst[17] = src[17]; 
	if (t &gt; 18) { dst[18] = src[18]; 
	if (t &gt; 19) { dst[19] = src[19]; 
	if (t &gt; 20) { dst[20] = src[20]; 
	if (t &gt; 21) { dst[21] = src[21]; 
	if (t &gt; 22) { dst[22] = src[22]; 
	if (t &gt; 23) { dst[23] = src[23]; 
	if (t &gt; 24) { dst[24] = src[24]; 
	if (t &gt; 25) { dst[25] = src[25]; 
	if (t &gt; 26) { dst[26] = src[26]; 
	if (t &gt; 27) { dst[27] = src[27]; 
	if (t &gt; 28) { dst[28] = src[28]; 
	if (t &gt; 29) { dst[29] = src[29]; 
	if (t &gt; 30) { dst[30] = src[30]; 
	/* one closing brace per nested test above */
	}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}

/*
 * Copy the first t bytes of src to dst, storing from the highest index down
 * to index 0 (the order the backward branch of dps_memcpy_new relies on).
 *
 * Values of t from 1 to 31 are handled: the switch enters at the case for t
 * and every case deliberately falls through to the ones below it. Any other
 * value of t, including 0, copies nothing.
 *
 * src is only read, so it is const-qualified like the src parameter of
 * dps_minimove_forward; callers passing a plain char pointer are unaffected.
 */
void dps_minimove_backward(char *dst, const char *src, size_t t) {
	switch(t) {
	case 31: dst[30] = src[30];
	case 30: dst[29] = src[29];
	case 29: dst[28] = src[28];
	case 28: dst[27] = src[27];
	case 27: dst[26] = src[26];
	case 26: dst[25] = src[25];
	case 25: dst[24] = src[24];
	case 24: dst[23] = src[23];
	case 23: dst[22] = src[22];
	case 22: dst[21] = src[21];
	case 21: dst[20] = src[20];
	case 20: dst[19] = src[19];
	case 19: dst[18] = src[18];
	case 18: dst[17] = src[17];
	case 17: dst[16] = src[16];
	case 16: dst[15] = src[15];
	case 15: dst[14] = src[14];
	case 14: dst[13] = src[13];
	case 13: dst[12] = src[12];
	case 12: dst[11] = src[11];
	case 11: dst[10] = src[10];
	case 10: dst[9] = src[9];
	case 9: dst[8] = src[8];
	case 8: dst[7] = src[7];
	case 7: dst[6] = src[6];
	case 6: dst[5] = src[5];
	case 5: dst[4] = src[4];
	case 4: dst[3] = src[3];
	case 3: dst[2] = src[2];
	case 2: dst[1] = src[1];
	case 1: dst[0] = src[0];
	default: break;	/* t == 0 or t above 31: nothing to copy */
	}
}

/*
 * Copy length bytes from src0 to dst0 and return dst0.
 *
 * The direction follows the pointer order: when dst0 lies below src0 the bytes
 * are copied from the lowest address upward, otherwise from the highest address
 * downward. Either way the work is split into a byte-wise head that brings src
 * onto a word boundary, a word-wise middle unrolled eight words per pass, and a
 * byte-wise tail shorter than one word.
 *
 * NOTE(review): only src is aligned before the word-wise phase; dst is written
 * through a word pointer at whatever alignment it ends up with. Confirm that the
 * target tolerates unaligned word stores.
 */
void * dps_memcpy_new(char *dst0, char *src0, size_t length) {
	size_t t;

	if (length == 0 &#124;&#124; dst0 == src0)		/* nothing to do */
		return dst0;
	if ((unsigned long long)dst0 &lt; (unsigned long long)src0) { /* copy forward */
		register char *dst = dst0, *src = src0;
		/* Cast through size_t, not unsigned int: the latter is narrower than a
		   pointer on 64-bit targets and that narrowing cast is not portable. */
		t = (size_t)src &amp; wmask;
		if (t) {
			if (length &lt; wsize) {
				t = length;	/* shorter than a word: copy it all byte-wise */
			} else {
				t = wsize - t;	/* bytes up to the next word boundary of src */
			}
			dps_minimove_forward(dst, src, t);
			length -= t;
			src += t; dst += t;
		}
		t = length / wsize;	/* whole words still to copy */
		if (t) {
			register size_t n = t / 8;
			register size_t r = (t % 8);
			register word *wdst = (word*)dst, *wsrc = (word*)src;
			/* r is the size of the first pass (1..8 words); n counts that
			   pass plus the full eight-word passes that follow it. */
			if (r == 0) r = 8; else n++;
			wdst[0] = wsrc[0];
			if (r &gt; 1) { wdst[1] = wsrc[1];
			if (r &gt; 2) { wdst[2] = wsrc[2];
			if (r &gt; 3) { wdst[3] = wsrc[3];
			if (r &gt; 4) { wdst[4] = wsrc[4];
			if (r &gt; 5) { wdst[5] = wsrc[5];
			if (r &gt; 6) { wdst[6] = wsrc[6];
			if (r &gt; 7) { wdst[7] = wsrc[7];
			}}}}}}}
			wsrc += r; wdst += r;
			while (--n &gt; 0) {
				wdst[0] = wsrc[0];
				wdst[1] = wsrc[1];
				wdst[2] = wsrc[2];
				wdst[3] = wsrc[3];
				wdst[4] = wsrc[4];
				wdst[5] = wsrc[5];
				wdst[6] = wsrc[6];
				wdst[7] = wsrc[7];
				wsrc += 8; wdst += 8;
			}
			dst = (char*)wdst; src = (char *)wsrc;
		}
		/* tail: whatever is left after the whole words */
		if ( (t = (length &amp; wmask)) ) dps_minimove_forward(dst, src, t);

	} else { /* copy backward */
		/* Start one past the end of both buffers and walk downward. */
		register char *dst = dst0 + length, *src = src0 + length;
		t = (size_t)src &amp; wmask;	/* bytes above the last word boundary of src */
		if (t) {
			if (length &lt; wsize) {
				t = length;	/* shorter than a word: copy it all byte-wise */
			}
			dst -= t; src -= t;
			length -= t;
			dps_minimove_backward(dst, src, t);
		}
		t = length / wsize;	/* whole words still to copy */
		if (t) {
			register size_t n = t / 8;
			register size_t r = (t % 8);
			register word *wdst = (word*)dst, *wsrc = (word*)src;
			/* Same pass accounting as in the forward branch. */
			if (r == 0) r = 8; else n++;
			wsrc -= r; wdst -= r;
			switch(r) {	/* cases fall through on purpose, highest word first */
			case 8:wdst[7] = wsrc[7];
			case 7:wdst[6] = wsrc[6];
			case 6:wdst[5] = wsrc[5];
			case 5:wdst[4] = wsrc[4];
			case 4:wdst[3] = wsrc[3];
			case 3:wdst[2] = wsrc[2];
			case 2:wdst[1] = wsrc[1];
			case 1:wdst[0] = wsrc[0];
			}
			while (--n &gt; 0) {
				wsrc -= 8; wdst -= 8;
				wdst[7] = wsrc[7];
				wdst[6] = wsrc[6];
				wdst[5] = wsrc[5];
				wdst[4] = wsrc[4];
				wdst[3] = wsrc[3];
				wdst[2] = wsrc[2];
				wdst[1] = wsrc[1];
				wdst[0] = wsrc[0];
			}
			dst = (char*)wdst; src = (char*)wsrc;
		}
		/* tail: the lowest bytes, fewer than one word */
		t = length &amp; wmask;
		dps_minimove_backward(dst - t, src - t, t);
	}
	return dst0;
}
&lt;/code&gt;&lt;/pre&gt;</description>
		<content:encoded><![CDATA[<p>Corrected version that solves the problem of integer overflow for length values larger than MAX_SIZE – 1:</p>
<pre><code>
/* Copy unit for the word-wise phase of dps_memcpy_new. */
typedef long long word;  // may be widened up to 32 bytes: the byte-wise helpers copy at most 31 bytes
/* Size of one word in bytes. */
#define wsize sizeof(word)
/* Mask selecting the byte offset inside a word; assumes wsize is a power of two. */
#define wmask (wsize - 1)

/*
 * Copy the first t bytes of src to dst in ascending index order.
 *
 * The nested tests are an unrolled loop: byte k is stored only when t exceeds k,
 * so at most 31 bytes (indices 0..30) are copied however large t is, and
 * nothing is copied when t is 0.
 */
void dps_minimove_forward(char *dst, const char *src, size_t t) {
	if (t) { dst[0] = src[0];
	if (t &gt; 1) { dst[1] = src[1];
	if (t &gt; 2) { dst[2] = src[2];
	if (t &gt; 3) { dst[3] = src[3];
	if (t &gt; 4) { dst[4] = src[4];
	if (t &gt; 5) { dst[5] = src[5];
	if (t &gt; 6) { dst[6] = src[6];
	if (t &gt; 7) { dst[7] = src[7];
	if (t &gt; 8 ) { dst[8] = src[8];
	if (t &gt; 9) { dst[9] = src[9];
	if (t &gt; 10) { dst[10] = src[10];
	if (t &gt; 11) { dst[11] = src[11];
	if (t &gt; 12) { dst[12] = src[12];
	if (t &gt; 13) { dst[13] = src[13];
	if (t &gt; 14) { dst[14] = src[14];
	if (t &gt; 15) { dst[15] = src[15];
	if (t &gt; 16) { dst[16] = src[16];
	if (t &gt; 17) { dst[17] = src[17];
	if (t &gt; 18) { dst[18] = src[18];
	if (t &gt; 19) { dst[19] = src[19];
	if (t &gt; 20) { dst[20] = src[20];
	if (t &gt; 21) { dst[21] = src[21];
	if (t &gt; 22) { dst[22] = src[22];
	if (t &gt; 23) { dst[23] = src[23];
	if (t &gt; 24) { dst[24] = src[24];
	if (t &gt; 25) { dst[25] = src[25];
	if (t &gt; 26) { dst[26] = src[26];
	if (t &gt; 27) { dst[27] = src[27];
	if (t &gt; 28) { dst[28] = src[28];
	if (t &gt; 29) { dst[29] = src[29];
	if (t &gt; 30) { dst[30] = src[30];
	/* one closing brace per nested test above */
	}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}

/*
 * Copy the first t bytes of src to dst, storing from the highest index down
 * to index 0 (the order the backward branch of dps_memcpy_new relies on).
 *
 * Values of t from 1 to 31 are handled: the switch enters at the case for t
 * and every case deliberately falls through to the ones below it. Any other
 * value of t, including 0, copies nothing.
 *
 * src is only read, so it is const-qualified like the src parameter of
 * dps_minimove_forward; callers passing a plain char pointer are unaffected.
 */
void dps_minimove_backward(char *dst, const char *src, size_t t) {
	switch(t) {
	case 31: dst[30] = src[30];
	case 30: dst[29] = src[29];
	case 29: dst[28] = src[28];
	case 28: dst[27] = src[27];
	case 27: dst[26] = src[26];
	case 26: dst[25] = src[25];
	case 25: dst[24] = src[24];
	case 24: dst[23] = src[23];
	case 23: dst[22] = src[22];
	case 22: dst[21] = src[21];
	case 21: dst[20] = src[20];
	case 20: dst[19] = src[19];
	case 19: dst[18] = src[18];
	case 18: dst[17] = src[17];
	case 17: dst[16] = src[16];
	case 16: dst[15] = src[15];
	case 15: dst[14] = src[14];
	case 14: dst[13] = src[13];
	case 13: dst[12] = src[12];
	case 12: dst[11] = src[11];
	case 11: dst[10] = src[10];
	case 10: dst[9] = src[9];
	case 9: dst[8] = src[8];
	case 8: dst[7] = src[7];
	case 7: dst[6] = src[6];
	case 6: dst[5] = src[5];
	case 5: dst[4] = src[4];
	case 4: dst[3] = src[3];
	case 3: dst[2] = src[2];
	case 2: dst[1] = src[1];
	case 1: dst[0] = src[0];
	default: break;	/* t == 0 or t above 31: nothing to copy */
	}
}

/*
 * Copy length bytes from src0 to dst0 and return dst0.
 *
 * The direction follows the pointer order: when dst0 lies below src0 the bytes
 * are copied from the lowest address upward, otherwise from the highest address
 * downward. Either way the work is split into a byte-wise head that brings src
 * onto a word boundary, a word-wise middle unrolled eight words per pass, and a
 * byte-wise tail shorter than one word.
 *
 * NOTE(review): only src is aligned before the word-wise phase; dst is written
 * through a word pointer at whatever alignment it ends up with. Confirm that the
 * target tolerates unaligned word stores.
 */
void * dps_memcpy_new(char *dst0, char *src0, size_t length) {
	size_t t;

	if (length == 0 || dst0 == src0)		/* nothing to do */
		return dst0;
	if ((unsigned long long)dst0 &lt; (unsigned long long)src0) { /* copy forward */
		register char *dst = dst0, *src = src0;
		/* Cast through size_t, not unsigned int: the latter is narrower than a
		   pointer on 64-bit targets and that narrowing cast is not portable. */
		t = (size_t)src & wmask;
		if (t) {
			if (length &lt; wsize) {
				t = length;	/* shorter than a word: copy it all byte-wise */
			} else {
				t = wsize - t;	/* bytes up to the next word boundary of src */
			}
			dps_minimove_forward(dst, src, t);
			length -= t;
			src += t; dst += t;
		}
		t = length / wsize;	/* whole words still to copy */
		if (t) {
			register size_t n = t / 8;
			register size_t r = (t % 8);
			register word *wdst = (word*)dst, *wsrc = (word*)src;
			/* r is the size of the first pass (1..8 words); n counts that
			   pass plus the full eight-word passes that follow it. */
			if (r == 0) r = 8; else n++;
			wdst[0] = wsrc[0];
			if (r &gt; 1) { wdst[1] = wsrc[1];
			if (r &gt; 2) { wdst[2] = wsrc[2];
			if (r &gt; 3) { wdst[3] = wsrc[3];
			if (r &gt; 4) { wdst[4] = wsrc[4];
			if (r &gt; 5) { wdst[5] = wsrc[5];
			if (r &gt; 6) { wdst[6] = wsrc[6];
			if (r &gt; 7) { wdst[7] = wsrc[7];
			}}}}}}}
			wsrc += r; wdst += r;
			while (--n &gt; 0) {
				wdst[0] = wsrc[0];
				wdst[1] = wsrc[1];
				wdst[2] = wsrc[2];
				wdst[3] = wsrc[3];
				wdst[4] = wsrc[4];
				wdst[5] = wsrc[5];
				wdst[6] = wsrc[6];
				wdst[7] = wsrc[7];
				wsrc += 8; wdst += 8;
			}
			dst = (char*)wdst; src = (char *)wsrc;
		}
		/* tail: whatever is left after the whole words */
		if ( (t = (length &#038; wmask)) ) dps_minimove_forward(dst, src, t);

	} else { /* copy backward */
		/* Start one past the end of both buffers and walk downward. */
		register char *dst = dst0 + length, *src = src0 + length;
		t = (size_t)src & wmask;	/* bytes above the last word boundary of src */
		if (t) {
			if (length &lt; wsize) {
				t = length;	/* shorter than a word: copy it all byte-wise */
			}
			dst -= t; src -= t;
			length -= t;
			dps_minimove_backward(dst, src, t);
		}
		t = length / wsize;	/* whole words still to copy */
		if (t) {
			register size_t n = t / 8;
			register size_t r = (t % 8);
			register word *wdst = (word*)dst, *wsrc = (word*)src;
			/* Same pass accounting as in the forward branch. */
			if (r == 0) r = 8; else n++;
			wsrc -= r; wdst -= r;
			switch(r) {	/* cases fall through on purpose, highest word first */
			case 8:wdst[7] = wsrc[7];
			case 7:wdst[6] = wsrc[6];
			case 6:wdst[5] = wsrc[5];
			case 5:wdst[4] = wsrc[4];
			case 4:wdst[3] = wsrc[3];
			case 3:wdst[2] = wsrc[2];
			case 2:wdst[1] = wsrc[1];
			case 1:wdst[0] = wsrc[0];
			}
			while (--n &gt; 0) {
				wsrc -= 8; wdst -= 8;
				wdst[7] = wsrc[7];
				wdst[6] = wsrc[6];
				wdst[5] = wsrc[5];
				wdst[4] = wsrc[4];
				wdst[3] = wsrc[3];
				wdst[2] = wsrc[2];
				wdst[1] = wsrc[1];
				wdst[0] = wsrc[0];
			}
			dst = (char*)wdst; src = (char*)wsrc;
		}
		/* tail: the lowest bytes, fewer than one word */
		t = length & wmask;
		dps_minimove_backward(dst - t, src - t, t);
	}
	return dst0;
}
</code></pre>
]]></content:encoded>
	</item>
</channel>
</rss>
