/*
 * Choose64_4_cedrickMMX
 *
 * Writes to pDest every 64-bit value that has exactly four bits set
 * (all C(64,4) = 635376 combinations), 8 bytes per value.
 *
 * pDest : caller-provided output buffer; must have room for
 *         635376 * 8 = 5083008 bytes. No bounds checking is done here.
 *
 * Order of the output: the four set bits a < b < c < d are enumerated with
 * d varying fastest and a slowest, each in ascending bit position, so the
 * first value is 0x0F and the last is 0xF000000000000000.
 *
 * Technique: a single bit x is moved one position up with "x + x", and the
 * running value is updated with additions only. When the highest bit is
 * shifted past bit 63 the 64-bit addition wraps, which removes it from the
 * running value; the constants 2^63 and 2^63 + 2^62 are added to wrap away
 * the leftover bits when the c-loop and the b-loop finish.
 *
 * Requirements: 32-bit x86 build with MSVC-style inline assembly; the CPU
 * must support MMX, MOVNTQ and the MMX form of PADDQ.
 *
 * Register roles inside the __asm block:
 *   MM0 = bit a (lowest)      EAX = remaining positions for a
 *   MM1 = bit b               EBX = remaining positions for b
 *   MM2 = bit c               ECX = remaining positions for c
 *   MM3 = bit d (highest)     EDX = remaining positions for d
 *   MM4 = value being built   EDI = output write pointer
 *   MM5 = 2^63                ESI = pointer to the constant table
 *   MM6 = 2^63 + 2^62
 */
void
Choose64_4_cedrickMMX(unsigned __int64 *pDest )
{
    /* k[0] = initial bit a; k[1], k[2] = wrap-around correction constants. */
    unsigned __int64 k[] = { 0x1ULL,
                             0x8000000000000000ULL,
                             0xC000000000000000ULL };
    unsigned __int64 *pSrc = k;

    __asm
    {
        pusha                           // preserve all general-purpose registers

        mov     ESI, pSrc
        movq    MM0, [ESI]              // a   = bit 0
        movq    MM5, [ESI + 8]          // 2^63
        movq    MM6, [ESI + 16]         // 2^63 + 2^62
        mov     EDI, pDest
        mov     EAX, 64 - 3             // a may sit at bits 0..60 (61 positions)
        movq    MM4, MM0                // value = a

    next_a:
        mov     EBX, EAX                // b has as many positions left as a
        movq    MM1, MM0
        paddq   MM1, MM1                // b = a << 1
        paddq   MM4, MM1                // value = a | b

    next_b:
        mov     ECX, EBX
        movq    MM2, MM1
        paddq   MM2, MM2                // c = b << 1
        paddq   MM4, MM2                // value = a | b | c

    next_c:
        mov     EDX, ECX
        movq    MM3, MM2
        paddq   MM3, MM3                // d = c << 1
        paddq   MM4, MM3                // value = a | b | c | d

    next_d:
        movntq  [EDI], MM4              // non-temporal store of the current value
        add     EDI, 8
        paddq   MM4, MM3                // value + d moves d up one bit
        paddq   MM3, MM3                // d <<= 1 (becomes 0 after bit 63)
        sub     EDX, 1
        jne     next_d
        // here d has wrapped out of bit 63: value = a | b | c

        paddq   MM4, MM2                // value + c moves c up one bit
        paddq   MM2, MM2                // c <<= 1
        sub     ECX, 1
        jne     next_c
        // here c was moved to bit 63: value = a | b | 2^63

        paddq   MM4, MM5                // + 2^63 wraps the stray bit away: value = a | b
        paddq   MM4, MM1                // value + b moves b up one bit
        paddq   MM1, MM1                // b <<= 1
        sub     EBX, 1
        jne     next_b
        // here b was moved to bit 62: value = a | 2^62

        paddq   MM4, MM6                // + (2^63 + 2^62) wraps it away: value = a
        paddq   MM4, MM0                // value + a moves a up one bit
        paddq   MM0, MM0                // a <<= 1
        sub     EAX, 1
        jne     next_a

        emms                            // leave MMX state so x87 code can run again
        sfence                          // order the non-temporal stores before returning
        popa
    }
}