unit FastcodeIsPrimeUnit; (* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1 * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License * for the specific language governing rights and limitations under the * License. * * The Original Code is Fastcode * * The Initial Developer of the Original Code is Fastcode * * Portions created by the Initial Developer are Copyright (C) 2002-2004 * the Initial Developer. All Rights Reserved. * * Contributor(s): Hagen Reddmann, John O'Harrow, Dennis Christensen * * ***** END LICENSE BLOCK ***** *) //Version : 1.0 //Only direct calling supported //Last update 26/2 2005 interface function IsPrimeFastcodeP4P(Number : Cardinal) : Boolean; function IsPrimeFastcodeP4N(Number : Cardinal) : Boolean; function IsPrimeFastcodePMB(Number : Cardinal) : Boolean; function IsPrimeFastcodeAMD64(Number : Cardinal) : Boolean; function IsPrimeFastcodeXP(Number : Cardinal) : Boolean; function IsPrimeFastcodeBlended(Number : Cardinal) : Boolean; function IsPrimeFastcodeRTL(Number : Cardinal) : Boolean; function IsPrimeFastcodePascal(Number : Cardinal) : Boolean; implementation uses Math; {$DEFINE SmallLookup} {$IFDEF SmallLookup} // Tablesize = 160 bytes const MaxPrime = 137; MaxTrial = MaxPrime * MaxPrime; // maximal trial div bounds MinPrime = 137; // trial div bounds if N >= MaxTrial, must be <= MaxPrime Primes: array[0..31] of Byte = ( 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131, 137); InvPrimes: array[0..31] of Cardinal = // InvPrimes[i] = Primes[i]^-1 mod 2^32 ($AAAAAAAB,$CCCCCCCD,$B6DB6DB7,$BA2E8BA3,$C4EC4EC5,$F0F0F0F1,$286BCA1B,$E9BD37A7, $4F72C235,$BDEF7BDF,$914C1BAD,$C18F9C19,$2FA0BE83,$677D46CF,$8C13521D,$A08AD8F3, $C10C9715,$07A44C6B,$E327A977,$C7E3F1F9,$613716AF,$2B2E43DB,$FA3F47E9,$5F02A3A1, $7C32B16D,$D3431B57,$8D28AC43,$DA6C0965,$0FDBC091,$EFDFBF7F,$C9484E2B,$077975B9); {$ELSE} // Tablesize = 510 bytes, get small speedup compared to above lookup tables. // The speedup are ~100 cycles with small numbers < 1000000, and over full 2^32 range // are few cycles slower. Thus it's not worth to activate big lookup tables. // Currently on Dennis testcases the big lookup tables are faster. const MaxPrime = 443; MaxTrial = MaxPrime * MaxPrime; MinPrime = 173; Primes: array[0..84] of Word = ( 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293, 307, 311, 313, 317, 331, 337, 347, 349, 353, 359, 367, 373, 379, 383, 389, 397, 401, 409, 419, 421, 431, 433, 439, 443); InvPrimes: array[0..84] of Cardinal = // InvPrimes[i] = Primes[i]^-1 mod 2^32 ($AAAAAAAB,$CCCCCCCD,$B6DB6DB7,$BA2E8BA3,$C4EC4EC5,$F0F0F0F1,$286BCA1B,$E9BD37A7, $4F72C235,$BDEF7BDF,$914C1BAD,$C18F9C19,$2FA0BE83,$677D46CF,$8C13521D,$A08AD8F3, $C10C9715,$07A44C6B,$E327A977,$C7E3F1F9,$613716AF,$2B2E43DB,$FA3F47E9,$5F02A3A1, $7C32B16D,$D3431B57,$8D28AC43,$DA6C0965,$0FDBC091,$EFDFBF7F,$C9484E2B,$077975B9, $70586723,$8CE2CABD,$BF937F27,$2C0685B5,$451AB30B,$DB35A717,$0D516325,$D962AE7B, $10F8ED9D,$EE936F3F,$90948F41,$3D137E0D,$EF46C0F7,$6E68575B,$DB43BB1F,$9BA144CB, $478BBCED,$1FDCD759,$437B2E0F,$10FEF011,$9A020A33,$FF00FF01,$70E99CB7,$6205B5C5, $A27ACDEF,$25E4463D,$0749CB29,$C9B97113,$84CE32AD,$C74BE1FB,$A7198487,$39409D09, $6F71DE15,$BFCE8063,$F61FE7B1,$70E046D3,$F1545AF5,$9A7862A1,$2A128A57,$B7747D8F, $BB5E06DD,$12E9B5B3,$EC9DBE7F,$EC41CF4D,$AEC02945,$8382DF71,$84B1C2A9,$75EB3A0B, $FA86FE2D,$3F8DF54F,$0975A751,$C3EFAC07,$A8299B73); {$ENDIF} //Author: John O'Harrow //Optimized for: Intel Pentium 4 Prescott //Instructionset(s): IA32 //Original Name: IsPrimeJOH_IA32_6 function IsPrimeFastcodeP4P(Number : Cardinal) : Boolean; const {Size = 348 Bytes + 80 Byte Tables = 431 Bytes} Primes : packed array[1..16] of Byte = {Excludes 2,3,5} (7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67); InversePrimes : array[1..16] of Cardinal = ($B6DB6DB7,$BA2E8BA3,$C4EC4EC5,$F0F0F0F1, $286BCA1B,$E9BD37A7,$4F72C235,$BDEF7BDF, $914C1BAD,$C18F9C19,$2FA0BE83,$677D46CF, $8C13521D,$A08AD8F3,$C10C9715,$07A44C6B); TableSize = SizeOf(Primes); MaxTablePrime = 67; asm test al, 1 {Check for Even Number} jz @@Even {Exit of Even} cmp eax, 7 {Check if Number <= 7} jbe @@SmallOdd {Check Low Odd Numbers} mov ecx, eax {Check Number MOD 3} mov edx, $AAAAAAAB mul edx shr edx, 1 lea eax, [edx+edx*2] sub eax, ecx jz @@Exit {Exit if Number MOD 3 = 0} mov eax, ecx {Check Number MOD 5} mov edx, $CCCCCCCD mul edx shr edx, 2 lea eax, [edx+edx*4] sub eax, ecx jz @@Exit {Exit if Number MOD 5 = 0} mov eax, ecx push ebx {Save Used Registers} push ebp push edi push esi mov ebx, eax {Keep Number in EBX} mov ecx, -TableSize {Set-Up Prime Table Lookup} mov esi, offset Primes + TableSize mov edi, offset InversePrimes + TableSize*4 cmp eax, MaxTablePrime * MaxTablePrime ja @@LargeLoop {Large if Number > Max Table Value Squared} push eax fild dword ptr [esp] fsqrt fistp dword ptr [esp] pop ebp {EBP = Sqrt(Number)} @@SmallLoop: {Test Small Value against Lookup Table} movzx edx, byte ptr [esi+ecx] cmp edx, ebp ja @@SetResult mov eax, ebx imul eax, [edi+ecx*4] inc ecx mul edx jc @@SmallLoop @@False: xor edx, edx @@SetResult: setnz al @@Done: pop esi {Restore Used Registers} pop edi pop ebp pop ebx @@Exit: ret @@Even: {Even Number - Only Prime if Number = 2} cmp eax, 2 sete al ret @@SmallOdd: {Small Odd Number - Prime if Number <> 1} cmp eax, 1 setne al ret @@LargeLoop: {Large Number} mov eax, ebx movzx edx, byte ptr [esi+ecx] imul eax, [edi+ecx*4] mul edx jnc @@False inc ecx jnz @@LargeLoop lea ebp, [ebx-1] bsf ecx, ebp dec ecx shr ebp, 2 shr ebp, cl push ecx cmp ebx, 267859 jae @@CheckLarge cmp ebx, 16229 je @@LargeDone mov eax, 227206 {Use 1 SPP Test} call @@SPPTest jmp @@LargeDone @@CheckLarge: cmp ebx, 64042903 jae @@VeryLarge mov eax, 4923 {Use 2 SPP Tests} call @@SPPTestSmallBase jc @@LargeDone mov eax, 463 jmp @@LastSPPTestSmall @@VeryLarge: {Use 3 SPP Tests} mov eax, 2 call @@SPPTestSmallBase jc @@LargeDone mov eax, 7 call @@SPPTestSmallBase jc @@LargeDone mov eax, 61 @@LastSPPTestSmall: call @@SPPTestSmallBase @@LargeDone: setnc al add esp, 4 jmp @@Done @@SPPTest: {Entry Point where Base^2 could be > Number} mov ecx, ebp mov esi, eax mov edi, eax jmp @@SPPEven @@SPPTestSmallBase: {Entry Point where Base^2 < Number - Skip First DIV} mov ecx, ebp mov esi, eax mul eax mov edi, eax jmp @@SPPCheck @@SPPLoop1: jnc @@SPPEven mov eax, esi mul edi jc @@DoDiv1 cmp eax, ebx mov esi, eax jna @@SppEven @@DoDiv1: div ebx mov esi, edx @@SPPEven: mov eax, edi mul edi jc @@DoDiv2 cmp eax, ebx mov edi, eax jna @@SppCheck @@DoDiv2: div ebx mov edi, edx @@SPPCheck: shr ecx, 1 jnz @@SPPLoop1 mov eax, esi mul edi div ebx cmp edx, 1 je @@SppExit lea edi, [ebx-1] mov ecx, [esp+4] @@SPPLoop2: cmp edx, edi je @@SppExit mov eax, edx mul edx div ebx sub ecx, 1 jnc @@SPPLoop2 @@SPPExit: end; //Author: John O'Harrow //Optimized for: Intel Pentium 4 Northwood //Instructionset(s): IA32 //Original Name: IsPrimeJOH_IA32_6 function IsPrimeFastcodeP4N(Number : Cardinal) : Boolean; const {Size = 348 Bytes + 80 Byte Tables = 431 Bytes} Primes : packed array[1..16] of Byte = {Excludes 2,3,5} (7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67); InversePrimes : array[1..16] of Cardinal = ($B6DB6DB7,$BA2E8BA3,$C4EC4EC5,$F0F0F0F1, $286BCA1B,$E9BD37A7,$4F72C235,$BDEF7BDF, $914C1BAD,$C18F9C19,$2FA0BE83,$677D46CF, $8C13521D,$A08AD8F3,$C10C9715,$07A44C6B); TableSize = SizeOf(Primes); MaxTablePrime = 67; asm test al, 1 {Check for Even Number} jz @@Even {Exit of Even} cmp eax, 7 {Check if Number <= 7} jbe @@SmallOdd {Check Low Odd Numbers} mov ecx, eax {Check Number MOD 3} mov edx, $AAAAAAAB mul edx shr edx, 1 lea eax, [edx+edx*2] sub eax, ecx jz @@Exit {Exit if Number MOD 3 = 0} mov eax, ecx {Check Number MOD 5} mov edx, $CCCCCCCD mul edx shr edx, 2 lea eax, [edx+edx*4] sub eax, ecx jz @@Exit {Exit if Number MOD 5 = 0} mov eax, ecx push ebx {Save Used Registers} push ebp push edi push esi mov ebx, eax {Keep Number in EBX} mov ecx, -TableSize {Set-Up Prime Table Lookup} mov esi, offset Primes + TableSize mov edi, offset InversePrimes + TableSize*4 cmp eax, MaxTablePrime * MaxTablePrime ja @@LargeLoop {Large if Number > Max Table Value Squared} push eax fild dword ptr [esp] fsqrt fistp dword ptr [esp] pop ebp {EBP = Sqrt(Number)} @@SmallLoop: {Test Small Value against Lookup Table} movzx edx, byte ptr [esi+ecx] cmp edx, ebp ja @@SetResult mov eax, ebx imul eax, [edi+ecx*4] inc ecx mul edx jc @@SmallLoop @@False: xor edx, edx @@SetResult: setnz al @@Done: pop esi {Restore Used Registers} pop edi pop ebp pop ebx @@Exit: ret @@Even: {Even Number - Only Prime if Number = 2} cmp eax, 2 sete al ret @@SmallOdd: {Small Odd Number - Prime if Number <> 1} cmp eax, 1 setne al ret @@LargeLoop: {Large Number} mov eax, ebx movzx edx, byte ptr [esi+ecx] imul eax, [edi+ecx*4] mul edx jnc @@False inc ecx jnz @@LargeLoop lea ebp, [ebx-1] bsf ecx, ebp dec ecx shr ebp, 2 shr ebp, cl push ecx cmp ebx, 267859 jae @@CheckLarge cmp ebx, 16229 je @@LargeDone mov eax, 227206 {Use 1 SPP Test} call @@SPPTest jmp @@LargeDone @@CheckLarge: cmp ebx, 64042903 jae @@VeryLarge mov eax, 4923 {Use 2 SPP Tests} call @@SPPTestSmallBase jc @@LargeDone mov eax, 463 jmp @@LastSPPTestSmall @@VeryLarge: {Use 3 SPP Tests} mov eax, 2 call @@SPPTestSmallBase jc @@LargeDone mov eax, 7 call @@SPPTestSmallBase jc @@LargeDone mov eax, 61 @@LastSPPTestSmall: call @@SPPTestSmallBase @@LargeDone: setnc al add esp, 4 jmp @@Done @@SPPTest: {Entry Point where Base^2 could be > Number} mov ecx, ebp mov esi, eax mov edi, eax jmp @@SPPEven @@SPPTestSmallBase: {Entry Point where Base^2 < Number - Skip First DIV} mov ecx, ebp mov esi, eax mul eax mov edi, eax jmp @@SPPCheck @@SPPLoop1: jnc @@SPPEven mov eax, esi mul edi jc @@DoDiv1 cmp eax, ebx mov esi, eax jna @@SppEven @@DoDiv1: div ebx mov esi, edx @@SPPEven: mov eax, edi mul edi jc @@DoDiv2 cmp eax, ebx mov edi, eax jna @@SppCheck @@DoDiv2: div ebx mov edi, edx @@SPPCheck: shr ecx, 1 jnz @@SPPLoop1 mov eax, esi mul edi div ebx cmp edx, 1 je @@SppExit lea edi, [ebx-1] mov ecx, [esp+4] @@SPPLoop2: cmp edx, edi je @@SppExit mov eax, edx mul edx div ebx sub ecx, 1 jnc @@SPPLoop2 @@SPPExit: end; //Author: Hagen Reddmann //Optimized for: Intel Pentium M Banias //Instructionset(s): IA32 //Original Name: IsPrimeHR_IA32_2 function IsPrimeFastcodePMB(Number: Cardinal): Boolean; // Montgomery Version of IsPrimeHR_IA32_1() asm TEST AL,1 JZ @@0 // even ?? CMP EAX,7 JA @@1 // > 7 ?? DEC EAX SETNZ AL RET @@0: CMP EAX,2 // even numbers SETZ AL RET @@1: PUSH EBP // do trial divsion to small primes PUSH EBX PUSH ESI PUSH EDI MOV EBX,EAX CMP EAX,MaxTrial MOV EBP,MinPrime JAE @@2 PUSH EAX FILD DWord Ptr [ESP] FSQRT FISTP DWord Ptr [ESP] POP EBP // EBP = Sqrt(N) @@2: MOV EDI,Offset Primes MOV ESI,Offset InvPrimes XOR ECX,ECX {$IFDEF SmallLookup} @@3: MOVZX EDX,Byte Ptr [EDI + ECX] // take care, InvPrimes[] MUST {$ELSE} // be after Primes[] declared @@3: MOVZX EDX,Word Ptr [EDI + ECX * 2] {$ENDIF} MOV EAX,EBX CMP EDX,EBP JA @@5 IMUL EAX,[ESI + ECX * 4] INC ECX MUL EDX JC @@3 TEST EDX,EDX @@4: POP EDI POP ESI POP EBX POP EBP SETNZ AL RET @@5: CMP EBX,MaxTrial // N <= MaxPrime^2 ?? MOV EAX,EBX JBE @@4 IMUL EAX,EBX // compute domain param U = -N^-1 mod 2^32 SUB EAX,2 // Lookuptable can reduce from 72 donwto 32 cycles IMUL EAX,EBX MOV EDX,EAX IMUL EAX,EBX ADD EAX,2 IMUL EAX,EDX MOV EDX,EAX IMUL EAX,EBX ADD EAX,2 IMUL EAX,EDX MOV EBP,EAX IMUL EBP,EBX ADD EBP,2 IMUL EBP,EAX // U = -N^-1 mod^2^32 in EBP MOV EDI,EBX MOV EAX,EBX DEC EDI // N -1 NEG EAX BSF ECX,EDI MUL EAX PUSH ECX // bits remain [ESP + 20] MOV ESI,EAX BSR ECX,EDI MOV EAX,EDX XOR EDX,EDX NEG ECX DIV EBX // div, can't be removed MOV EAX,ESI ADD ECX,32 DIV EBX // div SHL EDI,CL MOV EAX,EDX IMUL EAX,EBP MOV ESI,EDX // C = -N^2 mod N, to fast convert into MUL EBX // montgomery domain PUSH ESI // C [ESP + 16] ADD EAX,ESI ADC EDX,0 PUSH EDX // +1 in montgomery [ESP + 12] PUSH EDI // bit mask exponent [ESP + 8] NEG EDX ADD EDX,EBX CMP EBX,$08A8D7F // N < $08A8D7F, do SPP to bases 31,73 PUSH EDX // -1 in montgomery [ESP + 4] JAE @@6 MOV EAX,31 CALL @@9 MOV EAX,73 PUSH Offset @@7 JMP @@9 @@6: MOV EAX,2 // do SPP to bases 2,7,61 CALL @@9 MOV EAX,7 CALL @@9 MOV EAX,61 CALL @@9 @@7: INC EAX @@8: LEA ESP,[ESP + 4 * 5] // don't change flags !! JMP @@4 @@9: MUL DWord Ptr [ESP + 16] // convert base in montgomery MOV EDI,EAX // Base' = Base * C mod N IMUL EAX,EBP // montgomery REDC MOV ESI,EDX MUL EBX ADD EAX,EDI ADC EDX,ESI MOV ECX,[ESP + 8] // bit mask of exponent N -1 MOV EAX,EDX PUSH EDX @@A: MUL EAX // X = X^2 mod N MOV EDI,EAX IMUL EAX,EBP MOV ESI,EDX MUL EBX ADD EAX,EDI ADC EDX,ESI JNC @@B SUB EDX,EBX @@B: ADD ECX,ECX MOV EAX,EDX JNC @@D MUL DWord Ptr [ESP] // X = X * Base mod N MOV EDI,EAX IMUL EAX,EBP MOV ESI,EDX MUL EBX ADD EAX,EDI ADC EDX,ESI JNC @@C SUB EDX,EBX @@C: TEST ECX,ECX MOV EAX,EDX @@D: JNZ @@A CMP EAX,EBX JB @@E SUB EAX,EBX @@E: CMP EAX,[ESP + 16] // == +1 ?? MOV ECX,[ESP + 24] // bits remain JE @@J @@F: CMP EAX,[ESP + 8] // == -1 ?? JE @@J DEC ECX JNG @@I MUL EAX MOV EDI,EAX IMUL EAX,EBP MOV ESI,EDX MUL EBX ADD EAX,EDI ADC EDX,ESI JC @@G CMP EDX,EBX JB @@H @@G: SUB EDX,EBX @@H: CMP EDX,[ESP + 16] // <> +1 ?? MOV EAX,EDX JNE @@F @@I: ADD ESP,8 XOR EAX,EAX JMP @@8 @@J: POP EDX end; //Author: Hagen Reddmann //Optimized for: AMD 64, Opteron, FX, Athlon64 //Instructionset(s): IA32 //Original Name: IsPrimeHR_IA32_2 function IsPrimeFastcodeAMD64(Number: Cardinal): Boolean; // Montgomery Version of IsPrimeHR_IA32_1() asm TEST AL,1 JZ @@0 // even ?? CMP EAX,7 JA @@1 // > 7 ?? DEC EAX SETNZ AL RET @@0: CMP EAX,2 // even numbers SETZ AL RET @@1: PUSH EBP // do trial divsion to small primes PUSH EBX PUSH ESI PUSH EDI MOV EBX,EAX CMP EAX,MaxTrial MOV EBP,MinPrime JAE @@2 PUSH EAX FILD DWord Ptr [ESP] FSQRT FISTP DWord Ptr [ESP] POP EBP // EBP = Sqrt(N) @@2: MOV EDI,Offset Primes MOV ESI,Offset InvPrimes XOR ECX,ECX {$IFDEF SmallLookup} @@3: MOVZX EDX,Byte Ptr [EDI + ECX] // take care, InvPrimes[] MUST {$ELSE} // be after Primes[] declared @@3: MOVZX EDX,Word Ptr [EDI + ECX * 2] {$ENDIF} MOV EAX,EBX CMP EDX,EBP JA @@5 IMUL EAX,[ESI + ECX * 4] INC ECX MUL EDX JC @@3 TEST EDX,EDX @@4: POP EDI POP ESI POP EBX POP EBP SETNZ AL RET @@5: CMP EBX,MaxTrial // N <= MaxPrime^2 ?? MOV EAX,EBX JBE @@4 IMUL EAX,EBX // compute domain param U = -N^-1 mod 2^32 SUB EAX,2 // Lookuptable can reduce from 72 donwto 32 cycles IMUL EAX,EBX MOV EDX,EAX IMUL EAX,EBX ADD EAX,2 IMUL EAX,EDX MOV EDX,EAX IMUL EAX,EBX ADD EAX,2 IMUL EAX,EDX MOV EBP,EAX IMUL EBP,EBX ADD EBP,2 IMUL EBP,EAX // U = -N^-1 mod^2^32 in EBP MOV EDI,EBX MOV EAX,EBX DEC EDI // N -1 NEG EAX BSF ECX,EDI MUL EAX PUSH ECX // bits remain [ESP + 20] MOV ESI,EAX BSR ECX,EDI MOV EAX,EDX XOR EDX,EDX NEG ECX DIV EBX // div, can't be removed MOV EAX,ESI ADD ECX,32 DIV EBX // div SHL EDI,CL MOV EAX,EDX IMUL EAX,EBP MOV ESI,EDX // C = -N^2 mod N, to fast convert into MUL EBX // montgomery domain PUSH ESI // C [ESP + 16] ADD EAX,ESI ADC EDX,0 PUSH EDX // +1 in montgomery [ESP + 12] PUSH EDI // bit mask exponent [ESP + 8] NEG EDX ADD EDX,EBX CMP EBX,$08A8D7F // N < $08A8D7F, do SPP to bases 31,73 PUSH EDX // -1 in montgomery [ESP + 4] JAE @@6 MOV EAX,31 CALL @@9 MOV EAX,73 PUSH Offset @@7 JMP @@9 @@6: MOV EAX,2 // do SPP to bases 2,7,61 CALL @@9 MOV EAX,7 CALL @@9 MOV EAX,61 CALL @@9 @@7: INC EAX @@8: LEA ESP,[ESP + 4 * 5] // don't change flags !! JMP @@4 @@9: MUL DWord Ptr [ESP + 16] // convert base in montgomery MOV EDI,EAX // Base' = Base * C mod N IMUL EAX,EBP // montgomery REDC MOV ESI,EDX MUL EBX ADD EAX,EDI ADC EDX,ESI MOV ECX,[ESP + 8] // bit mask of exponent N -1 MOV EAX,EDX PUSH EDX @@A: MUL EAX // X = X^2 mod N MOV EDI,EAX IMUL EAX,EBP MOV ESI,EDX MUL EBX ADD EAX,EDI ADC EDX,ESI JNC @@B SUB EDX,EBX @@B: ADD ECX,ECX MOV EAX,EDX JNC @@D MUL DWord Ptr [ESP] // X = X * Base mod N MOV EDI,EAX IMUL EAX,EBP MOV ESI,EDX MUL EBX ADD EAX,EDI ADC EDX,ESI JNC @@C SUB EDX,EBX @@C: TEST ECX,ECX MOV EAX,EDX @@D: JNZ @@A CMP EAX,EBX JB @@E SUB EAX,EBX @@E: CMP EAX,[ESP + 16] // == +1 ?? MOV ECX,[ESP + 24] // bits remain JE @@J @@F: CMP EAX,[ESP + 8] // == -1 ?? JE @@J DEC ECX JNG @@I MUL EAX MOV EDI,EAX IMUL EAX,EBP MOV ESI,EDX MUL EBX ADD EAX,EDI ADC EDX,ESI JC @@G CMP EDX,EBX JB @@H @@G: SUB EDX,EBX @@H: CMP EDX,[ESP + 16] // <> +1 ?? MOV EAX,EDX JNE @@F @@I: ADD ESP,8 XOR EAX,EAX JMP @@8 @@J: POP EDX end; //Author: Hagen Reddmann //Optimized for: AMD Athlon XP //Instructionset(s): IA32 //Original Name: IsPrimeHR_IA32_2 function IsPrimeFastcodeXP(Number: Cardinal): Boolean; // Montgomery Version of IsPrimeHR_IA32_1() asm TEST AL,1 JZ @@0 // even ?? CMP EAX,7 JA @@1 // > 7 ?? DEC EAX SETNZ AL RET @@0: CMP EAX,2 // even numbers SETZ AL RET @@1: PUSH EBP // do trial divsion to small primes PUSH EBX PUSH ESI PUSH EDI MOV EBX,EAX CMP EAX,MaxTrial MOV EBP,MinPrime JAE @@2 PUSH EAX FILD DWord Ptr [ESP] FSQRT FISTP DWord Ptr [ESP] POP EBP // EBP = Sqrt(N) @@2: MOV EDI,Offset Primes MOV ESI,Offset InvPrimes XOR ECX,ECX {$IFDEF SmallLookup} @@3: MOVZX EDX,Byte Ptr [EDI + ECX] // take care, InvPrimes[] MUST {$ELSE} // be after Primes[] declared @@3: MOVZX EDX,Word Ptr [EDI + ECX * 2] {$ENDIF} MOV EAX,EBX CMP EDX,EBP JA @@5 IMUL EAX,[ESI + ECX * 4] INC ECX MUL EDX JC @@3 TEST EDX,EDX @@4: POP EDI POP ESI POP EBX POP EBP SETNZ AL RET @@5: CMP EBX,MaxTrial // N <= MaxPrime^2 ?? MOV EAX,EBX JBE @@4 IMUL EAX,EBX // compute domain param U = -N^-1 mod 2^32 SUB EAX,2 // Lookuptable can reduce from 72 donwto 32 cycles IMUL EAX,EBX MOV EDX,EAX IMUL EAX,EBX ADD EAX,2 IMUL EAX,EDX MOV EDX,EAX IMUL EAX,EBX ADD EAX,2 IMUL EAX,EDX MOV EBP,EAX IMUL EBP,EBX ADD EBP,2 IMUL EBP,EAX // U = -N^-1 mod^2^32 in EBP MOV EDI,EBX MOV EAX,EBX DEC EDI // N -1 NEG EAX BSF ECX,EDI MUL EAX PUSH ECX // bits remain [ESP + 20] MOV ESI,EAX BSR ECX,EDI MOV EAX,EDX XOR EDX,EDX NEG ECX DIV EBX // div, can't be removed MOV EAX,ESI ADD ECX,32 DIV EBX // div SHL EDI,CL MOV EAX,EDX IMUL EAX,EBP MOV ESI,EDX // C = -N^2 mod N, to fast convert into MUL EBX // montgomery domain PUSH ESI // C [ESP + 16] ADD EAX,ESI ADC EDX,0 PUSH EDX // +1 in montgomery [ESP + 12] PUSH EDI // bit mask exponent [ESP + 8] NEG EDX ADD EDX,EBX CMP EBX,$08A8D7F // N < $08A8D7F, do SPP to bases 31,73 PUSH EDX // -1 in montgomery [ESP + 4] JAE @@6 MOV EAX,31 CALL @@9 MOV EAX,73 PUSH Offset @@7 JMP @@9 @@6: MOV EAX,2 // do SPP to bases 2,7,61 CALL @@9 MOV EAX,7 CALL @@9 MOV EAX,61 CALL @@9 @@7: INC EAX @@8: LEA ESP,[ESP + 4 * 5] // don't change flags !! JMP @@4 @@9: MUL DWord Ptr [ESP + 16] // convert base in montgomery MOV EDI,EAX // Base' = Base * C mod N IMUL EAX,EBP // montgomery REDC MOV ESI,EDX MUL EBX ADD EAX,EDI ADC EDX,ESI MOV ECX,[ESP + 8] // bit mask of exponent N -1 MOV EAX,EDX PUSH EDX @@A: MUL EAX // X = X^2 mod N MOV EDI,EAX IMUL EAX,EBP MOV ESI,EDX MUL EBX ADD EAX,EDI ADC EDX,ESI JNC @@B SUB EDX,EBX @@B: ADD ECX,ECX MOV EAX,EDX JNC @@D MUL DWord Ptr [ESP] // X = X * Base mod N MOV EDI,EAX IMUL EAX,EBP MOV ESI,EDX MUL EBX ADD EAX,EDI ADC EDX,ESI JNC @@C SUB EDX,EBX @@C: TEST ECX,ECX MOV EAX,EDX @@D: JNZ @@A CMP EAX,EBX JB @@E SUB EAX,EBX @@E: CMP EAX,[ESP + 16] // == +1 ?? MOV ECX,[ESP + 24] // bits remain JE @@J @@F: CMP EAX,[ESP + 8] // == -1 ?? JE @@J DEC ECX JNG @@I MUL EAX MOV EDI,EAX IMUL EAX,EBP MOV ESI,EDX MUL EBX ADD EAX,EDI ADC EDX,ESI JC @@G CMP EDX,EBX JB @@H @@G: SUB EDX,EBX @@H: CMP EDX,[ESP + 16] // <> +1 ?? MOV EAX,EDX JNE @@F @@I: ADD ESP,8 XOR EAX,EAX JMP @@8 @@J: POP EDX end; //Author: Hagen Reddmann //Optimized for: Blended //Instructionset(s): IA32 //Original Name: IsPrimeHR_IA32_2 function IsPrimeFastcodeBlended(Number: Cardinal): Boolean; // Montgomery Version of IsPrimeHR_IA32_1() asm TEST AL,1 JZ @@0 // even ?? CMP EAX,7 JA @@1 // > 7 ?? DEC EAX SETNZ AL RET @@0: CMP EAX,2 // even numbers SETZ AL RET @@1: PUSH EBP // do trial divsion to small primes PUSH EBX PUSH ESI PUSH EDI MOV EBX,EAX CMP EAX,MaxTrial MOV EBP,MinPrime JAE @@2 PUSH EAX FILD DWord Ptr [ESP] FSQRT FISTP DWord Ptr [ESP] POP EBP // EBP = Sqrt(N) @@2: MOV EDI,Offset Primes MOV ESI,Offset InvPrimes XOR ECX,ECX {$IFDEF SmallLookup} @@3: MOVZX EDX,Byte Ptr [EDI + ECX] // take care, InvPrimes[] MUST {$ELSE} // be after Primes[] declared @@3: MOVZX EDX,Word Ptr [EDI + ECX * 2] {$ENDIF} MOV EAX,EBX CMP EDX,EBP JA @@5 IMUL EAX,[ESI + ECX * 4] INC ECX MUL EDX JC @@3 TEST EDX,EDX @@4: POP EDI POP ESI POP EBX POP EBP SETNZ AL RET @@5: CMP EBX,MaxTrial // N <= MaxPrime^2 ?? MOV EAX,EBX JBE @@4 IMUL EAX,EBX // compute domain param U = -N^-1 mod 2^32 SUB EAX,2 // Lookuptable can reduce from 72 donwto 32 cycles IMUL EAX,EBX MOV EDX,EAX IMUL EAX,EBX ADD EAX,2 IMUL EAX,EDX MOV EDX,EAX IMUL EAX,EBX ADD EAX,2 IMUL EAX,EDX MOV EBP,EAX IMUL EBP,EBX ADD EBP,2 IMUL EBP,EAX // U = -N^-1 mod^2^32 in EBP MOV EDI,EBX MOV EAX,EBX DEC EDI // N -1 NEG EAX BSF ECX,EDI MUL EAX PUSH ECX // bits remain [ESP + 20] MOV ESI,EAX BSR ECX,EDI MOV EAX,EDX XOR EDX,EDX NEG ECX DIV EBX // div, can't be removed MOV EAX,ESI ADD ECX,32 DIV EBX // div SHL EDI,CL MOV EAX,EDX IMUL EAX,EBP MOV ESI,EDX // C = -N^2 mod N, to fast convert into MUL EBX // montgomery domain PUSH ESI // C [ESP + 16] ADD EAX,ESI ADC EDX,0 PUSH EDX // +1 in montgomery [ESP + 12] PUSH EDI // bit mask exponent [ESP + 8] NEG EDX ADD EDX,EBX CMP EBX,$08A8D7F // N < $08A8D7F, do SPP to bases 31,73 PUSH EDX // -1 in montgomery [ESP + 4] JAE @@6 MOV EAX,31 CALL @@9 MOV EAX,73 PUSH Offset @@7 JMP @@9 @@6: MOV EAX,2 // do SPP to bases 2,7,61 CALL @@9 MOV EAX,7 CALL @@9 MOV EAX,61 CALL @@9 @@7: INC EAX @@8: LEA ESP,[ESP + 4 * 5] // don't change flags !! JMP @@4 @@9: MUL DWord Ptr [ESP + 16] // convert base in montgomery MOV EDI,EAX // Base' = Base * C mod N IMUL EAX,EBP // montgomery REDC MOV ESI,EDX MUL EBX ADD EAX,EDI ADC EDX,ESI MOV ECX,[ESP + 8] // bit mask of exponent N -1 MOV EAX,EDX PUSH EDX @@A: MUL EAX // X = X^2 mod N MOV EDI,EAX IMUL EAX,EBP MOV ESI,EDX MUL EBX ADD EAX,EDI ADC EDX,ESI JNC @@B SUB EDX,EBX @@B: ADD ECX,ECX MOV EAX,EDX JNC @@D MUL DWord Ptr [ESP] // X = X * Base mod N MOV EDI,EAX IMUL EAX,EBP MOV ESI,EDX MUL EBX ADD EAX,EDI ADC EDX,ESI JNC @@C SUB EDX,EBX @@C: TEST ECX,ECX MOV EAX,EDX @@D: JNZ @@A CMP EAX,EBX JB @@E SUB EAX,EBX @@E: CMP EAX,[ESP + 16] // == +1 ?? MOV ECX,[ESP + 24] // bits remain JE @@J @@F: CMP EAX,[ESP + 8] // == -1 ?? JE @@J DEC ECX JNG @@I MUL EAX MOV EDI,EAX IMUL EAX,EBP MOV ESI,EDX MUL EBX ADD EAX,EDI ADC EDX,ESI JC @@G CMP EDX,EBX JB @@H @@G: SUB EDX,EBX @@H: CMP EDX,[ESP + 16] // <> +1 ?? MOV EAX,EDX JNE @@F @@I: ADD ESP,8 XOR EAX,EAX JMP @@8 @@J: POP EDX end; //Author: Hagen Reddmann //Optimized for: RTL Suggestion //Instructionset(s): IA32 //Original Name: IsPrimeHR_IA32_2 function IsPrimeFastcodeRTL(Number: Cardinal): Boolean; // Montgomery Version of IsPrimeHR_IA32_1() asm TEST AL,1 JZ @@0 // even ?? CMP EAX,7 JA @@1 // > 7 ?? DEC EAX SETNZ AL RET @@0: CMP EAX,2 // even numbers SETZ AL RET @@1: PUSH EBP // do trial divsion to small primes PUSH EBX PUSH ESI PUSH EDI MOV EBX,EAX CMP EAX,MaxTrial MOV EBP,MinPrime JAE @@2 PUSH EAX FILD DWord Ptr [ESP] FSQRT FISTP DWord Ptr [ESP] POP EBP // EBP = Sqrt(N) @@2: MOV EDI,Offset Primes MOV ESI,Offset InvPrimes XOR ECX,ECX {$IFDEF SmallLookup} @@3: MOVZX EDX,Byte Ptr [EDI + ECX] // take care, InvPrimes[] MUST {$ELSE} // be after Primes[] declared @@3: MOVZX EDX,Word Ptr [EDI + ECX * 2] {$ENDIF} MOV EAX,EBX CMP EDX,EBP JA @@5 IMUL EAX,[ESI + ECX * 4] INC ECX MUL EDX JC @@3 TEST EDX,EDX @@4: POP EDI POP ESI POP EBX POP EBP SETNZ AL RET @@5: CMP EBX,MaxTrial // N <= MaxPrime^2 ?? MOV EAX,EBX JBE @@4 IMUL EAX,EBX // compute domain param U = -N^-1 mod 2^32 SUB EAX,2 // Lookuptable can reduce from 72 donwto 32 cycles IMUL EAX,EBX MOV EDX,EAX IMUL EAX,EBX ADD EAX,2 IMUL EAX,EDX MOV EDX,EAX IMUL EAX,EBX ADD EAX,2 IMUL EAX,EDX MOV EBP,EAX IMUL EBP,EBX ADD EBP,2 IMUL EBP,EAX // U = -N^-1 mod^2^32 in EBP MOV EDI,EBX MOV EAX,EBX DEC EDI // N -1 NEG EAX BSF ECX,EDI MUL EAX PUSH ECX // bits remain [ESP + 20] MOV ESI,EAX BSR ECX,EDI MOV EAX,EDX XOR EDX,EDX NEG ECX DIV EBX // div, can't be removed MOV EAX,ESI ADD ECX,32 DIV EBX // div SHL EDI,CL MOV EAX,EDX IMUL EAX,EBP MOV ESI,EDX // C = -N^2 mod N, to fast convert into MUL EBX // montgomery domain PUSH ESI // C [ESP + 16] ADD EAX,ESI ADC EDX,0 PUSH EDX // +1 in montgomery [ESP + 12] PUSH EDI // bit mask exponent [ESP + 8] NEG EDX ADD EDX,EBX CMP EBX,$08A8D7F // N < $08A8D7F, do SPP to bases 31,73 PUSH EDX // -1 in montgomery [ESP + 4] JAE @@6 MOV EAX,31 CALL @@9 MOV EAX,73 PUSH Offset @@7 JMP @@9 @@6: MOV EAX,2 // do SPP to bases 2,7,61 CALL @@9 MOV EAX,7 CALL @@9 MOV EAX,61 CALL @@9 @@7: INC EAX @@8: LEA ESP,[ESP + 4 * 5] // don't change flags !! JMP @@4 @@9: MUL DWord Ptr [ESP + 16] // convert base in montgomery MOV EDI,EAX // Base' = Base * C mod N IMUL EAX,EBP // montgomery REDC MOV ESI,EDX MUL EBX ADD EAX,EDI ADC EDX,ESI MOV ECX,[ESP + 8] // bit mask of exponent N -1 MOV EAX,EDX PUSH EDX @@A: MUL EAX // X = X^2 mod N MOV EDI,EAX IMUL EAX,EBP MOV ESI,EDX MUL EBX ADD EAX,EDI ADC EDX,ESI JNC @@B SUB EDX,EBX @@B: ADD ECX,ECX MOV EAX,EDX JNC @@D MUL DWord Ptr [ESP] // X = X * Base mod N MOV EDI,EAX IMUL EAX,EBP MOV ESI,EDX MUL EBX ADD EAX,EDI ADC EDX,ESI JNC @@C SUB EDX,EBX @@C: TEST ECX,ECX MOV EAX,EDX @@D: JNZ @@A CMP EAX,EBX JB @@E SUB EAX,EBX @@E: CMP EAX,[ESP + 16] // == +1 ?? MOV ECX,[ESP + 24] // bits remain JE @@J @@F: CMP EAX,[ESP + 8] // == -1 ?? JE @@J DEC ECX JNG @@I MUL EAX MOV EDI,EAX IMUL EAX,EBP MOV ESI,EDX MUL EBX ADD EAX,EDI ADC EDX,ESI JC @@G CMP EDX,EBX JB @@H @@G: SUB EDX,EBX @@H: CMP EDX,[ESP + 16] // <> +1 ?? MOV EAX,EDX JNE @@F @@I: ADD ESP,8 XOR EAX,EAX JMP @@8 @@J: POP EDX end; //Author: Dennis Kjaer Christensen //Date: 14/12 2003 //Optimized for: Pascal //Instructionset(s): IA32 //Original Name: IsPrimeDKCPas22 function IntPowerMod64FP(Base, Exponent, Divisor : Cardinal) : Cardinal; var I1, I2, I3, HalfExponent : Cardinal; Result1FP, Result2FP, DivisorFP : Extended; begin Result1FP := Base; Result2FP := 1; DivisorFP := Divisor; I1 := 2; I2 := Exponent; HalfExponent := Exponent div 2; repeat I3 := I2 and 1; if I3 = 1 then begin Result2FP := (Result2FP * Result1FP) - DivisorFP *(Trunc((Result2FP * Result1FP) / DivisorFP)); end; I2 := I2 shr 1; Result1FP := (Result1FP * Result1FP) - DivisorFP * Trunc((Result1FP * Result1FP) / DivisorFP); if I1 > HalfExponent then Break; I1 := I1 * 2; until(False); Result := Round((Result1FP * Result2FP) - DivisorFP * Trunc((Result1FP * Result2FP) / DivisorFP)); end; function StrongProbablePrimeDKCPas3(Number, Base: Cardinal): Boolean; var d, b, s, i, v : Cardinal; bFP, vFP, nFP : Extended; begin v := Number-1; d := v; //Calculate d and s s := 0; repeat d := d shr 1; Inc(s); until odd(d); b := IntPowerMod64FP(Base,d,Number); if b = 1 then begin Result := True; Exit; end; bFP := b; vFP := Number-1; nFP := Number; for i := 1 to s do begin if Abs(bFP - vFP) < 0.01 then begin Result := True; Exit; end else bFP := (bFP*bFP) - Trunc((bFP*bFP) / nFP) * nFP; end; Result := False; end; function IsPrimeFastcodePascal(Number : Cardinal) : Boolean; var SqrtNumber, Reminder1, Reminder2, J : Cardinal; CW8087 : Word; begin if Number <= 1 then Result := False else if Number <= 3 then Result := True else if Number mod 2 = 0 then Result := False else if Number mod 3 = 0 then Result := False else if Number = 5 then Result := True else if Number mod 5 = 0 then Result := False else if Number = 7 then Result := True else if Number mod 7 = 0 then Result := False else if (Number <= 10000000) then begin Result := True; SqrtNumber := Round(Sqrt(Number)); J := 7; repeat Reminder1 := Number mod J; Inc(J,2); Reminder2 := Number mod J; if (Reminder1 = 0) or (Reminder2 = 0) then begin Result := False; Break; end; Inc(J,2); until(J > SqrtNumber); end else begin CW8087 := Get8087CW; SetPrecisionMode(pmExtended); if StrongProbablePrimeDKCPas3(Number, 2) and StrongProbablePrimeDKCPas3(Number, 61) and StrongProbablePrimeDKCPas3(Number, 7) then begin Result := True; end else begin Result := False; end; Set8087CW(CW8087); end end; end.