unit FastcodePos;

//Version : 0.1 Preliminary version
//Only direct calling supported

interface

{ Every function below returns the 1-based index of the first occurrence of
  SubStr within Str, or 0 when SubStr does not occur or either string is
  empty. The variants differ only in the instruction set they rely on. }

function PosFastcodeP3(const SubStr : AnsiString; const Str : AnsiString ) : Integer;
function PosFastcodeP4(const SubStr : AnsiString; const Str : AnsiString ) : Integer;
function PosFastcodeXP(const SubStr : AnsiString; const Str : AnsiString ) : Integer;
function PosFastcodeOpteron(const SubStr : AnsiString; const Str : AnsiString ) : Integer;
function PosFastcodeRTL(const SubStr : AnsiString; const Str : AnsiString ) : Integer;
function PosFastcodePascal(const SubStr : AnsiString; const Str : AnsiString ) : Integer;
function PosFastcodeBlended(const SubStr : AnsiString; const Str : AnsiString ) : Integer;

implementation

uses
  Windows, SysUtils, elCPUID, elCodeHook;

//Author:            John O'Harrow
//Date:              N/A
//Optimized for:     P3
//Instructionset(s): N/A
//Original Name:     PosJOH_SSE

{ Substring search using 8-byte MMX compares with PSHUFW/PMOVMSKB.
  On entry EAX = SubStr and EDX = Str (nil means empty string).
  The code first locates the 1st character of SubStr through the embedded
  @CharPos routine and then compares the remaining characters backwards.
  @CharPos: AL = character, ECX = number of characters to scan,
  EDX = scan start; returns the 1-based index in EAX (0 = not found). }
function PosFastcodeP3(const SubStr : AnsiString; const Str : AnsiString) : Integer;
asm
  test  eax, eax
  jz    @NotFoundExit        {Exit if SubStr = '' (EAX is already 0)}
  test  edx, edx
  jz    @NotFound            {Exit if Str = ''}
  mov   ecx, [edx-4]         {Length(Str)}
  cmp   dword ptr [eax-4], 1 {Length SubStr = 1? (explicit 32-bit compare)}
  je    @SingleChar          {Yes - Exit via CharPos}
  jl    @NotFound            {Exit if Length(SubStr) < 1}
  sub   ecx, [eax-4]         {Subtract Length(SubStr), -ve handled by CharPos}
  add   ecx, 1               {Number of Chars to Check for 1st Char}
  push  esi                  {Save Registers}
  push  edi
  push  ebx
  push  ebp
  mov   esi, eax             {Start Address of SubStr}
  mov   edi, ecx             {Initial Remainder Count}
  mov   eax, [eax]           {AL = 1st Char of SubStr}
  mov   ebp, edx             {Start Address of Str}
  mov   ebx, eax             {Maintain 1st Search Char in BL}
@StrLoop:
  mov   eax, ebx             {AL = 1st char of SubStr}
  mov   ecx, edi             {Remaining Length}
  push  edx                  {Save Start Position}
  call  @CharPos             {Search for 1st Character}
  pop   edx                  {Restore Start Position}
  test  eax, eax             {Result = 0?}
  jz    @StrExit             {Exit if 1st Character Not Found}
  mov   ecx, [esi-4]         {Length SubStr}
  add   edx, eax             {Update Start Position for Next Loop}
  sub   edi, eax             {Update Remaining Length for Next Loop}
  sub   ecx, 1               {Remaining Characters to Compare}
@StrCheck:
  mov   al, [edx+ecx-1]      {Compare Next Char of SubStr and Str}
  cmp   al, [esi+ecx]
  jne   @StrLoop             {Different - Return to First Character Search}
  sub   ecx, 1
  jnz   @StrCheck            {Check each Remaining Character}
  mov   eax, edx             {All Characters Matched - Calculate Result}
  sub   eax, ebp
@StrExit:
  pop   ebp                  {Restore Registers}
  pop   ebx
  pop   edi
  pop   esi
  ret
@NotFound:
  xor   eax, eax             {Return 0}
@NotFoundExit:
  ret
@SingleChar:
  mov   al, [eax]            {Search Character}
@CharPos:
  CMP   ECX, 8
  JG    @@NotSmall
@@Small:                     {1..8 Characters - Compare One by One}
  or    ecx, ecx
  jle   @@NotFound           {Exit if Length <= 0}
  CMP   AL, [EDX]
  JZ    @Found1
  DEC   ECX
  JZ    @@NotFound
  CMP   AL, [EDX+1]
  JZ    @Found2
  DEC   ECX
  JZ    @@NotFound
  CMP   AL, [EDX+2]
  JZ    @Found3
  DEC   ECX
  JZ    @@NotFound
  CMP   AL, [EDX+3]
  JZ    @Found4
  DEC   ECX
  JZ    @@NotFound
  CMP   AL, [EDX+4]
  JZ    @Found5
  DEC   ECX
  JZ    @@NotFound
  CMP   AL, [EDX+5]
  JZ    @Found6
  DEC   ECX
  JZ    @@NotFound
  CMP   AL, [EDX+6]
  JZ    @Found7
  DEC   ECX
  JZ    @@NotFound
  CMP   AL, [EDX+7]
  JZ    @Found8
@@NotFound:
  XOR   EAX, EAX
  RET
@Found1:
  MOV   EAX, 1
  RET
@Found2:
  MOV   EAX, 2
  RET
@Found3:
  MOV   EAX, 3
  RET
@Found4:
  MOV   EAX, 4
  RET
@Found5:
  MOV   EAX, 5
  RET
@Found6:
  MOV   EAX, 6
  RET
@Found7:
  MOV   EAX, 7
  RET
@Found8:
  MOV   EAX, 8
  RET
@@NotSmall:                  {More than 8 Characters to Scan}
  MOV   AH, AL
  ADD   EDX, ECX             {EDX = End of Scan Range}
  MOVD  MM0, EAX
  PSHUFW MM0, MM0, 0         {Replicate Low Word (AL,AL) across MM0}
  PUSH  ECX                  {Save Length}
  NEG   ECX                  {ECX = Negative Offset from End}
@@First8:
  MOVQ  MM1, [EDX+ECX]
  ADD   ECX, 8
  PCMPEQB MM1, MM0           {Compare Next 8 Bytes}
  PMOVMSKB EAX, MM1
  TEST  EAX, EAX
  JNZ   @@Matched            {Exit on Match at any Position}
  CMP   ECX, -8              {Check if Next Loop would pass String End}
  JGE   @@Last8
@@Align:
  LEA   EAX, [EDX+ECX]
  AND   EAX, 7               {EAX = Offset past Previous 8 Byte Boundary}
  SUB   ECX, EAX             {Step Back so Loop Reads are 8 Byte Aligned}
@@Loop:                      {Loop Unrolled 2X}
  MOVQ  MM1, [EDX+ECX]
  ADD   ECX, 8
  PCMPEQB MM1, MM0           {Compare Next 8 Bytes}
  PMOVMSKB EAX, MM1
  TEST  EAX, EAX
  JNZ   @@Matched            {Exit on Match at any Position}
  CMP   ECX, -8              {Check if Next Loop would pass String End}
{$IFNDEF NoUnroll}
  JGE   @@Last8
  MOVQ  MM1, [EDX+ECX]
  ADD   ECX, 8
  PCMPEQB MM1, MM0           {Compare Next 8 Bytes}
  PMOVMSKB EAX, MM1
  TEST  EAX, EAX
  JNZ   @@Matched            {Exit on Match at any Position}
  CMP   ECX, -8              {Check if Next Loop would pass String End}
{$ENDIF}
  JL    @@Loop
@@Last8:
  PCMPEQB MM0, [EDX-8]       {Compare Last 8 Bytes of Scan Range}
  POP   ECX                  {Original Length}
  PMOVMSKB EAX, MM0
  TEST  EAX, EAX
  JNZ   @@Matched2
  EMMS
  RET                        {Finished - EAX = 0}
@@Matched:                   {Set Result from 1st Match in ECX}
  POP   EDX                  {Original Length}
  ADD   ECX, EDX             {ECX = Offset just past the Matching 8 Bytes}
@@Matched2:
  EMMS
  BSF   EDX, EAX             {Index of 1st Matching Byte (0..7)}
  LEA   EAX, [EDX+ECX-7]     {1-Based Result; Returns via the RET at end}
end;

//Author:            John O'Harrow
//Date:              N/A
//Optimized for:     P4
//Instructionset(s): N/A
//Original Name:     PosJOH_SSE

{ Same code as PosFastcodeP3 (both originate from PosJOH_SSE).
  On entry EAX = SubStr and EDX = Str (nil means empty string).
  @CharPos: AL = character, ECX = number of characters to scan,
  EDX = scan start; returns the 1-based index in EAX (0 = not found). }
function PosFastcodeP4(const SubStr : AnsiString; const Str : AnsiString) : Integer;
asm
  test  eax, eax
  jz    @NotFoundExit        {Exit if SubStr = '' (EAX is already 0)}
  test  edx, edx
  jz    @NotFound            {Exit if Str = ''}
  mov   ecx, [edx-4]         {Length(Str)}
  cmp   dword ptr [eax-4], 1 {Length SubStr = 1? (explicit 32-bit compare)}
  je    @SingleChar          {Yes - Exit via CharPos}
  jl    @NotFound            {Exit if Length(SubStr) < 1}
  sub   ecx, [eax-4]         {Subtract Length(SubStr), -ve handled by CharPos}
  add   ecx, 1               {Number of Chars to Check for 1st Char}
  push  esi                  {Save Registers}
  push  edi
  push  ebx
  push  ebp
  mov   esi, eax             {Start Address of SubStr}
  mov   edi, ecx             {Initial Remainder Count}
  mov   eax, [eax]           {AL = 1st Char of SubStr}
  mov   ebp, edx             {Start Address of Str}
  mov   ebx, eax             {Maintain 1st Search Char in BL}
@StrLoop:
  mov   eax, ebx             {AL = 1st char of SubStr}
  mov   ecx, edi             {Remaining Length}
  push  edx                  {Save Start Position}
  call  @CharPos             {Search for 1st Character}
  pop   edx                  {Restore Start Position}
  test  eax, eax             {Result = 0?}
  jz    @StrExit             {Exit if 1st Character Not Found}
  mov   ecx, [esi-4]         {Length SubStr}
  add   edx, eax             {Update Start Position for Next Loop}
  sub   edi, eax             {Update Remaining Length for Next Loop}
  sub   ecx, 1               {Remaining Characters to Compare}
@StrCheck:
  mov   al, [edx+ecx-1]      {Compare Next Char of SubStr and Str}
  cmp   al, [esi+ecx]
  jne   @StrLoop             {Different - Return to First Character Search}
  sub   ecx, 1
  jnz   @StrCheck            {Check each Remaining Character}
  mov   eax, edx             {All Characters Matched - Calculate Result}
  sub   eax, ebp
@StrExit:
  pop   ebp                  {Restore Registers}
  pop   ebx
  pop   edi
  pop   esi
  ret
@NotFound:
  xor   eax, eax             {Return 0}
@NotFoundExit:
  ret
@SingleChar:
  mov   al, [eax]            {Search Character}
@CharPos:
  CMP   ECX, 8
  JG    @@NotSmall
@@Small:                     {1..8 Characters - Compare One by One}
  or    ecx, ecx
  jle   @@NotFound           {Exit if Length <= 0}
  CMP   AL, [EDX]
  JZ    @Found1
  DEC   ECX
  JZ    @@NotFound
  CMP   AL, [EDX+1]
  JZ    @Found2
  DEC   ECX
  JZ    @@NotFound
  CMP   AL, [EDX+2]
  JZ    @Found3
  DEC   ECX
  JZ    @@NotFound
  CMP   AL, [EDX+3]
  JZ    @Found4
  DEC   ECX
  JZ    @@NotFound
  CMP   AL, [EDX+4]
  JZ    @Found5
  DEC   ECX
  JZ    @@NotFound
  CMP   AL, [EDX+5]
  JZ    @Found6
  DEC   ECX
  JZ    @@NotFound
  CMP   AL, [EDX+6]
  JZ    @Found7
  DEC   ECX
  JZ    @@NotFound
  CMP   AL, [EDX+7]
  JZ    @Found8
@@NotFound:
  XOR   EAX, EAX
  RET
@Found1:
  MOV   EAX, 1
  RET
@Found2:
  MOV   EAX, 2
  RET
@Found3:
  MOV   EAX, 3
  RET
@Found4:
  MOV   EAX, 4
  RET
@Found5:
  MOV   EAX, 5
  RET
@Found6:
  MOV   EAX, 6
  RET
@Found7:
  MOV   EAX, 7
  RET
@Found8:
  MOV   EAX, 8
  RET
@@NotSmall:                  {More than 8 Characters to Scan}
  MOV   AH, AL
  ADD   EDX, ECX             {EDX = End of Scan Range}
  MOVD  MM0, EAX
  PSHUFW MM0, MM0, 0         {Replicate Low Word (AL,AL) across MM0}
  PUSH  ECX                  {Save Length}
  NEG   ECX                  {ECX = Negative Offset from End}
@@First8:
  MOVQ  MM1, [EDX+ECX]
  ADD   ECX, 8
  PCMPEQB MM1, MM0           {Compare Next 8 Bytes}
  PMOVMSKB EAX, MM1
  TEST  EAX, EAX
  JNZ   @@Matched            {Exit on Match at any Position}
  CMP   ECX, -8              {Check if Next Loop would pass String End}
  JGE   @@Last8
@@Align:
  LEA   EAX, [EDX+ECX]
  AND   EAX, 7               {EAX = Offset past Previous 8 Byte Boundary}
  SUB   ECX, EAX             {Step Back so Loop Reads are 8 Byte Aligned}
@@Loop:                      {Loop Unrolled 2X}
  MOVQ  MM1, [EDX+ECX]
  ADD   ECX, 8
  PCMPEQB MM1, MM0           {Compare Next 8 Bytes}
  PMOVMSKB EAX, MM1
  TEST  EAX, EAX
  JNZ   @@Matched            {Exit on Match at any Position}
  CMP   ECX, -8              {Check if Next Loop would pass String End}
{$IFNDEF NoUnroll}
  JGE   @@Last8
  MOVQ  MM1, [EDX+ECX]
  ADD   ECX, 8
  PCMPEQB MM1, MM0           {Compare Next 8 Bytes}
  PMOVMSKB EAX, MM1
  TEST  EAX, EAX
  JNZ   @@Matched            {Exit on Match at any Position}
  CMP   ECX, -8              {Check if Next Loop would pass String End}
{$ENDIF}
  JL    @@Loop
@@Last8:
  PCMPEQB MM0, [EDX-8]       {Compare Last 8 Bytes of Scan Range}
  POP   ECX                  {Original Length}
  PMOVMSKB EAX, MM0
  TEST  EAX, EAX
  JNZ   @@Matched2
  EMMS
  RET                        {Finished - EAX = 0}
@@Matched:                   {Set Result from 1st Match in ECX}
  POP   EDX                  {Original Length}
  ADD   ECX, EDX             {ECX = Offset just past the Matching 8 Bytes}
@@Matched2:
  EMMS
  BSF   EDX, EAX             {Index of 1st Matching Byte (0..7)}
  LEA   EAX, [EDX+ECX-7]     {1-Based Result; Returns via the RET at end}
end;

//Author:            John O'Harrow
//Date:              N/A
//Optimized for:     XP
//Instructionset(s): N/A
//Original Name:     PosJOH_MMX

{ Substring search using plain MMX only: the 8 byte compare result is
  narrowed with PACKSSWB and moved to EAX instead of using PMOVMSKB.
  On entry EAX = SubStr and EDX = Str (nil means empty string).
  @CharPos: AL = character, ECX = number of characters to scan,
  EDX = scan start; returns the 1-based index in EAX (0 = not found). }
function PosFastcodeXP(const SubStr : AnsiString; const Str : AnsiString) : Integer;
asm
  test  eax, eax
  jz    @NotFoundExit        {Exit if SubStr = '' (EAX is already 0)}
  test  edx, edx
  jz    @NotFound            {Exit if Str = ''}
  mov   ecx, [edx-4]         {Length(Str)}
  cmp   dword ptr [eax-4], 1 {Length SubStr = 1? (explicit 32-bit compare)}
  je    @SingleChar          {Yes - Exit via CharPos}
  jl    @NotFound            {Exit if Length(SubStr) < 1}
  sub   ecx, [eax-4]         {Subtract Length(SubStr), -ve handled by CharPos}
  add   ecx, 1               {Number of Chars to Check for 1st Char}
  push  esi                  {Save Registers}
  push  edi
  push  ebx
  push  ebp
  mov   esi, eax             {Start Address of SubStr}
  mov   edi, ecx             {Initial Remainder Count}
  mov   eax, [eax]           {AL = 1st Char of SubStr}
  mov   ebp, edx             {Start Address of Str}
  mov   ebx, eax             {Maintain 1st Search Char in BL}
@StrLoop:
  mov   eax, ebx             {AL = 1st char of SubStr}
  mov   ecx, edi             {Remaining Length}
  push  edx                  {Save Start Position}
  call  @CharPos             {Search for 1st Character}
  pop   edx                  {Restore Start Position}
  test  eax, eax             {Result = 0?}
  jz    @StrExit             {Exit if 1st Character Not Found}
  mov   ecx, [esi-4]         {Length SubStr}
  add   edx, eax             {Update Start Position for Next Loop}
  sub   edi, eax             {Update Remaining Length for Next Loop}
  sub   ecx, 1               {Remaining Characters to Compare}
@StrCheck:
  mov   al, [edx+ecx-1]      {Compare Next Char of SubStr and Str}
  cmp   al, [esi+ecx]
  jne   @StrLoop             {Different - Return to First Character Search}
  sub   ecx, 1
  jnz   @StrCheck            {Check each Remaining Character}
  mov   eax, edx             {All Characters Matched - Calculate Result}
  sub   eax, ebp
@StrExit:
  pop   ebp                  {Restore Registers}
  pop   ebx
  pop   edi
  pop   esi
  ret
@NotFound:
  xor   eax, eax             {Return 0}
@NotFoundExit:
  ret
@SingleChar:
  mov   al, [eax]            {Search Character}
@CharPos:
  CMP   ECX, 8
  JG    @@NotSmall
@@Small:                     {1..8 Characters - Compare One by One}
  or    ecx, ecx
  jle   @@NotFound           {Exit if Length <= 0}
  CMP   AL, [EDX]
  JZ    @Found1
  DEC   ECX
  JZ    @@NotFound
  CMP   AL, [EDX+1]
  JZ    @Found2
  DEC   ECX
  JZ    @@NotFound
  CMP   AL, [EDX+2]
  JZ    @Found3
  DEC   ECX
  JZ    @@NotFound
  CMP   AL, [EDX+3]
  JZ    @Found4
  DEC   ECX
  JZ    @@NotFound
  CMP   AL, [EDX+4]
  JZ    @Found5
  DEC   ECX
  JZ    @@NotFound
  CMP   AL, [EDX+5]
  JZ    @Found6
  DEC   ECX
  JZ    @@NotFound
  CMP   AL, [EDX+6]
  JZ    @Found7
  DEC   ECX
  JZ    @@NotFound
  CMP   AL, [EDX+7]
  JZ    @Found8
@@NotFound:
  XOR   EAX, EAX
  RET
@Found1:
  MOV   EAX, 1
  RET
@Found2:
  MOV   EAX, 2
  RET
@Found3:
  MOV   EAX, 3
  RET
@Found4:
  MOV   EAX, 4
  RET
@Found5:
  MOV   EAX, 5
  RET
@Found6:
  MOV   EAX, 6
  RET
@Found7:
  MOV   EAX, 7
  RET
@Found8:
  MOV   EAX, 8
  RET
@@NotSmall:                  {More than 8 Characters to Scan}
  MOV   AH, AL
  ADD   EDX, ECX             {EDX = End of Scan Range}
  MOVD  MM0, EAX
  PUNPCKLWD MM0, MM0         {Replicate Low Word (AL,AL) ...}
  PUNPCKLDQ MM0, MM0         {... across all 8 Bytes of MM0}
  PUSH  ECX                  {Save Length}
  NEG   ECX                  {ECX = Negative Offset from End}
@@First8:
  MOVQ  MM1, [EDX+ECX]
  ADD   ECX, 8
  PCMPEQB  MM1, MM0          {Compare All 8 Bytes}
  PACKSSWB MM1, MM1          {Pack Result into 4 Bytes}
  MOVD  EAX, MM1
  TEST  EAX, EAX
  JNZ   @@Matched            {Exit on Match at any Position}
  CMP   ECX, -8              {Check if Next Loop would pass String End}
  JGE   @@Last8
@@Align:                     {Align to Previous 8 Byte Boundary}
  LEA   EAX, [EDX+ECX]
  AND   EAX, 7               {EAX = Offset past Previous 8 Byte Boundary}
  SUB   ECX, EAX
@@Loop:
  MOVQ  MM1, [EDX+ECX]
  ADD   ECX, 8
  PCMPEQB  MM1, MM0          {Compare All 8 Bytes}
  PACKSSWB MM1, MM1          {Pack Result into 4 Bytes}
  MOVD  EAX, MM1
  TEST  EAX, EAX
  JNZ   @@Matched            {Exit on Match at any Position}
  CMP   ECX, -8              {Check if Next Loop would pass String End}
{$IFNDEF NoUnroll}
  JGE   @@Last8
  MOVQ  MM1, [EDX+ECX]
  ADD   ECX, 8
  PCMPEQB  MM1, MM0          {Compare All 8 Bytes}
  PACKSSWB MM1, MM1          {Pack Result into 4 Bytes}
  MOVD  EAX, MM1
  TEST  EAX, EAX
  JNZ   @@Matched            {Exit on Match at any Position}
  CMP   ECX, -8              {Check if Next Loop would pass String End}
{$ENDIF}
  JL    @@Loop
@@Last8:
  MOVQ  MM1, [EDX-8]         {Position for Last 8 Used Characters}
  POP   EDX                  {Original Length}
  PCMPEQB  MM1, MM0          {Compare All 8 Bytes}
  PACKSSWB MM1, MM1          {Pack Result into 4 Bytes}
  MOVD  EAX, MM1
  TEST  EAX, EAX
  JNZ   @@Matched2           {Exit on Match at any Position}
  EMMS
  RET                        {Finished - Not Found}
@@Matched:                   {Set Result from 1st Match in EDX}
  POP   EDX                  {Original Length}
  ADD   EDX, ECX
@@Matched2:
  EMMS
  SUB   EDX, 8               {Adjust for Extra ADD ECX,8 in Loop}
  TEST  AL, AL
  JNZ   @@MatchDone          {Match at Position 1 or 2}
  TEST  AH, AH
  JNZ   @@Match1             {Match at Position 3 or 4}
  SHR   EAX, 16
  TEST  AL, AL
  JNZ   @@Match2             {Match at Position 5 or 6}
  SHR   EAX, 8
  ADD   EDX, 6               {Match at Position 7 or 8}
  JMP   @@MatchDone
@@Match2:
  ADD   EDX, 4
  JMP   @@MatchDone
@@Match1:
  SHR   EAX, 8               {AL <- AH}
  ADD   EDX, 2
@@MatchDone:
  XOR   EAX, 2               {Packed Byte is $7F/$FF (1st of Pair) or $80 (2nd)}
  AND   EAX, 3               {EAX <- 1 or 2}
  ADD   EAX, EDX             {1-Based Result; Returns via the RET at end}
end;

//Author:            John O'Harrow
//Date:              N/A
//Optimized for:     Opteron
//Instructionset(s): N/A
//Original Name:     PosJOH_SSE2

{ Substring search using 16-byte SSE2 compares.
  On entry EAX = SubStr and EDX = Str (nil means empty string).
  Returns the 1-based index of SubStr in Str, or 0 when not found.
  @CharPos: AL = character, ECX = number of characters to scan,
  EDX = scan start; returns the 1-based index in EAX (0 = not found).
  Tails shorter than 16 characters are dispatched through two jump tables
  into a chain of single byte compares (@@16 .. @@01). }
function PosFastcodeOpteron(const SubStr : AnsiString; const Str : AnsiString) : Integer;
asm
  test  eax, eax
  jz    @NotFoundExit        {Exit if SubStr = '' (EAX is already 0)}
  test  edx, edx
  jz    @NotFound            {Exit if Str = ''}
  mov   ecx, [edx-4]         {Length(Str)}
  cmp   dword ptr [eax-4], 1 {Length SubStr = 1? (explicit 32-bit compare)}
  je    @SingleChar          {Yes - Exit via CharPos}
  jl    @NotFound            {Exit if Length(SubStr) < 1}
  sub   ecx, [eax-4]         {Subtract Length(SubStr)}
  jl    @NotFound            {Exit if Length(SubStr) > Length(Str)}
  add   ecx, 1               {Number of Chars to Check for 1st Char}
  push  esi                  {Save Registers}
  push  edi
  push  ebx
  push  ebp
  mov   esi, eax             {Start Address of SubStr}
  mov   edi, ecx             {Initial Remainder Count}
  mov   eax, [eax]           {AL = 1st Char of SubStr}
  mov   ebp, edx             {Start Address of Str}
  mov   ebx, eax             {Maintain 1st Search Char in BL}
@StrLoop:
  mov   eax, ebx             {AL = 1st char of SubStr}
  mov   ecx, edi             {Remaining Length}
  push  edx                  {Save Start Position}
  call  @CharPos             {Search for 1st Character}
  pop   edx                  {Restore Start Position}
  test  eax, eax             {Result = 0?}
  jz    @StrExit             {Exit if 1st Character Not Found}
  mov   ecx, [esi-4]         {Length SubStr}
  add   edx, eax             {Update Start Position for Next Loop}
  sub   edi, eax             {Update Remaining Length for Next Loop}
  sub   ecx, 1               {Remaining Characters to Compare}
@StrCheck:
  mov   al, [edx+ecx-1]      {Compare Next Char of SubStr and Str}
  cmp   al, [esi+ecx]
  jne   @StrLoop             {Different - Return to First Character Search}
  sub   ecx, 1
  jnz   @StrCheck            {Check each Remaining Character}
  mov   eax, edx             {All Characters Matched - Calculate Result}
  sub   eax, ebp
@StrExit:
  pop   ebp                  {Restore Registers}
  pop   ebx
  pop   edi
  pop   esi
  ret
@NotFound:
  xor   eax, eax             {Return 0}
@NotFoundExit:
  ret
@SingleChar:
  mov   al, [eax]            {Search Character}
@CharPos:
  PUSH  EBX
  MOV   EBX, EAX             {BL = Search Character for Byte Compares}
  CMP   ECX, 16
  JL    @@Small
@@NotSmall:
  MOV   AH, AL               {Fill each Byte of XMM1 with AL}
  MOVD  XMM1, EAX
  PSHUFLW XMM1, XMM1, 0
  PSHUFD XMM1, XMM1, 0
@@First16:
  MOVUPS XMM0, [EDX]         {Unaligned}
  PCMPEQB XMM0, XMM1         {Compare First 16 Characters}
  PMOVMSKB EAX, XMM0
  TEST  EAX, EAX
  JNZ   @@FoundStart         {Exit on any Match}
  CMP   ECX, 32
  JL    @@Medium             {If Length(Str) < 32, Check Remainder}
@@Align:
  SUB   ECX, 16              {Align Block Reads}
  PUSH  ECX                  {Save Length - 16}
  MOV   EAX, EDX
  NEG   EAX
  AND   EAX, 15              {Bytes up to Next 16 Byte Boundary}
  ADD   EDX, ECX             {EDX = End of Scan Range - 16}
  NEG   ECX
  ADD   ECX, EAX             {ECX = Negative Offset of 1st Aligned Block}
@@Loop:
  MOVAPS XMM0, [EDX+ECX]     {Aligned}
  PCMPEQB XMM0, XMM1         {Compare Next 16 Characters}
  PMOVMSKB EAX, XMM0
  TEST  EAX, EAX
  JNZ   @@Found              {Exit on any Match}
  ADD   ECX, 16
  JLE   @@Loop
@Remainder:
  POP   EAX                  {Check Remaining Characters}
  ADD   EDX, 16              {EDX = End of Scan Range}
  ADD   EAX, ECX             {Count from Last Loop End Position}
  {ECX = 1..16 here; the index is ADDED (x86 cannot subtract a scaled index)}
  JMP   DWORD PTR [@@JumpTable2+ECX*4]
@@NullString:                {Not referenced within this routine}
  XOR   EAX, EAX             {Result = 0}
  RET
@@FoundStart:
  BSF   EAX, EAX             {Get Set Bit}
  POP   EBX
  ADD   EAX, 1               {Set Result}
  RET
@@Found:
  POP   EDX                  {Length - 16}
  BSF   EAX, EAX             {Get Set Bit}
  ADD   EDX, ECX
  POP   EBX
  LEA   EAX, [EAX+EDX+1]     {Set Result}
  RET
@@Medium:
  ADD   EDX, ECX             {End of String}
  MOV   EAX, 16              {Count from 16}
  {ECX = 16..31 here, so -64 rebases the index to 0..15}
  JMP   DWORD PTR [@@JumpTable1-64+ECX*4]
@@Small:
  ADD   EDX, ECX             {End of String}
  XOR   EAX, EAX             {Count from 0}
  JMP   DWORD PTR [@@JumpTable1+ECX*4]
  nop; nop; nop              {Align Jump Tables}
@@JumpTable1:                {Indexed by Number of Characters Left (0..16)}
  DD    @@NotFound, @@01, @@02, @@03, @@04, @@05, @@06, @@07
  DD    @@08, @@09, @@10, @@11, @@12, @@13, @@14, @@15, @@16
@@JumpTable2:                {Indexed by 16 - Number of Characters Left}
  DD    @@16, @@15, @@14, @@13, @@12, @@11, @@10, @@09, @@08
  DD    @@07, @@06, @@05, @@04, @@03, @@02, @@01, @@NotFound
@@16:
  ADD   EAX, 1
  CMP   BL, [EDX-16]
  JE    @@Done
@@15:
  ADD   EAX, 1
  CMP   BL, [EDX-15]
  JE    @@Done
@@14:
  ADD   EAX, 1
  CMP   BL, [EDX-14]
  JE    @@Done
@@13:
  ADD   EAX, 1
  CMP   BL, [EDX-13]
  JE    @@Done
@@12:
  ADD   EAX, 1
  CMP   BL, [EDX-12]
  JE    @@Done
@@11:
  ADD   EAX, 1
  CMP   BL, [EDX-11]
  JE    @@Done
@@10:
  ADD   EAX, 1
  CMP   BL, [EDX-10]
  JE    @@Done
@@09:
  ADD   EAX, 1
  CMP   BL, [EDX-9]
  JE    @@Done
@@08:
  ADD   EAX, 1
  CMP   BL, [EDX-8]
  JE    @@Done
@@07:
  ADD   EAX, 1
  CMP   BL, [EDX-7]
  JE    @@Done
@@06:
  ADD   EAX, 1
  CMP   BL, [EDX-6]
  JE    @@Done
@@05:
  ADD   EAX, 1
  CMP   BL, [EDX-5]
  JE    @@Done
@@04:
  ADD   EAX, 1
  CMP   BL, [EDX-4]
  JE    @@Done
@@03:
  ADD   EAX, 1
  CMP   BL, [EDX-3]
  JE    @@Done
@@02:
  ADD   EAX, 1
  CMP   BL, [EDX-2]
  JE    @@Done
@@01:
  ADD   EAX, 1
  CMP   BL, [EDX-1]
  JE    @@Done
@@NotFound:
  XOR   EAX, EAX
@@Done:
  POP   EBX                  {Returns via the RET emitted at end}
end;

//Author:            John O'Harrow
//Date:              N/A
//Optimized for:     RTL
//Instructionset(s): IA32
//Original Name:     PosJOH_IA32

{ Substring search using plain IA32 instructions only.
  On entry EAX = SubStr and EDX = Str (nil means empty string).
  Returns the 1-based index of SubStr in Str, or 0 when not found.
  Unlike the other variants, @CharPos here also reports its result in the
  zero flag, which the caller tests directly after restoring EDX. }
function PosFastcodeRTL(const SubStr : AnsiString; const Str : AnsiString) : Integer;
asm
  test  eax, eax
  jz    @NotFoundExit        {Exit if SubStr = '' (EAX is already 0)}
  test  edx, edx
  jz    @NotFound            {Exit if Str = ''}
  mov   ecx, [edx-4]         {Length(Str)}
  cmp   dword ptr [eax-4], 1 {Length SubStr = 1? (explicit 32-bit compare)}
  je    @SingleChar          {Yes - Exit via CharPos}
  jl    @NotFound            {Exit if Length(SubStr) < 1}
  sub   ecx, [eax-4]         {Subtract Length(SubStr), -ve handled by CharPos}
  add   ecx, 1               {Number of Chars to Check for 1st Char}
  push  esi                  {Save Registers}
  push  edi
  push  ebx
  push  ebp
  mov   ebx, [eax]           {BL = 1st Char of SubStr}
  mov   esi, eax             {Start Address of SubStr}
  mov   edi, ecx             {Initial Remainder Count}
  mov   ebp, edx             {Start Address of Str}
@StrLoop:
  mov   eax, ebx             {AL = 1st char of SubStr for next Search}
  mov   ecx, edi             {Remaining Length}
  push  edx                  {Save Start Position}
  call  @CharPos             {Search for 1st Character}
  pop   edx                  {Restore Start Position (Flags Unchanged)}
  jz    @StrExit             {Exit with Zero Result if 1st Char Not Found}
  mov   ecx, [esi-4]         {Length SubStr}
  add   edx, eax             {Update Start Position for Next Loop}
  sub   edi, eax             {Update Remaining Length for Next Loop}
  sub   ecx, 1               {Remaining Characters to Compare}
@StrCheck:
  mov   al, [edx+ecx-1]      {Compare Next Char of SubStr and Str}
  cmp   al, [esi+ecx]
  jne   @StrLoop             {Different - Return to First Character Search}
  sub   ecx, 1
  jg    @StrCheck            {Check each Remaining Character}
  mov   eax, edx             {All Characters Matched - Calculate Result}
  sub   eax, ebp
@StrExit:
  pop   ebp                  {Restore Registers}
  pop   ebx
  pop   edi
  pop   esi
  ret
@NotFound:
  xor   eax, eax             {Return 0}
@NotFoundExit:
  ret
@SingleChar:
  mov   al, [eax]            {Search Character}
{Return Position of Character AL within a String of Length ECX starting}
{at Address EDX. If Found, Return Index in EAX and Clear Zero Flag,    }
{otherwise Return 0 in EAX and Set Zero Flag. Changes EAX, ECX and EDX}
@CharPos:
  push  ecx                  {Save Length}
  neg   ecx
  cmp   ecx, -4
  jle   @NotSmall            {Length >= 4}
  or    ecx, ecx
  jge   @CharNotFound        {Exit if Length <= 0}
  cmp   al, [edx]            {Check 1st Char}
  je    @Found
  add   ecx, 1
  jz    @CharNotFound
  cmp   al, [edx+1]          {Check 2nd Char}
  je    @Found
  add   ecx, 1
  jz    @CharNotFound
  cmp   al, [edx+2]          {Check 3rd Char}
  je    @Found
  jmp   @CharNotFound
@NotSmall:
  sub   edx, ecx             {End of String}
@Loop:
  cmp   al, [edx+ecx]        {Compare Next 4 Characters}
  je    @Found
  cmp   al, [edx+ecx+1]
  je    @Found2
  cmp   al, [edx+ecx+2]
  je    @Found3
  cmp   al, [edx+ecx+3]
  je    @Found4
  add   ecx, 4               {Next Character Position}
  and   ecx, -4              {Prevent Read Past Last Character}
  jnz   @Loop                {Loop until all Characters Compared}
@CharNotFound:
  pop   ecx                  {Restore Stack}
  xor   eax, eax             {Set Result to 0 and Set Zero Flag}
  ret                        {Finished}
@Found4:
  add   ecx, 1
@Found3:
  add   ecx, 1
@Found2:
  add   ecx, 1
@Found:
  add   ecx, 1
  pop   eax                  {Original Length}
  add   eax, ecx             {Set Result and Clear Zero Flag}
end;

//Author:            Aleksandr Sharahov
//Date:              N/A
//Optimized for:     Pascal
//Instructionset(s): N/A
//Original Name:     PosShaPas

{ Substring search without assembler.
  Returns the 1-based index of SubStr in Str, or 0 when SubStr does not
  occur or either string is empty.
  The outer loop tests two candidate start positions (p[0] and p[1]) per
  iteration; a hit on p[1] in the final iteration may lie one position past
  the last valid start, which the Ret1 exit checks before reporting it.

  NOTE(review): the text between the length check and the end of the
  single character loop was missing from the damaged source and has been
  reconstructed from the surviving code and the commented-out pure Pascal
  paragraph - confirm against the original PosShaPas. }
function PosFastcodePascal(const SubStr: AnsiString; const Str: AnsiString): Integer;
var
  len, lenSub: integer;
  ch: AnsiChar;
  p, pSub, pStart, pEnd: PAnsiChar;
label
  Ret, Ret0, Ret1, Next0, Next1;
begin
  p:=pointer(Str);
  pSub:=pointer(SubStr);

  //if need pure Pascal uncomment this paragraph
  //and comment out the next 3 paragraphs
  {
  len:=length(Str);
  lenSub:=length(SubStr);
  pEnd:=p+len;
  pStart:=p;
  pEnd:=pEnd-lenSub;
  if (len<=0) or (lenSub<=0) or (p>pEnd) then begin;
    Result:=0;
    exit;
    end;
  }

  if (p=nil) or (pSub=nil) then begin;
    Result:=0;
    exit;
    end;

  //string lengths are read from the 32-bit field stored just before the data
  len:=pinteger(p-4)^;
  lenSub:=pinteger(pSub-4)^;
  if (len<lenSub) or (lenSub<=0) then begin;
    Result:=0;
    exit;
    end;

  pEnd:=p+len;
  pStart:=p;
  pEnd:=pEnd-lenSub;           //last valid start position of a match

  ch:=pSub[0];

  //single character: the pairwise tail compare below needs lenSub >= 2
  if lenSub=1 then begin;
    repeat;
      if ch=p[0] then goto Ret0;
      if ch=p[1] then goto Ret1;
      p:=p+2;
      until p>pEnd;
    Result:=0;
    exit;
    end;

  repeat;
    if ch=p[0] then begin;
      len:=lenSub;
      repeat;                  //compare from the end, two characters per pass
        if pSub[len-1]<>p[len-1] then goto Next0;
        if pSub[len-2]<>p[len-2] then goto Next0;
        len:=len-2;
        until len<2;
      goto Ret0;
Next0:end;

    if ch=p[1] then begin;
      len:=lenSub;
      repeat;                  //same compare, shifted by one for start p+1
        if pSub[len-1]<>p[len] then goto Next1;
        if pSub[len-2]<>p[len-1] then goto Next1;
        len:=len-2;
        until len<2;
      goto Ret1;
Next1:end;

    p:=p+2;
    until p>pEnd;
  Result:=0;
  exit;

Ret1:
  inc(pEnd);                   //reject a p[1] hit that starts beyond pEnd
  p:=p+2;
  if p<=pEnd then goto Ret;
  Result:=0;
  exit;
Ret0:
  inc(p);
Ret:
  Result:=p-pStart;
end;

//Author:            John O'Harrow
//Date:              N/A
//Optimized for:     Blended
//Instructionset(s): N/A
//Original Name:     PosJOH_MMX

{ Substring search using plain MMX only (same code as PosFastcodeXP; both
  originate from PosJOH_MMX).
  On entry EAX = SubStr and EDX = Str (nil means empty string).
  Returns the 1-based index of SubStr in Str, or 0 when not found.
  @CharPos: AL = character, ECX = number of characters to scan,
  EDX = scan start; returns the 1-based index in EAX (0 = not found). }
function PosFastcodeBlended(const SubStr : AnsiString; const Str : AnsiString) : Integer;
asm
  test  eax, eax
  jz    @NotFoundExit        {Exit if SubStr = '' (EAX is already 0)}
  test  edx, edx
  jz    @NotFound            {Exit if Str = ''}
  mov   ecx, [edx-4]         {Length(Str)}
  cmp   dword ptr [eax-4], 1 {Length SubStr = 1? (explicit 32-bit compare)}
  je    @SingleChar          {Yes - Exit via CharPos}
  jl    @NotFound            {Exit if Length(SubStr) < 1}
  sub   ecx, [eax-4]         {Subtract Length(SubStr), -ve handled by CharPos}
  add   ecx, 1               {Number of Chars to Check for 1st Char}
  push  esi                  {Save Registers}
  push  edi
  push  ebx
  push  ebp
  mov   esi, eax             {Start Address of SubStr}
  mov   edi, ecx             {Initial Remainder Count}
  mov   eax, [eax]           {AL = 1st Char of SubStr}
  mov   ebp, edx             {Start Address of Str}
  mov   ebx, eax             {Maintain 1st Search Char in BL}
@StrLoop:
  mov   eax, ebx             {AL = 1st char of SubStr}
  mov   ecx, edi             {Remaining Length}
  push  edx                  {Save Start Position}
  call  @CharPos             {Search for 1st Character}
  pop   edx                  {Restore Start Position}
  test  eax, eax             {Result = 0?}
  jz    @StrExit             {Exit if 1st Character Not Found}
  mov   ecx, [esi-4]         {Length SubStr}
  add   edx, eax             {Update Start Position for Next Loop}
  sub   edi, eax             {Update Remaining Length for Next Loop}
  sub   ecx, 1               {Remaining Characters to Compare}
@StrCheck:
  mov   al, [edx+ecx-1]      {Compare Next Char of SubStr and Str}
  cmp   al, [esi+ecx]
  jne   @StrLoop             {Different - Return to First Character Search}
  sub   ecx, 1
  jnz   @StrCheck            {Check each Remaining Character}
  mov   eax, edx             {All Characters Matched - Calculate Result}
  sub   eax, ebp
@StrExit:
  pop   ebp                  {Restore Registers}
  pop   ebx
  pop   edi
  pop   esi
  ret
@NotFound:
  xor   eax, eax             {Return 0}
@NotFoundExit:
  ret
@SingleChar:
  mov   al, [eax]            {Search Character}
@CharPos:
  CMP   ECX, 8
  JG    @@NotSmall
@@Small:                     {1..8 Characters - Compare One by One}
  or    ecx, ecx
  jle   @@NotFound           {Exit if Length <= 0}
  CMP   AL, [EDX]
  JZ    @Found1
  DEC   ECX
  JZ    @@NotFound
  CMP   AL, [EDX+1]
  JZ    @Found2
  DEC   ECX
  JZ    @@NotFound
  CMP   AL, [EDX+2]
  JZ    @Found3
  DEC   ECX
  JZ    @@NotFound
  CMP   AL, [EDX+3]
  JZ    @Found4
  DEC   ECX
  JZ    @@NotFound
  CMP   AL, [EDX+4]
  JZ    @Found5
  DEC   ECX
  JZ    @@NotFound
  CMP   AL, [EDX+5]
  JZ    @Found6
  DEC   ECX
  JZ    @@NotFound
  CMP   AL, [EDX+6]
  JZ    @Found7
  DEC   ECX
  JZ    @@NotFound
  CMP   AL, [EDX+7]
  JZ    @Found8
@@NotFound:
  XOR   EAX, EAX
  RET
@Found1:
  MOV   EAX, 1
  RET
@Found2:
  MOV   EAX, 2
  RET
@Found3:
  MOV   EAX, 3
  RET
@Found4:
  MOV   EAX, 4
  RET
@Found5:
  MOV   EAX, 5
  RET
@Found6:
  MOV   EAX, 6
  RET
@Found7:
  MOV   EAX, 7
  RET
@Found8:
  MOV   EAX, 8
  RET
@@NotSmall:                  {More than 8 Characters to Scan}
  MOV   AH, AL
  ADD   EDX, ECX             {EDX = End of Scan Range}
  MOVD  MM0, EAX
  PUNPCKLWD MM0, MM0         {Replicate Low Word (AL,AL) ...}
  PUNPCKLDQ MM0, MM0         {... across all 8 Bytes of MM0}
  PUSH  ECX                  {Save Length}
  NEG   ECX                  {ECX = Negative Offset from End}
@@First8:
  MOVQ  MM1, [EDX+ECX]
  ADD   ECX, 8
  PCMPEQB  MM1, MM0          {Compare All 8 Bytes}
  PACKSSWB MM1, MM1          {Pack Result into 4 Bytes}
  MOVD  EAX, MM1
  TEST  EAX, EAX
  JNZ   @@Matched            {Exit on Match at any Position}
  CMP   ECX, -8              {Check if Next Loop would pass String End}
  JGE   @@Last8
@@Align:                     {Align to Previous 8 Byte Boundary}
  LEA   EAX, [EDX+ECX]
  AND   EAX, 7               {EAX = Offset past Previous 8 Byte Boundary}
  SUB   ECX, EAX
@@Loop:
  MOVQ  MM1, [EDX+ECX]
  ADD   ECX, 8
  PCMPEQB  MM1, MM0          {Compare All 8 Bytes}
  PACKSSWB MM1, MM1          {Pack Result into 4 Bytes}
  MOVD  EAX, MM1
  TEST  EAX, EAX
  JNZ   @@Matched            {Exit on Match at any Position}
  CMP   ECX, -8              {Check if Next Loop would pass String End}
{$IFNDEF NoUnroll}
  JGE   @@Last8
  MOVQ  MM1, [EDX+ECX]
  ADD   ECX, 8
  PCMPEQB  MM1, MM0          {Compare All 8 Bytes}
  PACKSSWB MM1, MM1          {Pack Result into 4 Bytes}
  MOVD  EAX, MM1
  TEST  EAX, EAX
  JNZ   @@Matched            {Exit on Match at any Position}
  CMP   ECX, -8              {Check if Next Loop would pass String End}
{$ENDIF}
  JL    @@Loop
@@Last8:
  MOVQ  MM1, [EDX-8]         {Position for Last 8 Used Characters}
  POP   EDX                  {Original Length}
  PCMPEQB  MM1, MM0          {Compare All 8 Bytes}
  PACKSSWB MM1, MM1          {Pack Result into 4 Bytes}
  MOVD  EAX, MM1
  TEST  EAX, EAX
  JNZ   @@Matched2           {Exit on Match at any Position}
  EMMS
  RET                        {Finished - Not Found}
@@Matched:                   {Set Result from 1st Match in EDX}
  POP   EDX                  {Original Length}
  ADD   EDX, ECX
@@Matched2:
  EMMS
  SUB   EDX, 8               {Adjust for Extra ADD ECX,8 in Loop}
  TEST  AL, AL
  JNZ   @@MatchDone          {Match at Position 1 or 2}
  TEST  AH, AH
  JNZ   @@Match1             {Match at Position 3 or 4}
  SHR   EAX, 16
  TEST  AL, AL
  JNZ   @@Match2             {Match at Position 5 or 6}
  SHR   EAX, 8
  ADD   EDX, 6               {Match at Position 7 or 8}
  JMP   @@MatchDone
@@Match2:
  ADD   EDX, 4
  JMP   @@MatchDone
@@Match1:
  SHR   EAX, 8               {AL <- AH}
  ADD   EDX, 2
@@MatchDone:
  XOR   EAX, 2               {Packed Byte is $7F/$FF (1st of Pair) or $80 (2nd)}
  AND   EAX, 3               {EAX <- 1 or 2}
  ADD   EAX, EDX             {1-Based Result; Returns via the RET at end}
end;

////////////////////////////////////////////////////////////////////////////////

type
  { Common signature of the RTL Pos routine and all replacements above. }
  TFunc = function (const substr : AnsiString; const s : AnsiString ) : Integer;

const
  { Replacement selected by the value GetCPUType returns.
    NOTE(review): the entry order must match the declaration order of
    TCPUType in elCPUID, which is not visible here - confirm. }
  NewFuncs: array[TCPUType] of TFunc = (
    PosFastcodeP3,
    PosFastcodeP4,
    PosFastcodeXP,
    PosFastcodeOpteron,
    PosFastcodeBlended,//PosFastcodePrescott,
    PosFastcodeBlended
  );

{ Thin wrapper around Pos. Its compiled code is inspected at unit
  initialization to find the address of the routine behind Pos. }
function PosStub(const substr : AnsiString; const s : AnsiString ) : Integer;
begin
  Result := Pos(substr, s);
end;

var
  CodeHook: TCodeHook;
  OldFunc_: TFunc; // OldFunc_ & NewFunc_ have same type against overloads ambiguities
  OldFunc: Pointer absolute OldFunc_;
  OldFuncSave: Pointer;
  OldProtect: DWORD;
  IgnoredProtect: DWORD;   // receives the protection replaced by the restoring call
  ProtectChanged: BOOL;    // True when the first VirtualProtect call succeeded
  IsCallOpcode: Boolean;   // True when PosStub+3 holds a CALL rel32 opcode ($E8)
  NewFunc_: TFunc;
  NewFunc: Pointer absolute NewFunc_;

initialization
begin
  /// PosStub('', '');
  OldFunc_ := PosStub;
  OldFuncSave := OldFunc;

  // Keep the page executable while it is open: the code running right now
  // may share a page with PosStub, and PAGE_READWRITE would drop the
  // execute permission from it.
  ProtectChanged := VirtualProtect(OldFuncSave, 3 + SizeOf(TJump),
    PAGE_EXECUTE_READWRITE, OldProtect);

  Inc(Integer(OldFunc), 3);
  IsCallOpcode := PByte(OldFunc)^ = $E8; // expect a call to _LStrPos at PosStub+3
  if IsCallOpcode then
  begin
    Inc(Integer(OldFunc));
    // target = address following the rel32 operand + rel32
    OldFunc := Pointer(Integer(OldFunc) + SizeOf(Pointer) + PInteger(OldFunc)^); // calculate true address of _LStrPos
  end;

  // VirtualProtect fails when the old-protection pointer is nil, so a real
  // variable is passed; restore only what was actually changed.
  if ProtectChanged then
    VirtualProtect(OldFuncSave, 3 + SizeOf(TJump), OldProtect, IgnoredProtect);

  if not FlushInstructionCache(GetCurrentProcess, OldFuncSave, 3 + SizeOf(TJump)) then
  begin
    RaiseLastOSError;
  end;

  Assert(IsCallOpcode); // asserts we have a call to _LStrPos

  // With assertions compiled out, never hook an address derived from
  // unexpected code bytes; the RTL routine then simply stays in place.
  if IsCallOpcode then
  begin
    NewFunc_ := NewFuncs[GetCPUType];
    CodeHook := TCodeHook.Create(OldFunc, NewFunc);
  end;
  /// Pos('', '');
end;

end.