Many thanks in advance! By the way, StrLen_() does not work. Is it only a C/C++ library function, or is it an actual API function?
[Edit]
It seems StrLen() is only a C/C++ standard library function and not an API function. I have searched for an assembly replacement for StrLen(), because I want to write my own library without depending on any other library.
This is supposed to be the fastest StrLen() routine. Can someone help me get it to work? (Or, if you have a faster routine, please share it here.) Thanks!
Plain x86 code I found (by lingo12):
Code: Select all
; strlen, one dword per iteration (by lingo12).
; In:    esi = string start; the dword loads assume an aligned buffer
; Out:   eax = number of bytes before the first zero byte
; Clobb: ecx, edx, flags
xor     edx,edx            ; edx = byte offset of the next dword to load
C2_loop:                                  ; scan loop; also re-entered after a false hit
mov     eax, [esi+edx]     ; get a dword (buffer is aligned)
lea     ecx, [eax-1010101h]; subtract 1 from each byte; a zero byte wraps and gets its sign bit set
add     edx, 4             ; edx now points just past the dword held in eax
And     ecx, 80808080h     ; keep only the per-byte sign bits of the difference
jz      C2_loop            ; no sign bit set -> no zero byte in this dword
                                          ; candidate hit: bytes above 80h set a sign bit too, so verify byte by byte
test    eax, 000000FFh     ; is byte 0 (al) zero?
jz      C2_minus4          ; yes: terminator is 4 bytes before edx
test    eax, 0000FF00h     ; is byte 1 (ah) zero?
jz      C2_minus3          ; yes: terminator is 3 bytes before edx
test    eax, 00FF0000h     ; is byte 2 zero?
jz      C2_minus2          ; yes: terminator is 2 bytes before edx
test    eax, 0FF000000h    ; is byte 3 zero?
jnz     C2_loop            ; no zero byte at all: the filter hit was false, keep scanning
lea     eax, [edx-1]       ; eax = length (terminator in byte 3)
ret                        ;
C2_minus2:                                ; terminator in byte 2
lea     eax, [edx-2]       ; eax = length of string
ret                        ;
C2_minus3:                                ; terminator in byte 1
lea     eax, [edx-3]       ; eax = length of string
ret                        ;
C2_minus4:                                ; terminator in byte 0
lea     eax, [edx-4]       ; eax = length of string
ret                        ;
Code: Select all
;-----------------------------------------------------------------------
; strlen fragment: scans one dword per iteration for a zero byte.
; In:    edx = string start. Should be dword-aligned: an unaligned
;              dword load can still straddle into the following page.
; Out:   ecx = length in bytes (terminator not counted)
;        edx = string start (restored on exit)
; Clobb: eax, ebx, flags
;
; Each dword is loaded and then tested before anything further is read,
; so no load ever goes past the dword that contains the terminator.
;-----------------------------------------------------------------------
        mov     ecx, edx                ; ecx = read pointer
        lea     edx, [edx+3]            ; edx = start+3, bias consumed by the final sbb
._1:    mov     ebx, [ecx]              ; load the dword under test
        add     ecx, 4                  ; ecx = address just past that dword
        lea     eax, [ebx-01010101h]    ; subtract 1 from each byte
        xor     ebx, -1                 ; invert all bytes
        and     eax, ebx                ; sign bit survives only where (b-1) set it and b had it clear
        and     eax, 80808080h          ; isolate the per-byte sign bits
        jz      ._1                     ; no zero byte in this dword, continue
        test    eax, 00008080h          ; zero byte in the low two bytes?
        jnz     ._2
        shr     eax, 16                 ; no: move the high-half flags down
        add     ecx, 2                  ; and account for the two skipped bytes
._2:    shl     al, 1                   ; CF = 1 if the even byte of the pair is the terminator
        sbb     ecx, edx                ; ecx = (ecx - (start+3)) - CF = length
        lea     edx, [edx-3]            ; restore edx = string start (lea leaves flags intact)
Code: Select all
; MMX version by Ryan Mack
; Roughly 13 + 3n + BRANCH clocks on a P-II
/* Head masks for the MMX scan: entry k has its low k bytes set to 0xff.
   OR-ing entry (address & 7) into the first aligned qword makes the bytes
   that lie before the string start non-zero, so they cannot be mistaken
   for the terminator. */
const unsigned __int64 STRINGTBL[8] = {0, 0xff,
        0xffff, 0xffffff, 0xffffffff, 0xffffffffff,
        0xffffffffffff, 0xffffffffffffff};
/* ... */
    ; strlen core, 8 bytes per step using MMX.
    ; In:    eax = string start (any alignment)
    ; Out:   ecx = length in bytes (terminator not counted)
    ; Clobb: eax, edx (holds the string start on exit), mm0, mm1, flags
    ;
    ; Every load is an aligned qword and is tested before the next one is
    ; fetched, so nothing is read past the qword holding the terminator.
    ; NOTE(review): the clock estimate quoted for the original loop assumed
    ; a load of the following qword ahead of the test; re-measure if needed.
    pxor     mm1, mm1                   ; mm1 = 0, comparison reference
    mov      ecx, eax
    mov      edx, eax                   ; edx = string start, kept for the final subtract
    and      ecx, -8                    ; ecx = start rounded down to an 8-byte boundary
    and      eax, 7                     ; eax = number of bytes in front of the string
    movq     mm0, [ecx]                 ; first aligned qword
    por      mm0, [STRINGTBL+eax*8]     ; force the bytes before the string to non-zero
    jmp      SCAN                       ; the first qword is already loaded
MAIN:
    add      ecx, 8                     ; advance to the next qword
    movq     mm0, [ecx]
SCAN:
    pcmpeqb  mm0, mm1                   ; FFh in every byte position that held zero
    packsswb mm0, mm0                   ; saturate 4 words to 4 bytes: FFh / 7Fh / 80h / 00h
    movd     eax, mm0                   ; one result byte per 2 source bytes
    test     eax, eax
    jz       MAIN                       ; no zero byte in this qword

    bsf      eax, eax                   ; lowest set bit: 8*i (even byte hit) or 8*i+7 (odd byte only)
    shr      eax, 2                     ; -> 2*i or 2*i+1 = byte index inside the qword
    lea      ecx, [ecx+eax]             ; ecx = address of the terminator
    sub      ecx, edx                   ; ecx = length
    emms                                ; clear MMX state so later x87 code works



