Here's an updated version of my code. It adds a procedure that returns the length of a string with an optional maximum value (UCS2LenM) and a Mid procedure (UCS2Mid).
Unfortunately, the Mid procedure is only faster for longer strings (at least on OS X).
Code: Select all
; Procedure.i UCS2Len(*UCS2String)
; Procedure.i UCS2LenM(*UCS2String, MaxLen.i = -1)
; Procedure.s UCS2Mid(*UCS2String, StartPos.i, Length.i = -1)
; Address-width dependent building blocks shared by UCS2Len and UCS2LenM.
;   UCS2Len_CMP  compares the block pointer with the limit pointer
;   UCS2Len_ADD  advances the block pointer by 16 bytes (lea leaves the flags untouched)
;   UCS2Len_MOV  loads the 16-byte block at the block pointer into xmm0 (aligned load)
CompilerIf #PB_Compiler_Processor <> #PB_Processor_x64
  ; 32-bit variants: block pointer in edx, limit pointer in ebx
  Macro UCS2Len_CMP
    !cmp edx, ebx
  EndMacro
  Macro UCS2Len_ADD
    !lea edx, [edx + 16]
  EndMacro
  Macro UCS2Len_MOV
    !movdqa xmm0, [edx]
  EndMacro
CompilerElse
  ; 64-bit variants: block pointer in rdx, limit pointer in r8
  Macro UCS2Len_CMP
    !cmp rdx, r8
  EndMacro
  Macro UCS2Len_ADD
    !lea rdx, [rdx + 16]
  EndMacro
  Macro UCS2Len_MOV
    !movdqa xmm0, [rdx]
  EndMacro
CompilerEndIf
; Returns the number of 16-bit characters that precede the first zero word of
; the string at *UCS2String.
;
; The string is scanned in aligned 16-byte blocks (movdqa / pcmpeqw / pmovmskb).
; The first block starts at the address rounded down to a multiple of 16, so
; bytes in front of the string are loaded too and then masked out of the result.
; There is no upper bound on the scan: the string must be zero-terminated.
;
; Registers written: rax, rcx, rdx (eax, ecx, edx on x86), xmm0-xmm3, flags.
; Uses the macros UCS2Len_MOV and UCS2Len_ADD.
Procedure.i UCS2Len(*UCS2String)
  ; init and check word boundary alignment
  ; rdx/edx = address of the 16-byte block containing the string start
  ; ecx     = offset of the string start inside that block (0..15)
  CompilerIf #PB_Compiler_Processor = #PB_Processor_x64
    !mov rdx, [p.p_UCS2String]
    !mov ecx, edx
    !and rdx, -16
  CompilerElse
    !mov edx, [p.p_UCS2String]
    !mov ecx, edx
    !and edx, -16
  CompilerEndIf
  UCS2Len_MOV
  ; xmm1 = all zero, the comparison pattern for pcmpeqw
  !pxor xmm1, xmm1
  !sub ecx, edx
  ; an odd start offset means the characters straddle the word lanes
  !test ecx, 1
  !jnz ucs2len1

  ; handle strings aligned to word boundary
  !pcmpeqw xmm0, xmm1
  !pmovmskb eax, xmm0
  ; clear the mask bits that belong to bytes in front of the string
  !shr eax, cl
  !shl eax, cl
  UCS2Len_ADD
  !and eax, eax
  !jnz ucs2len3
  !ucs2len0:
  UCS2Len_MOV
  !pcmpeqw xmm0, xmm1
  !pmovmskb eax, xmm0
  UCS2Len_ADD
  !and eax, eax
  !jz ucs2len0
  !jmp ucs2len3

  ; handle strings not aligned to word boundary
  ; Every block is shifted up by one byte so that the characters line up with
  ; the word lanes; byte 15 of the previous block (kept in xmm3) is merged into
  ; byte 0 of the shifted block.
  !ucs2len1:
  !movdqa xmm3, xmm0
  !pslldq xmm0, 1
  ; the start offset moves up by one byte together with the data
  !inc cl
  !pcmpeqw xmm0, xmm1
  !pmovmskb eax, xmm0
  !shr eax, cl
  !shl eax, cl
  UCS2Len_ADD
  !and eax, eax
  !jnz ucs2len3
  !ucs2len2:
  UCS2Len_MOV
  ; xmm2 keeps the unshifted block; it becomes the "previous block" afterwards
  !movdqa xmm2, xmm0
  !pslldq xmm0, 1
  !psrldq xmm3, 15
  !por xmm0, xmm3
  !pcmpeqw xmm0, xmm1
  !pmovmskb eax, xmm0
  UCS2Len_ADD
  !movdqa xmm3, xmm2
  !and eax, eax
  !jz ucs2len2

  ; exit procedure
  ; eax is never zero here, so bsf yields the byte index of the first zero word
  ; inside the block that ends at rdx/edx.
  ; On the unaligned path that index is one too high because of the byte shift;
  ; the byte distance is then odd and the final shift right drops the extra bit.
  !ucs2len3:
  !bsf ecx, eax
  CompilerIf #PB_Compiler_Processor = #PB_Processor_x64
    ; full 64-bit arithmetic so the result is not truncated to 32 bits
    !lea rax, [rcx + rdx - 16]
    !sub rax, [p.p_UCS2String]
    !shr rax, 1
  CompilerElse
    !lea eax, [ecx + edx - 16]
    !sub eax, [p.p_UCS2String]
    !shr eax, 1
  CompilerEndIf
  ; the result is already in rax/eax
  ProcedureReturn
EndProcedure
; Returns the number of 16-bit characters that precede the first zero word of
; the string at *UCS2String, but never more than MaxLen.
;
;   *UCS2String  address of the first character
;   MaxLen       maximum number of characters to report; any negative value
;                (default -1) disables the limit
;
; The string is scanned in aligned 16-byte blocks. The first block (the one that
; contains the string start) is always loaded; every further block is loaded
; only if it starts below the limit address.
;
; Registers written: rax, rcx, rdx, r8 (x64) / eax, ecx, edx (x86; ebx is saved
; and restored), xmm0-xmm3, flags.
; Uses the macros UCS2Len_MOV, UCS2Len_ADD and UCS2Len_CMP.
Procedure.i UCS2LenM(*UCS2String, MaxLen.i = -1)
  ; init and check word boundary alignment
  ; r8/ebx  = limit address (*UCS2String + MaxLen * 2); forced to all ones when
  ;           MaxLen is negative so that no block address can reach it
  ; rdx/edx = address of the 16-byte block containing the string start
  ; ecx     = offset of the string start inside that block (0..15)
  CompilerIf #PB_Compiler_Processor = #PB_Processor_x64
    !mov rdx, [p.p_UCS2String]
    !mov rax, [p.v_MaxLen]
    !lea r8, [rdx + rax * 2]
    !sar rax, 63
    !or r8, rax
    !mov ecx, edx
    !and rdx, -16
  CompilerElse
    !mov edx, [p.p_UCS2String]
    !mov eax, [p.v_MaxLen]
    ; the parameters are read before this push and after the matching pop only
    !push ebx
    !lea ebx, [edx + eax * 2]
    !sar eax, 31
    !or ebx, eax
    !mov ecx, edx
    !and edx, -16
  CompilerEndIf
  UCS2Len_MOV
  ; xmm1 = all zero, the comparison pattern for pcmpeqw
  !pxor xmm1, xmm1
  !sub ecx, edx
  ; an odd start offset means the characters straddle the word lanes
  !test ecx, 1
  !jnz ucs2lenm1

  ; handle strings aligned to word boundary
  !pcmpeqw xmm0, xmm1
  !pmovmskb eax, xmm0
  ; clear the mask bits that belong to bytes in front of the string
  !shr eax, cl
  !shl eax, cl
  UCS2Len_ADD
  !and eax, eax
  !jnz ucs2lenm3
  !ucs2lenm0:
  ; stop before touching a block that starts at or beyond the limit
  UCS2Len_CMP
  !jae ucs2lenm4
  UCS2Len_MOV
  !pcmpeqw xmm0, xmm1
  !pmovmskb eax, xmm0
  UCS2Len_ADD
  !and eax, eax
  !jz ucs2lenm0
  !jmp ucs2lenm3

  ; handle strings not aligned to word boundary
  ; Every block is shifted up by one byte so that the characters line up with
  ; the word lanes; byte 15 of the previous block (kept in xmm3) is merged into
  ; byte 0 of the shifted block.
  !ucs2lenm1:
  !movdqa xmm3, xmm0
  !pslldq xmm0, 1
  ; the start offset moves up by one byte together with the data
  !inc cl
  !pcmpeqw xmm0, xmm1
  !pmovmskb eax, xmm0
  !shr eax, cl
  !shl eax, cl
  UCS2Len_ADD
  !and eax, eax
  !jnz ucs2lenm3
  !ucs2lenm2:
  ; the limit address is odd here and the block address even, so every
  ; character below the limit ends in a block that was already scanned
  UCS2Len_CMP
  !jae ucs2lenm4
  UCS2Len_MOV
  ; xmm2 keeps the unshifted block; it becomes the "previous block" afterwards
  !movdqa xmm2, xmm0
  !pslldq xmm0, 1
  !psrldq xmm3, 15
  !por xmm0, xmm3
  !pcmpeqw xmm0, xmm1
  !pmovmskb eax, xmm0
  UCS2Len_ADD
  !movdqa xmm3, xmm2
  !and eax, eax
  !jz ucs2lenm2

  ; exit procedure
  ; ucs2lenm3: a zero word was found (eax <> 0); its address is clamped to the limit
  ; ucs2lenm4: the limit was reached first; the limit address is the end address
  ; On the unaligned path the found address is one too high because of the byte
  ; shift; the byte distance is then odd and the final shift right drops the
  ; extra bit.
  !ucs2lenm3:
  !bsf ecx, eax
  CompilerIf #PB_Compiler_Processor = #PB_Processor_x64
    !lea rax, [rcx + rdx - 16]
    !cmp rax, r8
    !cmova rax, r8
    !jmp ucs2lenm5
    !ucs2lenm4:
    !mov rax, r8
    !ucs2lenm5:
    ; full 64-bit arithmetic so the result is not truncated to 32 bits
    !sub rax, [p.p_UCS2String]
    !shr rax, 1
  CompilerElse
    !lea eax, [ecx + edx - 16]
    !cmp eax, ebx
    !cmova eax, ebx
    !jmp ucs2lenm5
    !ucs2lenm4:
    !mov eax, ebx
    !ucs2lenm5:
    !pop ebx
    !sub eax, [p.p_UCS2String]
    !shr eax, 1
  CompilerEndIf
  ; the result is already in rax/eax
  ProcedureReturn
EndProcedure
; Returns up to Length characters of the UCS-2 string at *UCS2String, starting
; at the 1-based character position StartPos.
;
;   *UCS2String  address of the first character of a zero-terminated string
;   StartPos     1-based start position; values below 1 are treated as 1.
;                A position behind the end of the string is clamped to the
;                terminator by UCS2LenM.
;   Length       maximum number of characters to copy, passed on to PeekS
;                (default -1)
Procedure.s UCS2Mid(*UCS2String, StartPos.i, Length.i = -1)
  Protected *Start = *UCS2String

  ; Only positions beyond the first character need a scan; skipping zero
  ; characters would leave the pointer unchanged anyway.
  If StartPos > 1
    ; UCS2LenM stops at the terminator, so the start never moves past the string end.
    *Start = *UCS2String + UCS2LenM(*UCS2String, StartPos - 1) << 1
  EndIf

  ; The buffer is UCS-2 by contract, so state the format instead of relying on
  ; the compile mode default.
  ProcedureReturn PeekS(*Start, Length, #PB_Unicode)
EndProcedure