Since results may differ between platforms, it would be great if someone could test it.
On my iMac running OS X (Core 2 Duo), the PB version is faster for short strings and the SSE2 version is faster for longer strings.
Edit:
I posted updated code a few posts below.
It is faster and also contains a Mid procedure.
http://www.purebasic.fr/english/viewtop ... 25#p451425
Code: Select all
; ----------------------------------------------------------------------------
; UCS2Len - length, in 16-bit characters, of a zero-terminated UCS-2 string,
;           found by scanning 16 bytes at a time with SSE2.
;
; *UCS2String : address of the first character. Any byte alignment is
;               accepted (an odd start address is handled by the selector
;               mask below). A null pointer yields 0.
; Returns     : number of 16-bit characters before the terminating zero word.
;               The final address arithmetic is done in 32-bit registers
;               (also on x64), so the result is computed modulo 2^32.
;
; Inline assembly is FASM / Intel syntax ("op dst, src").
;
; Register roles:
;   rdx/edx : address of the 16-byte aligned block currently examined
;   xmm1    : all zero, compared against each block
;   eax     : 32-bit window of "byte is zero" flags;
;             bits 0..15 = previous block, bits 16..31 = current block
;   ebx     : selector for window bits that are the SECOND byte of a
;             character: 0xaaaaaaaa for an even start address,
;             0x55555555 for an odd one (saved and restored around its use)
;   ecx     : scratch
;
; Every load is a 16-byte aligned movdqa, so bytes before the string start and
; after the terminator are read as well; an aligned 16-byte load stays inside
; one aligned 16-byte block and therefore inside the page that block is in.
; ----------------------------------------------------------------------------
Procedure.i UCS2Len(*UCS2String)
  ; Robustness: without this guard the first movdqa below would read address 0.
  If *UCS2String = 0
    ProcedureReturn 0
  EndIf

  ; Load the aligned block that contains the first character.
  ; ecx keeps the low 32 bits of the original pointer for the misalignment
  ; computation further down.
  ; NOTE(review): [p.p_UCS2String] is presumably stack-pointer relative; it is
  ; never referenced between the push and the pop of rbx/ebx.
  CompilerIf #PB_Compiler_Processor = #PB_Processor_x64
    !mov rdx, [p.p_UCS2String]
    !mov ecx, edx
    !and rdx, -16
    !movdqa xmm0, [rdx]
    !push rbx
  CompilerElse
    !mov edx, [p.p_UCS2String]
    !mov ecx, edx
    !and edx, -16
    !movdqa xmm0, [edx]
    !push ebx
  CompilerEndIf

  ; eax bit i = 1 when byte i of the first block is zero.
  !pxor xmm1, xmm1
  !pcmpeqb xmm0, xmm1
  !pmovmskb eax, xmm0
  !mov ebx, 0xaaaaaaaa

  ; ecx = pointer - aligned pointer = number of bytes in the block that lie
  ; before the string (0..15). Shifting right by that amount discards their
  ; flags; shifting left by (that amount + 16) puts the surviving flags back at
  ; their block position in the upper half of the window and leaves the lower
  ; half ("previous block") empty.
  !sub ecx, edx
  !shr eax, cl
  !add ecx, 16
  !shl eax, cl

  ; Adding 16 did not change bit 0, so ecx = 1 for an odd start address.
  ; In that case the second byte of each character sits on even window bits,
  ; hence the selector becomes 0x55555555.
  !and ecx, 1
  !shr ebx, cl
  !jmp ucs2len1

  ; --- main loop: advance one block -------------------------------------
  !ucs2len0:
  CompilerIf #PB_Compiler_Processor = #PB_Processor_x64
    !add rdx, 16
    !movdqa xmm0, [rdx]
  CompilerElse
    !add edx, 16
    !movdqa xmm0, [edx]
  CompilerEndIf
  !pcmpeqb xmm0, xmm1
  !pmovmskb ecx, xmm0
  ; Slide the window: the old current block becomes the previous block and the
  ; new flags go into the upper half. Keeping the previous block is what lets
  ; a terminator that straddles two blocks be detected.
  !shr eax, 16
  !shl ecx, 16
  !or eax, ecx

  ; --- test the window for a zero character -----------------------------
  !ucs2len1:
  ; (eax << 1) & eax : bit i set when bytes i-1 and i are both zero.
  ; & ebx            : keep only pairs that form one character.
  !lea ecx, [eax + eax]
  !and ecx, eax
  !and ecx, ebx
  !jz ucs2len0

  CompilerIf #PB_Compiler_Processor = #PB_Processor_x64
    !pop rbx
  CompilerElse
    !pop ebx
  CompilerEndIf

  ; bsf gives the window index of the terminator's second byte. Window bit 16
  ; corresponds to address edx, so that byte is at edx + index - 16.
  ; Subtracting the start address gives 2 * length + 1; the shift halves it and
  ; drops the extra 1.
  !bsf eax, ecx
  !lea eax, [eax + edx - 16]
  !sub eax, [p.p_UCS2String]
  !shr eax, 1

  ; The result is already in eax; ProcedureReturn is deliberately given no
  ; expression so that register is what the caller receives.
  ProcedureReturn
EndProcedure
Code: Select all
; Benchmark: times UCS2Len (SSE2) against PureBasic's MemoryStringLength on a
; short literal, a medium literal and a 20000-character string, then shows the
; elapsed milliseconds of each pair in a message box.

#LiteralRuns = 10000000   ; iterations for the two string literals
#BigRuns     = 100000     ; iterations for the 20000-character string

Define Report.s, BigText.s
Define TimeStart, TimeSwitch, TimeStop, Iteration, CharCount

Report = "Unicode string length, SSE2 vs PB" + #CRLF$ + #CRLF$

; --- short literal ----------------------------------------------------------
TimeStart = ElapsedMilliseconds()
For Iteration = 1 To #LiteralRuns
  CharCount = UCS2Len(@"Short")
Next
TimeSwitch = ElapsedMilliseconds()
For Iteration = 1 To #LiteralRuns
  CharCount = MemoryStringLength(@"Short")
Next
TimeStop = ElapsedMilliseconds()
Report = Report + "Short string : " + Str(TimeSwitch - TimeStart) + "(SSE2) vs " + Str(TimeStop - TimeSwitch) + "(PB)" + #CRLF$

; --- medium literal ---------------------------------------------------------
TimeStart = ElapsedMilliseconds()
For Iteration = 1 To #LiteralRuns
  CharCount = UCS2Len(@"A bit longer string to test with")
Next
TimeSwitch = ElapsedMilliseconds()
For Iteration = 1 To #LiteralRuns
  CharCount = MemoryStringLength(@"A bit longer string to test with")
Next
TimeStop = ElapsedMilliseconds()
Report = Report + "Longer string : " + Str(TimeSwitch - TimeStart) + "(SSE2) vs " + Str(TimeStop - TimeSwitch) + "(PB)" + #CRLF$

; --- 20000 spaces -----------------------------------------------------------
BigText = Space(20000)
TimeStart = ElapsedMilliseconds()
For Iteration = 1 To #BigRuns
  CharCount = UCS2Len(@BigText)
Next
TimeSwitch = ElapsedMilliseconds()
For Iteration = 1 To #BigRuns
  CharCount = MemoryStringLength(@BigText)
Next
TimeStop = ElapsedMilliseconds()
Report = Report + "Very long string : " + Str(TimeSwitch - TimeStart) + "(SSE2) vs " + Str(TimeStop - TimeSwitch) + "(PB)" + #CRLF$

MessageRequester("Test results", Report)