Code: Select all
; Demonstration of the problem: one ReplaceString() pass replaces each
; two-space pair with one space but does not look at its own output again,
; so a longer run of spaces is only shortened, not collapsed.
; Here four spaces become two, and the result still contains a double space.
mystr.s = "one    two"                   ; four spaces between the words
mystr = ReplaceString(mystr, "  ", " ") ; search text is TWO spaces, replacement is ONE
Debug(mystr)
To solve this it seems a loop is required: first check whether any double spaces are left, and then replace them if so:
Code: Select all
Procedure.s StripDualSpaces_ReplaceString(sStr.s) ;Win+Linux+Mac, 32+64, Unicode+Ascii
  ; Returns a copy of sStr in which every run of consecutive spaces has been
  ; collapsed to a single space.
  ;
  ; A single ReplaceString() pass only shortens a run (four spaces become two),
  ; so the replacement is repeated until no two-space pair is left.
  ;
  ; The search text must be TWO spaces. Searching for one space and replacing
  ; it with one space never changes the string, so the loop would never end
  ; for any input that contains a space.
  While FindString(sStr, "  ")
    sStr = ReplaceString(sStr, "  ", " ")
  Wend
  ProcedureReturn sStr
EndProcedure
But I need to do this a lot, and there should be a more efficient way than looping like that, so I wrote what I guess is a typical(?) "copy valid chars backwards from ptrB to ptrA" routine, incrementing both pointers as necessary (and for speed reasons this modifies the string you give it - it doesn't return a separate string):
Code: Select all
DisableDebugger
Procedure StripDualSpacesPBptr(*pstr) ;Win+Linux+Mac, 32+64, Unicode+Ascii
  ; Collapses every run of consecutive spaces to a single space, IN PLACE.
  ;
  ;   *pstr : address of a writable, null-terminated string in the program's
  ;           native character size (for example @MyString).
  ;           A null pointer is accepted and ignored.
  ;
  ; Two pointers walk the same buffer: *src reads every character, *dst marks
  ; where the next kept character goes. Characters are only ever dropped, so
  ; *dst never gets ahead of *src and no second buffer is needed.
  Protected *dst.Character, *src.Character, prevWasSpace.i

  If *pstr = 0 ; nothing to compact; dereferencing a null pointer would crash
    ProcedureReturn
  EndIf

  *dst = *pstr
  *src = *pstr

  While *src\c ; stop at the terminating null
    If *src\c = ' '
      If prevWasSpace = #False ; first space of a run: keep it
        *dst\c = ' '
        *dst + SizeOf(Character)
        prevWasSpace = #True
      EndIf ; second and later spaces of a run: dropped
    Else ; any other character is copied and ends the current run
      *dst\c = *src\c
      *dst + SizeOf(Character)
      prevWasSpace = #False
    EndIf
    *src + SizeOf(Character) ; compile-time character size (Ascii or Unicode build)
  Wend

  *dst\c = 0 ; terminate the (possibly shorter) result
EndProcedure
EnableDebugger
I get really good performance out of that! But being in an assembly mood this week, I thought I'd try my luck at making an asm version, which was fun as I'm enjoying learning:
Code: Select all
; When compiling for 32-bit x86, alias the 64-bit register names to their
; 32-bit counterparts so the same assembly source can name rax/rbx/rcx on
; both targets.
; NOTE(review): presumably this substitution only applies to lines that
; PureBasic parses itself, not to lines passed through with a leading "!" -
; confirm against the inline ASM documentation.
CompilerIf #PB_Compiler_Processor = #PB_Processor_x86
Macro rax : eax : EndMacro ;thanks again wilbert for these helpers!
Macro rbx : ebx : EndMacro
Macro rcx : ecx : EndMacro
CompilerEndIf
DisableDebugger ;Win+Linux+Mac, 32+64, Unicode+Ascii
; StripDualSpacesAsm(*pstr)
;   Collapses every run of consecutive spaces to a single space, IN PLACE.
;   *pstr : address of a writable, null-terminated string in the program's
;           native character size. A null pointer is accepted and ignored.
;
;   Register roles inside the loop:
;     rax    = write pointer (where the next kept character goes)
;     rbx    = read pointer  (saved on entry, restored on exit)
;     rcx    = "previous character was a space" flag (0 / 1)
;     dx/dl  = character just read
;
;   rcx must be cleared before the loop. It is tested on the first space
;   that is met, and it holds whatever the caller left in it, so without the
;   clear a string that BEGINS with a space could lose that space.
;
;   Lines that name rax/rbx/rcx carry no leading "!" so that, on 32-bit
;   builds, the rax/rbx/rcx -> eax/ebx/ecx aliases can be applied to them.
;   Labels and jumps use "!" and are plain assembler labels.
CompilerIf #PB_Compiler_Unicode
Procedure StripDualSpacesAsm(*pstr)
  If *pstr = 0 : ProcedureReturn : EndIf ; nothing to compact
  EnableASM
  mov rax, *pstr          ; write pointer
  push rbx
  mov rbx, rax            ; read pointer starts at the same place
  XOr rcx, rcx            ; no space seen yet
  !nextbyte:
  mov dx, [rbx]
  ! cmp dx, 0             ;Null?
  ! je endproc
  ! cmp dx, 32            ;Space?
  ! jne normalchar        ;Other
  !spacechar:
  cmp rcx, 0
  ! jne dualspace
  !firstspace:            ; first space of a run: keep it
  mov rcx, 1
  mov [rax], dx
  add rax, 2
  add rbx, 2
  ! jmp nextbyte
  !dualspace:             ; repeated space: skip it (advance the read pointer only)
  add rbx, 2
  ! jmp nextbyte
  !normalchar:            ; any other character: copy it and end the run
  XOr rcx, rcx
  mov [rax], dx
  add rax, 2
  add rbx, 2
  ! jmp nextbyte
  !endproc:
  mov [rax], word 0       ; terminate the (possibly shorter) result
  pop rbx
  DisableASM
EndProcedure
CompilerElse
Procedure StripDualSpacesAsm(*pstr)
  If *pstr = 0 : ProcedureReturn : EndIf ; nothing to compact
  EnableASM
  mov rax, *pstr          ; write pointer
  push rbx
  mov rbx, rax            ; read pointer starts at the same place
  XOr rcx, rcx            ; no space seen yet
  !nextbyte:
  mov dl, [rbx]
  ! cmp dl, 0             ;Null?
  ! je endproc
  ! cmp dl, 32            ;Space?
  ! jne normalchar        ;Other
  !spacechar:
  cmp rcx, 0
  ! jne dualspace
  !firstspace:            ; first space of a run: keep it
  mov rcx, 1
  mov [rax], dl
  inc rax
  inc rbx
  ! jmp nextbyte
  !dualspace:             ; repeated space: skip it (advance the read pointer only)
  inc rbx
  ! jmp nextbyte
  !normalchar:            ; any other character: copy it and end the run
  XOr rcx, rcx
  mov [rax], dl
  inc rax
  inc rbx
  ! jmp nextbyte
  !endproc:
  mov [rax], byte 0       ; terminate the (possibly shorter) result
  pop rbx
  DisableASM
EndProcedure
CompilerEndIf
EnableDebugger
Timing tests, including ones posted later in this thread ...
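My short string is "one two three four five six seven eight nine end" with runs of spaces between the words (53% space chars), which is 84 bytes. (The runs are shown collapsed here: the words account for 39 characters, so the real string contains 45 spaces.)
My long string is 2000 copies of that (for a total string length of 168,000, i.e. 168 KB).
Then there are the Unicode versions also!
I try 5 million calls to the short buffer, and 10 thousand calls to the long one.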
Code: Select all
TimePB=Native PB, FindString+ReplaceString loop
TimeK1=Keya's PB, BytePtr-based
TimeK2=Keya's asm, BytePtr-based w/ 8bit ops
TimeW1=wilbert's asm, BytePtr-based w/ 32bit ops
TimeR1=Rashad's PB, StringField-based
TimeR2=Rashad's PB, Peek-based
TimeI1=IdeasVacuum's PB, StringField-based
OS=Linux-64 Char=Ascii String len=84, trying 5000000 calls...
TimeR1=38568
TimeR2=33397
TimeI1=20884
TimePB=6655
TimeK1=1145
TimeK2=643
TimeW1=422
OS=Linux-64 Char=Unicode String len=84, trying 5000000 calls...
TimeR1=99061
TimeR2=50534
TimeI1=22502
TimePB=8557
TimeK1=1259
TimeK2=791
TimeW1=582
OS=Linux-64 Char=Ascii String len=168000, trying 10000 calls...
TimeR1=184175000
TimeR2=53705000
TimeI1=51403000
TimePB=21622
TimeK1=3954
TimeK2=2138
TimeW1=1235
OS=Linux-64 Char=Unicode String len=168000, trying 10000 calls...
TimeR1=531260000
TimeR2=175150000
TimeI1=60638000
TimePB=24686
TimeK1=4657
TimeK2=2709
TimeW1=1841
---
OS=Linux-32 Char=Ascii String len=84, trying 5000000 calls...
TimeR2=39688
TimeR1=35749
TimeI1=26846
TimePB=9655
TimeK1=1121
TimeK2=564
TimeW1=402
OS=Linux-32 Char=Unicode String len=84, trying 5000000 calls...
TimeR1=99186
TimeR2=62115
TimeI1=26096
TimePB=9200
TimeK1=1342
TimeK2=728
TimeW1=651
OS=Linux-32 Char=Ascii String len=168000, trying 10000 calls...
TimeR1=148315000
TimeI1=65492000
TimeR2=58475000
TimePB=22914
TimeK1=3881
TimeK2=1725
TimeW1=1144
OS=Linux-32 Char=Unicode String len=168000, trying 10000 calls...
TimeR1=511220000
TimeR2=175540000
TimeI1=57704000
TimePB=25697
TimeK1=4630
TimeK2=2207
TimeW1=1883
---
OS=OSX-64 Char=Ascii String len=84, trying 5000000 calls...
TimeR2=60728
TimeR1=47592
TimeI1=35529
TimePB=8584
TimeK1=1459
TimeK2=833
TimeW1=749
OS=OSX-64 Char=Unicode String len=84, trying 5000000 calls...
TimeR1=129911
TimeR2=96344
TimeI1=42651
TimePB=11463
TimeK1=2040
TimeK2=1503
TimeW1=1359
OS=OSX-64 Char=Ascii String len=168000, trying 10000 calls...
TimeR1=185485000
TimeR2=151470000
TimeI1=70608000
TimePB=23684
TimeK1=5089
TimeK2=2802
TimeW1=2286
OS=OSX-64 Char=Unicode String len=168000, trying 10000 calls...
TimeR1=582020000
TimeR2=353820000
TimeI1=80147000
TimePB=32381
TimeK1=6991
TimeK2=4804
TimeW1=4315
---
OS=Win-32 Char=Ascii String len=84, trying 5000000 calls...
TimeR2=67536
TimeR1=59299
TimeI1=30923
TimePB=22804
TimeK1=1552
TimeK2=1024
TimeW1=765
OS=Win-32 Char=Unicode String len=84, trying 5000000 calls...
TimeR1=117095
TimeR2=93698
TimeI1=32484
TimePB=11562
TimeK1=2054
TimeK2=1555
TimeW1=1457
OS=Win-32 Char=Ascii String len=168000, trying 10000 calls...
TimeR1=264375000
TimeR2=231745000
TimeI1=58647000
TimePB=84465 ;anomaly
TimeK1=5261
TimeK2=3610
TimeW1=2508
OS=Win-32 Char=Unicode String len=168000, trying 10000 calls...
TimeR1=570505000
TimeR2=375740000
TimeI1=75652000
TimePB=34182
TimeK1=7585
TimeK2=5629
TimeW1=4787
---
OS=Win-64 Char=Ascii String len=84, trying 5000000 calls...
TimeR2=71279
TimeR1=62363
TimeI1=34368
TimePB=9304
TimeK1=1475
TimeK2=991
TimeW1=699
OS=Win-64 Char=Unicode String len=84, trying 5000000 calls...
TimeR1=136522
TimeR2=96323
TimeI1=38987
TimePB=11055
TimeK1=1927
TimeK2=1628
TimeW1=1382
OS=Win-64 Char=Ascii String len=168000, trying 10000 calls...
TimeR1=278125000
TimeR2=225950000
TimeI1=77789000
TimePB=25608
TimeK1=5100
TimeK2=3363
TimeW1=2333
OS=Win-64 Char=Unicode String len=168000, trying 10000 calls...
TimeR1=626865000
TimeR2=345905000
TimeI1=87042000
TimePB=28811
TimeK1=6845
TimeK2=5005
TimeW1=4062



