fvillanova wrote: ↑Tue May 16, 2023 4:58 am I appreciate any contribution to speeding up the CreateS() procedure.
Code: Select all
Procedure.s CreateS(*input)
  ; Builds a 100-character string of '0'/'1' flags from a list of two-digit
  ; decimal numbers.
  ;
  ; *input : address of a null-terminated UTF-16 string made of two-digit
  ;          numbers ("00".."99"), each followed by exactly one separator
  ;          character, e.g. "92 00 99". The string is assumed well formed;
  ;          digits are not validated.
  ; Return : a 100-character string in which the character at index (99 - n)
  ;          is '1' for every number n found in the input and '0' elsewhere.
  ;          A null pointer or an empty string yields 100 x '0'.
  ;
  ; Registers used: rcx, r8, r9 only.

  ; 101 UTF-16 cells: 100 flag characters plus the terminating null cell that
  ; Dim leaves zeroed.
  Protected Dim Result.u(100)
  FillMemory(@Result(), 200, '0', #PB_Unicode)   ; 200 bytes = 100 x '0'

  !mov r8, [p.p_input]                  ; r8 = read cursor into the input
  !mov r9, [p.a_Result]                 ; r9 = base address of Result()
  !test r8, r8                          ; null pointer -> nothing to parse
  !jz .cs_done
  !cmp word [r8], 0                     ; empty string -> nothing to parse
  !je .cs_done
  !.cs_next:
  !mov ecx, [r8]                        ; two UTF-16 digits: tens (low), units (high)
  !add r8, 6                            ; skip two digits + one separator
  !and ecx, 0xf000f                     ; keep the low nibble of each digit
  !imul ecx, 0xa0001                    ; bits 16..31 = tens * 10 + units
  !shr ecx, 16                          ; rcx = n (upper half is zero)
  !neg rcx                              ; index backwards from the end
  !mov byte [r9 + rcx*2 + 198], '1'     ; low byte of cell (99 - n); high byte stays 0
  !cmp word [r8 - 2], 0                 ; was the separator the terminating null?
  !jne .cs_next
  !.cs_done:
  ProcedureReturn PeekS(@Result())
EndProcedure
; Demo: print the 100-character flag string for two sample number lists.
Define s1.s = "92 00 99 80 65 68 04 01"
Define s2.s = "37 65 04 92 15 98 00 43 02"

Debug CreateS(@s1)
Debug CreateS(@s2)
Grep using a 128-bit structure:
Code: Select all
; 128-bit bit set stored as two consecutive 64-bit quads.
; The assembly routines in this listing address the fields by fixed byte
; offsets (0 and 8), so the field order and types must not change.
Structure I128
low.q ; offset 0: bits 0..63
high.q ; offset 8: bits 64..127 (only 64..99 are set by CreateB_I128)
EndStructure
Procedure CreateB_I128(*input, *result.I128)
  ; Converts a list of two-digit decimal numbers into a 128-bit bit set.
  ;
  ; *input  : address of a null-terminated UTF-16 string made of two-digit
  ;           numbers ("00".."99"), each followed by exactly one separator
  ;           character, e.g. "07 18 99". The string is assumed well formed;
  ;           digits are not validated.
  ; *result : address of the I128 that receives the bits; bit n is set for
  ;           every number n in the input (n < 64 -> low, n >= 64 -> high).
  ;           Both quads are always overwritten. A null *input or an empty
  ;           string stores an all-zero result.
  ;
  ; Registers used: rax, rcx, rdx, r8, r9 - the general-purpose registers the
  ; PureBasic inline-ASM documentation lists as freely usable on x64.

  !mov r8, [p.p_input]                  ; r8 = read cursor into the input
  !mov r9, [p.p_result]                 ; r9 = pointer to the I128 result
  !xor eax, eax                         ; rax = result bits low  [00-63]
  !xor edx, edx                         ; rdx = result bits high [64-99]
  !test r8, r8                          ; null pointer -> store empty set
  !jz .cb_store
  !cmp word [r8], 0                     ; empty string -> store empty set
  !je .cb_store
  !.cb_next:
  !mov ecx, [r8]                        ; two UTF-16 digits: tens (low), units (high)
  !add r8, 6                            ; skip two digits + one separator
  !and ecx, 0xf000f                     ; keep the low nibble of each digit
  !imul ecx, 0xa0001                    ; bits 16..31 = tens * 10 + units
  !shr ecx, 16                          ; rcx = n (upper half is zero)
  !test ecx, 64                         ; for n in 0..99, bit 6 set <=> n >= 64
  !jnz .cb_high
  !bts rax, rcx                         ; set bit n in low
  !jmp .cb_check
  !.cb_high:
  !bts rdx, rcx                         ; register form uses n mod 64 -> bit n - 64
  !.cb_check:
  !cmp word [r8 - 2], 0                 ; was the separator the terminating null?
  !jne .cb_next
  !.cb_store:
  !mov [r9], rax                        ; write low quad
  !mov [r9 + 8], rdx                    ; write high quad
EndProcedure
Procedure.i PopCount_I128(*input.I128)
  ; Returns the number of set bits in the 128-bit value at *input.
  ; The count is left in rax; the bare ProcedureReturn hands it back as is.
  !mov rdx, [p.p_input]                 ; rdx = pointer to the I128
  !popcnt rcx, qword [rdx + 8]          ; rcx = set bits in the high quad
  !popcnt rax, qword [rdx]              ; rax = set bits in the low quad
  !add rax, rcx                         ; rax = total
  ProcedureReturn
EndProcedure
Procedure.i Grep_I128(*input1.I128, *input2.I128)
  ; Returns how many bits are set in both 128-bit values, i.e. the
  ; population count of (*input1 AND *input2).
  ; The count is left in rax; the bare ProcedureReturn hands it back as is.
  !mov rcx, [p.p_input1]                ; rcx = pointer to first operand
  !mov rdx, [p.p_input2]                ; rdx = pointer to second operand
  !mov rax, [rcx]                       ; rax = low quad of input1
  !mov rcx, [rcx + 8]                   ; rcx = high quad of input1 (pointer no longer needed)
  !and rax, [rdx]                       ; common bits, low quad
  !and rcx, [rdx + 8]                   ; common bits, high quad
  !popcnt rax, rax
  !popcnt rcx, rcx
  !add rax, rcx                         ; rax = total common bits
  ProcedureReturn
EndProcedure
; Demo: convert three number lists to bit sets, then count their members
; and the members each pair has in common.
Define x.I128, y.I128, z.I128

s1.s = "07 18 15 61 02 10 08 03 00 99 97"
s2.s = "01 17 61 07 04 05 18 06 57 99"
s3.s = "01 07"

CreateB_I128(@s1, @x)
CreateB_I128(@s2, @y)
CreateB_I128(@s3, @z)

Debug "PopCount"
Debug PopCount_I128(@x)   ; expected 11
Debug PopCount_I128(@y)   ; expected 10
Debug PopCount_I128(@z)   ; expected 2

Debug "Grep"
Debug Grep_I128(@x, @y)   ; expected 4 (07, 18, 61, 99)
Debug Grep_I128(@x, @z)   ; expected 1 (07)
Debug Grep_I128(@y, @z)   ; expected 2 (01, 07)
Using popcnt is very useful if you convert the "01 99 50" strings to bits once and then work with the bits internally.
If you always use those "01 99 50" strings directly as input for grep, a different approach is probably faster.