This one is inspired on the trie Idle posted.
It should be faster compared to the code I previously posted.
There's no limit to the amount of elements inside a string and no limit to the length of an element.
When #ASCII_RANGE_ONLY is set to #False , it can handle unicode strings as well.
Code: Select all
DisableDebugger
#TRIENODES = 8192
#LAST_SEPARATION_CHAR = 32
#ASCII_RANGE_ONLY = #True
Structure GrepTrieNode
n.l[16]
value.l
EndStructure
Structure GrepTrie
t.GrepTrieNode[0]
EndStructure
#GREP_TRIENODE_SIZE = SizeOf(GrepTrieNode)
Global *GrepTrie.GrepTrie = AllocateMemory(#TRIENODES * #GREP_TRIENODE_SIZE)
CompilerIf #PB_Compiler_Processor = #PB_Processor_x86
Macro rax : eax : EndMacro
Macro rbx : ebx : EndMacro
Macro rcx : ecx : EndMacro
Macro rdx : edx : EndMacro
Macro rsi : esi : EndMacro
Macro rdi : edi : EndMacro
CompilerEndIf
Macro M_Grep_GetNextChar()
CompilerIf #PB_Compiler_Unicode
movzx eax, word [rdx]
add rdx, 2
CompilerElse
movzx eax, byte [rdx]
add rdx, 1
CompilerEndIf
EndMacro
Macro M_Grep_String1(rolbits = 4)
!rol ax, rolbits
!mov ebx, eax
!and ebx, 111100b
!add ebx, esi
mov esi, [rcx + rbx]
!test esi, esi
!jnz grep_string1_walk#MacroExpandedCount
mov [rcx + rbx], edi
!mov esi, edi
add edi, #GREP_TRIENODE_SIZE
!grep_string1_walk#MacroExpandedCount:
EndMacro
Macro M_Grep_String2(rolbits = 4)
!rol ax, rolbits
!mov ebx, eax
!and ebx, 111100b
!add ebx, esi
mov esi, [rcx + rbx]
!test esi, esi
!jz grep_not_found
EndMacro
Procedure.i Grep(*String1.Character, *String2.Character)
EnableASM
Protected.i *t, result, reg_bx, reg_si, reg_di
Protected.l n_cnt, n_cur, n_max = MemorySize(*GrepTrie)
; ** backup registers ***
mov reg_bx, rbx
mov reg_si, rsi
mov reg_di, rdi
; ** process string1 **
mov rcx, *GrepTrie ; rcx = *GrepTrie
mov rdx, *String1 ; rdx = *String
!xor esi, esi ; esi = n_cur
mov edi, #GREP_TRIENODE_SIZE ; edi = n_cnt
!grep_string1_loop:
M_Grep_GetNextChar()
cmp eax, #LAST_SEPARATION_CHAR
!jna grep_string1_separ ; if <= #LAST_SEPARATION_CHAR, goto grep_string1_separ
lea ebx, [edi + 272] ; 272 = 4 * #GREP_TRIENODE_SIZE
cmp ebx, n_max
!ja grep_resize_trie
!grep_resize_continue:
CompilerIf #PB_Compiler_Unicode And Not #ASCII_RANGE_ONLY
M_Grep_String1(6)
M_Grep_String1()
M_Grep_String1()
CompilerElse
M_Grep_String1(14)
CompilerEndIf
M_Grep_String1()
!jmp grep_string1_loop
!grep_string1_separ: ; increase node value
inc dword [rcx + rsi + 64] ; 64 = offset of value field
!xor esi, esi
!grep1_skip_inc:
!test eax, eax ; check for zero character
!jnz grep_string1_loop
mov [rcx + 64], eax ; clear node 0 value
mov n_cnt, edi
!jmp grep_string2
; resize trie
!grep_resize_trie:
mov *String1, rdx
mov n_cur, esi
mov n_cnt, edi
mov result, rax
*t = ReAllocateMemory(*GrepTrie, n_max + #TRIENODES * #GREP_TRIENODE_SIZE)
If *t
*GrepTrie = *t
n_max + #TRIENODES * #GREP_TRIENODE_SIZE
Else
result = -1
Goto Grep_Clear
EndIf
mov rcx, *GrepTrie
mov rdx, *String1
mov esi, n_cur
mov edi, n_cnt
mov rax, result
!jmp grep_resize_continue
; ** process string2 **
!grep_string2:
mov rdx, *String2 ; rdx = *String2
!xor esi, esi ; esi = n_cur
!xor edi, edi ; edi = result
!grep_string2_loop:
M_Grep_GetNextChar()
cmp eax, #LAST_SEPARATION_CHAR
!jna grep_string2_separ ; if <= #LAST_SEPARATION_CHAR, goto grep_string2_separ
CompilerIf #PB_Compiler_Unicode And Not #ASCII_RANGE_ONLY
M_Grep_String2(6)
M_Grep_String2()
M_Grep_String2()
CompilerElse
M_Grep_String2(14)
CompilerEndIf
M_Grep_String2()
!jmp grep_string2_loop
!grep_not_found: ; if not found, skip characters
M_Grep_GetNextChar()
cmp eax, #LAST_SEPARATION_CHAR
!ja grep_not_found
!xor esi, esi
!test eax, eax ; check for zero character
!jnz grep_string2_loop
!jmp grep_string2_result
!grep_string2_separ: ; add node value to result
add edi, [rcx + rsi + 64]
!xor esi, esi
!test eax, eax ; check for zero character
!jnz grep_string2_loop
!grep_string2_result:
mov result, rdi
Grep_Clear:
FillMemory(*GrepTrie, n_cnt)
; ** restore registers ***
mov rbx, reg_bx
mov rsi, reg_si
mov rdi, reg_di
ProcedureReturn result
DisableASM
EndProcedure
Test code
Code: Select all
String1.s = "ac3 b9d45 b ks23 al97 ac5 al99 vs42159ssbpx"
String2.s = "b xl33 ac3 bxp12 ac5 ks27t2l9"
t1 = ElapsedMilliseconds()
For i = 1 To 12000000
Result = Grep(@String1, @String2)
Next
t2 = ElapsedMilliseconds()
MessageRequester("Result (" + Str(t2-t1) + " ms)", Str(Result))