Thanks for the explanation.
Here is a quick hack of the find-strings code to do a k-mer style search. It doesn't account for reverse or canonical k-mers.
output for the string "AGCTTTTCATTCTGACTGCAA" by "CT"
3 Tokens found
item, count and the positions of the items
CT 3
4: CT 22: CT 30: CT
replaced found strings with ---
AG--TTTCATT--GA--GCAA
See this thread for the Squint module:
viewtopic.php?f=12&t=75783
Code: Select all
IncludeFile "Squint2.pbi"
UseModule SQUINT
EnableExplicit
; One distinct k-mer recorded by Kmers(), stored as the value of a trie node.
Structure Item
; text of the k-mer (the trie key)
key.s
; number of times the k-mer was seen in the source string
count.l
; length of the k-mer in characters
len.l
; byte offsets of each occurrence, measured from the start of the source string
List positions.i()
EndStructure
; Result container: a list of pointers to Item records.
; The list only references the records; they are released by cbFindStringsFree().
Structure FindStrings
List *item.item()
EndStructure
; Shared result list filled by Kmers() and FindStringsEnum() in the demo code.
Global FindStringsItems.FindStrings
; Index every k-character window (k-mer) of *source in the trie, then look up
; every k-character window of *keys and report how often those occur.
;
; Parameters:
;   *squint - trie that receives the k-mers; entries from earlier calls are kept
;   *source - null-terminated string to index (may be 0 to only run the lookup)
;   *keys   - null-terminated string whose k-mers are looked up (may be 0)
;   k       - window length in characters (default 2); values < 1 return 0
;   *items  - optional FindStrings; each Item found for a window of *keys is appended
;
; Returns the sum of the occurrence counts of all windows of *keys found in the trie.
;
; Notes:
;   - Item\positions() holds BYTE offsets from the start of *source.
;   - A string shorter than k yields a single, shorter window (original behaviour).
;   - The window slides by SizeOf(Character), so it steps one character in any build.
Procedure Kmers(*squint.squint,*source,*keys,k=2,*items.FindStrings=0)
  Protected *inp.Character, *inp1.Character, *node.squint_node, *sp
  Protected c, count, key.s, *item.item

  ; A window needs at least one character; k < 1 would insert empty keys.
  If k < 1
    ProcedureReturn 0
  EndIf

  ; ---- Pass 1: record every window of *source in the trie ----
  If *source
    *sp  = *source
    *inp = *source
    While *inp\c <> 0
      ; Find the end of the window: up to k characters, stopping at the terminator.
      c = 0
      *inp1 = *inp
      While *inp1\c <> 0 And c < k
        *inp1 + SizeOf(Character)
        c + 1
      Wend
      key = PeekS(*inp, c)

      *node = Squintset(*squint,@key,0)
      If *node
        *item = *node\value
        If *item = 0
          ; First sighting of this k-mer: create its record (zero-initialised).
          *item = AllocateStructure(item)
          If *item
            *item\key = key
            *item\len = c
            *node\value = *item
          EndIf
        EndIf
        If *item
          AddElement(*item\positions())
          *item\positions() = *inp - *sp   ; byte offset of this occurrence
          *item\count + 1
        EndIf
      EndIf

      ; The window touched the terminator: that was the last full window.
      If *inp1\c = 0
        Break
      EndIf
      *inp + SizeOf(Character)
    Wend
  EndIf

  ; ---- Pass 2: look up every window of *keys ----
  If *keys
    *inp = *keys
    While *inp\c <> 0
      c = 0
      *inp1 = *inp
      While *inp1\c <> 0 And c < k
        *inp1 + SizeOf(Character)
        c + 1
      Wend
      key = PeekS(*inp, c)

      *item = Squintget(*squint,@key)
      If *item
        count + *item\count
        If *items
          If *item\count >= 1
            AddElement(*items\item())
            *items\item() = *item
          EndIf
        EndIf
      EndIf

      If *inp1\c = 0
        Break
      EndIf
      *inp + SizeOf(Character)
    Wend
  EndIf

  ProcedureReturn count
EndProcedure
; SquintWalk callback: releases the Item record stored as the value of a trie entry.
; *key and *Data are part of the callback signature but are not used here.
Procedure cbFindStringsFree(*key,*value,*Data)
  FreeStructure(*value)
EndProcedure
; SquintEnum callback: appends the entry's value (an Item pointer) to the
; caller's result list and prints the entry's key, decoded as UTF-8.
Procedure cbFindStringsEnum(*key,*value,*items.FindStrings)
  ; collect the record pointer
  AddElement(*items\item())
  *items\item() = *value

  ; show the key handed over by the trie
  Debug PeekS(*key, -1, #PB_UTF8)
EndProcedure
; Tear down a trie built by Kmers(): first free every Item record via the
; walk callback, then free the trie itself.
Procedure FindStringsFree(*squint.squint)
  SquintWalk(*squint, @cbFindStringsFree())   ; release the stored records
  SquintFree(*squint)                         ; release the trie
EndProcedure
; Replace the contents of *items with the records SquintEnum reports for `key`.
; Each reported entry is appended (and its key printed) by cbFindStringsEnum().
Procedure FindStringsEnum(*mt.squint,key.s,*items.FindStrings)
  ; start from an empty result list
  ClearList(*items\item())

  ; let the trie enumerate from `key`, collecting through the callback
  SquintEnum(*mt, @key, @cbFindStringsEnum(), *items)
EndProcedure
; ---- Demo: index String3 with 2-mers, look up String4, then mask the hits ----
Global String3.s = "AGCTTTTCATTCTGACTGCAA"
Global String4.s = "CT"
Global out.s
Global FindStringsItems.FindStrings
Global *squint.squint = SquintNew()
Global Replace.s = "-----------------------------------------------------------"
Define *hit.Item, found.i, bytePos.i

; Run the search outside of the Debug statement: Debug lines are not compiled
; when the debugger is disabled, which would skip the Kmers() call entirely.
found = Kmers(*squint,@String3,@String4,2,@FindStringsItems)
Debug Str(found) + " k-mers found"

Debug "item, count and it position and the item "
ForEach FindStringsItems\item()
  *hit = FindStringsItems\item()   ; the list stores pointers to Item records
  out = ""
  Debug *hit\key + " " + Str(*hit\count)
  ForEach *hit\positions()
    bytePos = *hit\positions()     ; byte offset into String3
    out + Str(bytePos) + ": " + PeekS(@String3 + bytePos, *hit\len) + " "
    CopyMemory(@Replace, @String3 + bytePos, *hit\len * SizeOf(Character)) ;replace found items
  Next
  Debug out
Next

Debug "replaced found strings with --"
Debug String3

; Enumerate the trie entries reported for "C"; the callback prints each key.
Debug "Enum from C in callback"
FindStringsEnum(*squint,"C",@FindStringsItems)

Debug "foreach after Enum"
ForEach FindStringsItems\item()
  Debug FindStringsItems\item()\key
Next

; Frees the Item records and the trie; FindStringsItems must not be used afterwards.
FindStringsFree(*squint)