Re: How to get the word_before and word_after a specific keyword from a string.
Posted: Fri Aug 26, 2022 5:37 pm
I like the pointers. Thank you, infratec.
http://www.purebasic.com
https://www.purebasic.fr/english/
Really nice version; I prefer it by far over the RegEx version.
Code: Select all
EnableExplicit
; One result record of GetWordBeforeAndAfter(): the two words that surround
; a single occurrence of the keyword.
Structure WordBeforeAfter_Structure
Before$ ; word found directly before the keyword (stays empty when none was stored)
After$ ; word found directly after the keyword (stays empty when none was stored)
EndStructure
; Finds every whole-word occurrence of Keyword$ in String$ and stores the
; neighbouring words in ResultList().
;
; A "word" is a maximal run of characters in 'A'..'Z' or '_'..'z' (the second
; range also contains the back-tick, code 96); every other character, including
; digits, separates words. The comparison with Keyword$ is case-sensitive.
;
; Parameters:
;   String$      - text to scan
;   Keyword$     - word to look for
;   ResultList() - cleared first, then receives one element per occurrence;
;                  Before$ / After$ stay empty when the keyword is the first /
;                  last word of the text
; Returns: the number of occurrences stored in ResultList().
Procedure.i GetWordBeforeAndAfter(String$, Keyword$, List ResultList.WordBeforeAfter_Structure())
  Protected Word$       ; word currently being collected
  Protected LastWord$   ; most recently completed word
  Protected BeforeLast$ ; word completed just before LastWord$
  Protected *String.Character

  ClearList(ResultList())

  ; An empty keyword can never equal a word, and an empty text has nothing to
  ; scan (its address must not be dereferenced either).
  If Keyword$ = "" Or String$ = ""
    ProcedureReturn 0
  EndIf

  *String = @String$
  Repeat
    If (*String\c >= 'A' And *String\c <= 'Z') Or (*String\c >= '_' And *String\c <= 'z')
      Word$ + Chr(*String\c)
    ElseIf Word$ <> ""
      ; A word just ended (the terminating 0 also lands here, which flushes
      ; the final word). If the previous word was the keyword, this word is
      ; its "after" and the one before the keyword is its "before".
      If LastWord$ = Keyword$
        AddElement(ResultList())
        ResultList()\Before$ = BeforeLast$
        ResultList()\After$  = Word$
      EndIf
      BeforeLast$ = LastWord$
      LastWord$   = Word$
      Word$       = ""
    EndIf

    If *String\c = 0
      Break
    EndIf
    *String + SizeOf(Character)
  ForEver

  ; The keyword was the very last word: it has a "before" but no "after".
  If LastWord$ = Keyword$
    AddElement(ResultList())
    ResultList()\Before$ = BeforeLast$
  EndIf

  ProcedureReturn ListSize(ResultList())
EndProcedure
; Demo: run the extractor on one sample text and print every Before/After pair.
NewList WordBeforeAfterList.WordBeforeAfter_Structure()

; Enable exactly one of the sample texts below.
Define SampleText$ = " blah blah -(word_before keyword word_after!. blah blah ,word_before_ keyword word_after_. blah blah"
;Define SampleText$ = " blah blah -(word_before keyword word_after"
;Define SampleText$ = "word_before keyword word_after"
;Define SampleText$ = "keyword word_after"
;Define SampleText$ = "keyword"
;Define SampleText$ = "word_before keyword"

Define MatchCount.i = GetWordBeforeAndAfter(SampleText$, "keyword", WordBeforeAfterList())

If MatchCount <> 0
  ForEach WordBeforeAfterList()
    Debug WordBeforeAfterList()\Before$
    Debug WordBeforeAfterList()\After$
  Next
EndIf
Code: Select all
EnableExplicit
; Fixed-size queue push for a list: removes the first element, then appends a
; new element at the end and assigns the given value to it. The list argument
; is passed with its parentheses, e.g. AddQueue(MyList(), value$).
Macro AddQueue(queue, item)
  FirstElement(queue) : DeleteElement(queue)
  LastElement(queue) : AddElement(queue)
  queue = item
EndMacro
; Splits the text at *String into tokens (runs of characters with a code above
; 32) and keeps a window of at most three tokens in wordlist().
;
; The first three tokens are appended as they are. From the fourth token on the
; list works as a three-slot queue (see AddQueue): before each push the current
; last element is compared with Keyword$, and when it matches, the scan stops
; right after the push, leaving "before, keyword, after" in the list.
;
; Notes visible from the code: tokens keep any attached punctuation, the
; keyword test is only made from the fourth token on, a token that ends at the
; end of the text is pushed without a keyword test, and the return value is the
; list size whether or not the keyword was seen.
;
; Parameters:
;   *String    - address of the null-terminated text
;   Keyword$   - token to look for (case-sensitive)
;   wordlist() - cleared first, then filled as described above
; Returns: ListSize(wordlist()).
Procedure.i GetWordBeforeAndAfter(*String.Character, Keyword$, List wordlist.s())
  Protected Token$
  Protected KeywordSeen.i
  Protected TokenCount.i

  ClearList(wordlist())

  While *String\c <> 0
    If *String\c > 32
      Token$ + Chr(*String\c)
    ElseIf Token$ <> ""
      If TokenCount <= 2
        ; still filling the first three slots
        AddElement(wordlist())
        wordlist() = Token$
      Else
        ; the current element is the token pushed last time
        KeywordSeen = Bool(wordlist() = Keyword$)
        AddQueue(wordlist(), Token$)
        If KeywordSeen
          Token$ = ""
          Break
        EndIf
      EndIf
      TokenCount + 1
      Token$ = ""
    EndIf
    *String + SizeOf(Character)
  Wend

  ; flush a token that ran up to the end of the text
  If Token$ <> ""
    If TokenCount <= 2
      AddElement(wordlist())
      wordlist() = Token$
    Else
      AddQueue(wordlist(), Token$)
    EndIf
  EndIf

  ProcedureReturn ListSize(wordlist())
EndProcedure
; Demo: run the sliding-window extractor on one sample text and print the
; tokens left in the list.
NewList WordBeforeAfterList.s()

; Enable exactly one of the sample texts below.
Define SampleText$ = " blah blah -(word_before keyword word_after!. blah blah ,word_before_ keyword word_after_. blah blah"
;Define SampleText$ = " blah blah -(word_before keyword word_after"
;Define SampleText$ = "word_before keyword word_after"
;Define SampleText$ = "keyword word_after"
;Define SampleText$ = "keyword"
;Define SampleText$ = "word_before keyword"

If GetWordBeforeAndAfter(@SampleText$, "keyword", WordBeforeAfterList()) <> 0
  ForEach WordBeforeAfterList()
    Debug WordBeforeAfterList()
  Next
EndIf
Code: Select all
; Sample inputs: a plain sentence, and one with extra spaces and punctuation.
Define string1.s = "blah blah blah word_before keyword word_after blah blah blah"
Define string2.s = " blah blah blah (word_before keyword word_after, blah blah blah "
; Prints (via Debug) the word directly before and directly after the first
; occurrence of keyword in text.
;
; The text is first normalised: the characters ( ) . , are dropped, leading
; spaces are removed and every run of spaces is collapsed to a single space.
; The keyword is located as a plain substring of the normalised text, so it
; also matches inside a longer word.
;
; Parameters:
;   text    - input sentence
;   keyword - text to locate
; Output: three Debug lines (input, word before, word after). Both words are
; printed empty when the keyword is empty or not present; "word before" is
; empty when nothing precedes the keyword, "word after" when nothing follows.
Procedure process(text.s, keyword.s)
  Protected.i length, stpos, keypos
  Protected.s ch, lastkept, newstring, before, after

  length   = Len(text)
  lastkept = " " ; pretend a space was already written so leading spaces are dropped

  For stpos = 1 To length ; Go through each character in the input string
    ch = Mid(text, stpos, 1)
    If ch = "(" Or ch = ")" Or ch = "." Or ch = ","
      ; Unwanted punctuation: skip it WITHOUT touching lastkept, so that the
      ; spaces on both sides of a dropped character still collapse into one.
      Continue
    EndIf
    If ch <> " " Or lastkept <> " "
      newstring + ch
      lastkept = ch
    EndIf
  Next stpos

  If keyword <> ""
    keypos = FindString(newstring, keyword)
  EndIf

  If keypos > 0
    ; Last space-separated field of the text in front of the keyword
    before = Trim(Left(newstring, keypos - 1))
    before = StringField(before, CountString(before, " ") + 1, " ")
    ; First space-separated field of the text behind the keyword
    after = Trim(Mid(newstring, keypos + Len(keyword)))
    after = StringField(after, 1, " ")
  EndIf

  Debug "Input string : " + text   ; Display the input string (before it is processed)
  Debug "Word before : " + before  ; Display the resulting word before
  Debug "Word after : " + after    ; Display resulting word after
EndProcedure
; Run both sample sentences through the extractor.
process(string1, "keyword")
process(string2, "keyword")
Code: Select all
EnableExplicit
; One result record of GetWordBeforeAndAfter(): the two words that surround
; a single occurrence of the keyword.
Structure WordBeforeAfter_Structure
Before$ ; word completed just before the keyword (empty when the keyword came first)
After$ ; next word after the keyword (stays empty when no further word follows)
EndStructure
; Finds every whole-word occurrence of Keyword$ in String$ and stores the
; neighbouring words in ResultList().
;
; A "word" is a maximal run of characters in 'A'..'Z' or '_'..'z' (the second
; range also contains the back-tick, code 96); every other character, including
; digits, separates words.
;
; Parameters:
;   String$      - text to scan
;   Keyword$     - word to look for
;   ResultList() - cleared first, then receives one element per occurrence;
;                  Before$ is the previous word, After$ the next word (either
;                  stays empty when there is none). Consecutive keywords are
;                  reported as each other's neighbours.
;   StringNoCase - #PB_String_CaseSensitive (default) or any non-zero value
;                  such as #PB_String_NoCase for a case-insensitive match
; Returns: the number of occurrences stored in ResultList().
; Side effect: prints the inputs with Debug.
Procedure.i GetWordBeforeAndAfter(String$, Keyword$, List ResultList.WordBeforeAfter_Structure(), StringNoCase = #PB_String_CaseSensitive)
  Protected Word$, WordCase$, KeywordCase$, LastWord$, KeyFound
  Protected *String.Character

  Debug "String: " + String$
  Debug "Keyword: " + Keyword$
  Debug "-->"

  ClearList(ResultList())

  ; Fold the keyword with LCase so that only real letters change; adding 32 to
  ; every code up to 'Z' would also turn '?' into '_' and '@' into a back-tick.
  If StringNoCase
    KeywordCase$ = LCase(Keyword$)
  Else
    KeywordCase$ = Keyword$
  EndIf

  *String = @String$
  If *String = 0
    ; NOTE(review): guards the case where an empty string has no buffer.
    ProcedureReturn 0
  EndIf

  Repeat
    If (*String\c >= 'A' And *String\c <= 'Z') Or (*String\c >= '_' And *String\c <= 'z')
      Word$ + Chr(*String\c)
    ElseIf Word$ <> ""
      ; A word just ended; the terminating 0 also lands here and flushes the
      ; final word.
      If StringNoCase
        WordCase$ = LCase(Word$)
      Else
        WordCase$ = Word$
      EndIf

      ; The previous word was the keyword: this word is its "after".
      If KeyFound
        ResultList()\After$ = Word$
      EndIf

      ; This word is the keyword: open a new result with its "before".
      KeyFound = Bool(WordCase$ = KeywordCase$)
      If KeyFound
        AddElement(ResultList())
        ResultList()\Before$ = LastWord$
      EndIf

      LastWord$ = Word$
      Word$ = ""
    EndIf

    If *String\c = 0
      Break
    EndIf
    *String + SizeOf(Character)
  ForEver

  ProcedureReturn ListSize(ResultList())
EndProcedure
; Demo: run the extractor on one sample text and print each numbered match.
NewList WordBeforeAfterList.WordBeforeAfter_Structure()

; Enable exactly one sample text.
Define String$ = "KeyWord blah blah -(word_before keyword word_after!. blah blah ,word_before_ keyword keyword word_after_. blah blah keyword"
;Define String$ = " blah blah -(word_before keyword word_after"
;Define String$ = "word_before keyword word_after"
;Define String$ = "keyword word_after"
;Define String$ = "keyword"
;Define String$ = "word_before keyword"

; Enable exactly one keyword.
Define Keyword$ = "keyword"
;Define Keyword$ = "KeyWord"

; Enable exactly one comparison mode.
Define CaseMode = #PB_String_CaseSensitive
;Define CaseMode = #PB_String_NoCase

Define NbMatch

If GetWordBeforeAndAfter(String$, Keyword$, WordBeforeAfterList(), CaseMode) <> 0
  ForEach WordBeforeAfterList()
    ; ListIndex() is zero-based, the printed match number starts at 1
    NbMatch = ListIndex(WordBeforeAfterList()) + 1
    Debug "Word_Before " + Str(NbMatch) + ": " + WordBeforeAfterList()\Before$
    Debug "Word_After " + Str(NbMatch) + ": " + WordBeforeAfterList()\After$
  Next
EndIf
Code: Select all
; Expands to the text of entry number `slot` of the word table `table`:
; PeekS() is given the address stored in row 3 and the length stored in row 2.
Macro anotherMid(table, slot)
  PeekS(table\wrd(3, slot), table\wrd(2, slot))
EndMacro
; Size constants taken from the built-in Character and Integer structures.
#bpc = SizeOf(character) ; (b)ytes (p)er (c)haracter
#bpi = SizeOf(integer) ; (b)ytes (p)er (i)nteger
; Author's table: longest word per build target (x32/x64, unicode/ascii).
; NOTE(review): presumably the word length up to which the hash stays within
; range; the x64 figures were left open by the author - confirm.
;longest word
;x32u; x32a; x64u; x64a
;114; 1469; ?(big); ?(big)
; (for beta reducing)
; NOTE(review): the four expressions below are meant as (1 << n) - 1, which
; relies on << binding tighter than - in PureBasic - confirm against the
; operator priority table before editing them.
#greatestUnsignedCharacter = 1 << (8 * #bpc) - 1 ; not referenced by the code shown here
#greatestSignedInteger = 1 << ((8 * #bpi) - 1) - 1 ; not referenced by the code shown here
#cmLim = 1 << (8 * #bpc) - 1 ; (c)haracter (m)ask array (lim)it
#pvLim = 1 << 16 - 1 ; (p)rime (v)alue array (lim)it
; Separator table: one byte-sized flag per character code.
; SplitFilterAndHash() treats a character as a separator when its flag is non-zero.
Structure charMask
Array cm.a(#cmLim) ; indexed by character code, 0 .. #cmLim
EndStructure
; Word table produced by SplitFilterAndHash(): one column per word found.
Structure wrd
Array wrd.i(3, 4095) ; row 0 = hash, row 1 = 1-based position of the first character, row 2 = character count, row 3 = address of the first character
qty.i ; set by SplitFilterAndHash() to its final column index; the demo loop reads columns 0 .. qty
EndStructure
; Prime table built by pvCreate(): pv(0) holds the number of primes,
; pv(1) .. pv(pv(0)) hold the primes in ascending order.
Structure primeValue
Array pv.i(#pvLim) ; allocated with #pvLim + 1 slots for sieving, shrunk by pvCreate() afterwards
EndStructure
; Allocates and initialises an empty charMask (all flags 0).
; Returns: pointer to the new charMask, or 0 when the allocation failed.
Procedure cmCreate()
  Protected *this.charMask = AllocateMemory(SizeOf(charMask))

  ; InitializeStructure must not be called on a failed allocation
  If *this
    InitializeStructure(*this, charMask)
  EndIf

  ProcedureReturn *this
EndProcedure
; Builds the table of all primes up to #pvLim with a sieve.
; Returns: pointer to a new primeValue whose pv(0) is the number of primes and
; whose pv(1) .. pv(pv(0)) are the primes in ascending order, or 0 when the
; allocation failed.
Procedure pvCreate()
  Protected *this.primeValue = AllocateMemory(SizeOf(primeValue))
  Protected.i candidate, multiple, primeCount

  If *this = 0
    ProcedureReturn 0
  EndIf
  InitializeStructure(*this, primeValue)

  ; *** 1/3 sieving: a non-zero slot marks a composite number ***
  ; Both loops test the bound before writing, so no slot above #pvLim is touched.
  candidate = 2
  While candidate * candidate <= #pvLim
    If *this\pv(candidate) = 0
      multiple = candidate * candidate
      While multiple <= #pvLim
        *this\pv(multiple) = multiple
        multiple + candidate
      Wend
    EndIf
    candidate + 1
  Wend

  ; *** 2/3 compacting: move the primes to slots 1, 2, 3, ... ***
  ; The write index never passes the read index, so no unread slot is overwritten.
  primeCount = 0
  For candidate = 2 To #pvLim
    If *this\pv(candidate) = 0
      primeCount + 1
      *this\pv(primeCount) = candidate
    EndIf
  Next

  ; *** 3/3 store the count in slot 0 and shrink the array ***
  *this\pv(0) = primeCount
  ReDim *this\pv(primeCount)

  ProcedureReturn *this
EndProcedure
; Hashes the null-terminated text at *a: the sum over all characters of
; (character code * the prime stored at the character's 1-based position).
; Parameters:
;   *a  - address of the text
;   *pv - prime table as built by pvCreate()
; Returns: the accumulated sum.
Procedure hash(*a.character, *pv.primeValue)
  Protected.i position, total

  While *a\c <> 0
    position + 1
    total + *pv\pv(position) * *a\c
    *a + SizeOf(character)
  Wend

  ProcedureReturn total
EndProcedure
; Splits the null-terminated text at *a into words and records, for each word,
; its hash, position, length and address in a newly allocated word table.
;
; A character whose flag in *cm is non-zero is a separator; every other
; character belongs to a word. The hash of a word is the sum of
; (character code * prime at the character's 1-based position in the word),
; the same formula hash() uses.
;
; Parameters:
;   *a  - address of the text (0 is treated as an empty text)
;   *cm - separator table
;   *pv - prime table as built by pvCreate()
; Returns: pointer to a new wrd table, or 0 when *cm or *pv is 0 or the
; allocation failed. In the table, column k holds word k:
;   row 0 = hash, row 1 = 1-based position of the first character,
;   row 2 = length in characters, row 3 = address of the first character.
; qty is the index of the last word stored (-1 when the text has no word), so
; "For i = 0 To qty" visits exactly the stored words. Words beyond the
; capacity of the table are ignored.
Procedure SplitFilterAndHash(*a.character, *cm.charMask, *pv.primeValue)
  Protected *c.wrd
  Protected.i position   ; 1-based index of the current character
  Protected.i length     ; characters collected for the current word (0 = between words)
  Protected.i hashValue  ; hash accumulated for the current word
  Protected.i slot       ; column that receives the current / next word
  Protected.i lastSlot   ; highest valid column index

  If *cm = 0 Or *pv = 0
    ProcedureReturn 0
  EndIf

  *c = AllocateMemory(SizeOf(wrd)) ; resulting array
  If *c = 0
    ProcedureReturn 0
  EndIf
  InitializeStructure(*c, wrd)
  lastSlot = ArraySize(*c\wrd(), 2)

  If *a
    ; Test the current character before using it: nothing in front of the
    ; buffer is read and the terminating 0 is never counted as part of a word.
    While *a\c
      position + 1
      If *cm\cm(*a\c)
        ; separator: close the word in progress, if any
        If length
          *c\wrd(0, slot) = hashValue
          *c\wrd(2, slot) = length
          slot + 1
          length = 0
          hashValue = 0
          If slot > lastSlot
            Break ; table full
          EndIf
        EndIf
      Else
        If length = 0
          ; first character of a new word
          *c\wrd(1, slot) = position
          *c\wrd(3, slot) = *a
        EndIf
        length + 1
        hashValue + *pv\pv(length) * *a\c
      EndIf
      *a + #bpc
    Wend
  EndIf

  If length
    ; the text ended inside a word
    *c\wrd(0, slot) = hashValue
    *c\wrd(2, slot) = length
    *c\qty = slot
  Else
    *c\qty = slot - 1
  EndIf

  ProcedureReturn *c
EndProcedure
; Demo: split a sentence, then print the neighbours of the word "wednesday".
Define *pv.primeValue = pvCreate()
Define *cm.charMask = cmCreate()

; Characters flagged with 1 act as word separators.
*cm\cm(#TAB) = 1 ; tabulation
*cm\cm(#LF) = 1  ; line feed
*cm\cm(#CR) = 1  ; carriage return
*cm\cm(' ') = 1  ; space
*cm\cm('(') = 1  ; opening parenthesis
*cm\cm('+') = 1  ; plus sign
*cm\cm('e') = 1  ; 'e' is flagged...
*cm\cm('e') = 0  ; ...and cleared again, so 'e' stays a word character
*cm\cm('♞') = 0  ; the knight symbol is explicitly kept as a word character

Define a$ = " monday (tuesday wednesday thursday+ friday"
Define weSearch = hash(@"wednesday", *pv)
Define *c.wrd = SplitFilterAndHash(@a$, *cm, *pv)
Define i

Debug a$

; Words are matched by hash value only. The neighbour look-ups use i - 2 .. i + 2
; without a range check, which fits the sample sentence above.
For i = 0 To *c\qty
  Debug anotherMid(*c, i)
  If weSearch = *c\wrd(0, i)
    Debug "before " + anotherMid(*c, i) + " there is " + anotherMid(*c, i - 1) + " and before again : " + anotherMid(*c, i - 2)
    Debug "after " + anotherMid(*c, i) + " there is " + anotherMid(*c, i + 1) + " and after again : " + anotherMid(*c, i + 2)
  EndIf
Next
"word_before" and "word_after", as written, do not exist in English; I wrote them that way to convey the meaning of the string.
What is the best way to get the word_before and word_after a specific keyword from a string,
in a way that meets these two conditions?
First condition.
1. string.s="blah blah blah word_before keyword word_after blah blah blah"
Second condition.
2. string.s=" blah blah blah (word_before keyword word_after, blah blah blah "
I agree.
Oso wrote: Fri Aug 26, 2022 11:09 pm I'm surprised at the complexity of some of the solutions put forward. I'd be concerned, if they needed to be this complex, that the future task of maintaining the code would be difficult, especially as it might be someone else.
I've got ya, my apologies for 'throwing the spanner in the works', as they say.
idle wrote: Sat Aug 27, 2022 2:27 am @Oso you've just taken 2 steps back; each post was an improvement over the others. My post also simplified and improved the runtime complexity of Infratec's last code; he's a PB guru, he knows his stuff.