phonetische Textsuche mit DoubleMetaphone
Verfasst: 25.01.2005 22:52
zu verwenden als Grundlage für Rechtschreibprüfung oder
einfach als flexiblere Suchfunktion da fehlertolerant.
fast ein Bisschen zu lang zum posten. Aber ne Seite aufzumachen
ist noch mehr Arbeit.
Denke das kann der Eine oder Andere textver- bearbeitende weiterverwenden.
_________
schic
einfach als flexiblere Suchfunktion da fehlertolerant.
Code: Alles auswählen
;
; phonetic text search with the DoubleMetaphone code from Lawrence Philips
; coded in PureBasic from SchiC
;
; best viewed with jaPBe
Structure DblMet
i.s
II.s
n.l
EndStructure
Structure WordInTxt
strTxt.s
Pos.l
EndStructure
Procedure getTPLMTFN(txt$, StartPos.l, numChars.l)
; Source : DoubleMetaphone - A phonetic search algorithm, much better than Soundex()
; : Converted from Lawrence Philips; CUJ June 2000 C++ "Double Metaphone" code.
; Version : 1 Beta
; Author(s) : Lawrence Philips for the "Original" C++ code, and
; : SchiC for the PureBasic conversion of the C++ code.
; --------------------------------------------------------------------
; References : Philips, Lawrence. C/C++ Users Journal (CUJ), June, 2000
; :
; Note : Reduces alphabet to the 14 consonant sounds:
; : X S K J T F H L M N P R 0 W
; : drop vowels except at the beginning
; --------------------------------------------------------------------
;
; Useful links:
; The original C++ double Metaphone algorithm from Lawrence Philips:
; ftp://ftp.cuj.com/pub/2000/1806/philips.zip
;
; as reference for testing results look at
; http://swoodbridge.com/DoubleMetaPhone/mptest.php3
Shared RetTxt.DblMet
#vowels = "AEIOUY"
Static slavogermanic.b
RetTxt\i = ""
RetTxt\II = ""
current = StartPos
Length = numChars;Len(txt$)
last = Length-1;Len(txt$) - 1
txt$=UCase(txt$)
If StartPos = 1
;skip these when at start of word
If FindString("GN KN PN WR PS",Left(txt$,2),1) ;skip these when at start of word
current + 1
ElseIf Left(txt$,1)="X" ;Initial "X" is pronounced "Z" e.g. "Xavier"
RetTxt\i = RetTxt\i + "S"
RetTxt\II = RetTxt\II + "S"
current + 1
ElseIf FindString(#vowels,Left(txt$,1),1) ;all init vowels now map to "A"
RetTxt\i = RetTxt\i + "A"
RetTxt\II = RetTxt\II + "A"
current + 1
ElseIf Left(txt$,6)="CAESAR" ;special case "caesar"
RetTxt\i = RetTxt\i + "S"
RetTxt\II = RetTxt\II + "S"
current + 2
EndIf
slavogermanic = #False
If FindString(txt$,"W",1) Or FindString(txt$,"K",1) Or FindString(txt$,"CZ",1) Or FindString(txt$,"WITZ",1)
slavogermanic = #True
EndIf
EndIf
txt$ = Space(6) + txt$ + Space(6)
current + 6
Length = current + numChars-1
last = 7 + last
While current <= Length;Len(RetTxt\I) < 4 Or Len(RetTxt\II) < 4
If Len(RetTxt\i) >= 4: Break: EndIf
strcur.s = Mid(txt$,current,1)
strprv1.s = Mid(txt$,current-1,1)
strprv2.s = Mid(txt$,current-2,1)
strnxt1.s = Mid(txt$,current+1,1)
strnxt2.s = Mid(txt$,current+2,1)
Select strcur
Case "B";-B
RetTxt\i = RetTxt\i + "P"
RetTxt\II = RetTxt\II + "P"
If strnxt1=strcur
current + 2
Else
current + 1
EndIf
Case "Ç";-Ç
RetTxt\i = RetTxt\i + "S"
RetTxt\II = RetTxt\II + "S"
If strnxt1=strcur
current + 2
Else
current + 1
EndIf
Case "C";-C
;various germanic
If current > 8 And FindString(#vowels,strprv2,1)=0 And Mid(txt$,current-1,3)="ACH" And FindString("EI",strnxt2,1)=0 Or FindString("BACHER MACHER,",Mid(txt$,current-2,6),1)
RetTxt\i = RetTxt\i + "K"
RetTxt\II = RetTxt\II + "K"
current + 2
ElseIf Mid(txt$,current,4)="CHIA" ;italian "chianti"
RetTxt\i = RetTxt\i + "K"
RetTxt\II = RetTxt\II + "K"
current + 2
ElseIf Mid(txt$,current,2)="CH"
If current > 7 And Mid(txt$,current,4)="CHAE"
;find "michael"
RetTxt\i = RetTxt\i + "K"
RetTxt\II = RetTxt\II + "X"
current + 2
ElseIf current = 7
;greek roots e.g. "chemistry", "chorus"
If (FindString("HARAC HARIS ",Mid(txt$,current+1,5),1) Or FindString("HOR HYM HIA HEM",Mid(txt$,current+1,3),1)) And Mid(txt$,7,5) <> "CHORE"
RetTxt\i = RetTxt\i + "K"
RetTxt\II = RetTxt\II + "K"
Else
RetTxt\i = RetTxt\i + "X"
RetTxt\II = RetTxt\II + "X"
EndIf
current + 2
;germanic, greek, or otherwise "ch" for "kh" sound
;"architect but not "arch", "orchestra", "orchid"
;e.g., "wachtler", "wechsler", but not "tichner"
ElseIf ((FindString("VAN VON ",Mid(txt$,7,4),1) Or Mid(txt$,7,3)="SCH") Or FindString("ORCHES ARCHIT, ORCHID",Mid(txt$,current-2,6),1) Or FindString("TS",strnxt2,1) Or ((FindString("AOUE",strprv1,1) Or current=7) And FindString("LRNMBHFVW ",strnxt2,1)))
RetTxt\i = RetTxt\i + "K"
RetTxt\II = RetTxt\II + "K"
current + 2
ElseIf current > 7
If Mid(txt$,7,2)="MC" ;e.g., "McHugh"
RetTxt\i = RetTxt\i + "K"
RetTxt\II = RetTxt\II + "K"
Else
RetTxt\i = RetTxt\i + "X"
RetTxt\II = RetTxt\II + "K"
EndIf
current + 2
EndIf
;End "CH"
ElseIf Mid(txt$,current,2)="CZ" And Mid(txt$,current-2,4)<>"WICZ" ;e.g, "czerny"
RetTxt\i = RetTxt\i + "S"
RetTxt\II = RetTxt\II + "X"
current + 2
ElseIf Mid(txt$,current+1,3)="CIA" ;e.g., "focaccia"
RetTxt\i = RetTxt\i + "X"
RetTxt\II = RetTxt\II + "X"
current + 3
;double "C", but not if e.g. "McClellan"
ElseIf Mid(txt$,current,2)="CC" And Mid(txt$,7,3)<>"MCC"
;"bellocchio" but not "bacchus"
If FindString("IEH",strnxt2,1) And Mid(txt$,current+2,2)<>"HU"
;"accident", "accede" "succeed"
If (current = 8 And strprv1="A") Or FindString("UCCEE UCCES",Mid(txt$,current-1,5),1)
RetTxt\i = RetTxt\i + "KS"
RetTxt\II = RetTxt\II + "KS"
Else ;"bacci", "bertucci", other italian
RetTxt\i = RetTxt\i + "X"
RetTxt\II = RetTxt\II + "X"
EndIf
current + 3
Else ;Pierce"s rule
RetTxt\i = RetTxt\i + "K"
RetTxt\II = RetTxt\II + "K"
current + 2
EndIf
ElseIf FindString("CK CG CQ",Mid(txt$,current,2),1)
RetTxt\i = RetTxt\i + "K"
RetTxt\II = RetTxt\II + "K"
current + 2
ElseIf FindString("CI CE CY",Mid(txt$,current,2),1)
;italian vs. english
If FindString("CIO CIE CIA,",Mid(txt$,current,3),1)
RetTxt\i = RetTxt\i + "S"
RetTxt\II = RetTxt\II + "X"
Else
RetTxt\i = RetTxt\i + "S"
RetTxt\II = RetTxt\II + "S"
EndIf
current + 2
Else
RetTxt\i = RetTxt\i + "K"
RetTxt\II = RetTxt\II + "K"
;name sent in "mac caffrey", "mac gregor"
If FindString(" C Q G",Mid(txt$,current+1,2),1)
current + 3
ElseIf FindString("CKQ",strnxt1,1) And FindString("CE CI",strnxt1+strnxt2,1)=0
current + 2
Else
current + 1
EndIf
EndIf
Case "D";-D
If Mid(txt$,current,2)="DG"
If FindString("IEY",strnxt2,1)
RetTxt\i = RetTxt\i + "J"
RetTxt\II = RetTxt\II + "J"
current + 3
Else
RetTxt\i = RetTxt\i + "TK"
RetTxt\II = RetTxt\II + "TK"
current + 2
EndIf
ElseIf FindString("DT DD",Mid(txt$,current,2),1)
RetTxt\i = RetTxt\i + "T"
RetTxt\II = RetTxt\II + "T"
current + 2
Else
RetTxt\i = RetTxt\i + "T"
RetTxt\II = RetTxt\II + "T"
current + 1
EndIf
Case "F";-F
RetTxt\i = RetTxt\i + strcur
RetTxt\II = RetTxt\II + strcur
If strnxt1=strcur
current + 2
Else
current + 1
EndIf
Case "G";-G
If strnxt1 = "H"
If current > 7 And FindString(#vowels,strprv1,1) = 0
RetTxt\i = RetTxt\i + "K"
RetTxt\II = RetTxt\II + "K"
current + 2
ElseIf current = 7 ;"ghislane", ghiradelli
If strnxt2 = "I"
RetTxt\i = RetTxt\i + "J"
RetTxt\II = RetTxt\II + "J"
Else
RetTxt\i = RetTxt\i + "K"
RetTxt\II = RetTxt\II + "K"
EndIf
current + 2
;Parker"s rule (with some further refinements) - e.g., "hugh"
;e.g., "bough"
ElseIf (((current > 8) And FindString("BHD",strprv2,1)) Or (current > 2 And FindString("BHD",Mid(txt$,current-3,1),1)) Or (current > 3 And FindString("BH",Mid(txt$,current-4,1),1)))
current + 2
Else ;e.g., "laugh", "McLaughlin", "cough", "gough", "rough", "tough"
If current > 9 And strprv1 = "U" And FindString("CGLRT",Mid(txt$,current-3,1),1)
RetTxt\i = RetTxt\i + "F"
RetTxt\II = RetTxt\II + "F"
Else
If current > 7 And strprv1 <> "I"
RetTxt\i = RetTxt\i + "K"
RetTxt\II = RetTxt\II + "K"
EndIf
EndIf
current + 2
EndIf
ElseIf strnxt1 = "N"
If current = 8 And FindString(#vowels,Mid(txt$,7,1),1) And slavogermanic = #False
RetTxt\i = RetTxt\i + "KL"
RetTxt\II = RetTxt\II + "L"
current + 2
Else ;not e.g. "cagney"
If Mid(txt$,current + 2,2) <> "EY" And strnxt1 <> "Y" And slavogermanic = #False
RetTxt\i = RetTxt\i + "N"
RetTxt\II = RetTxt\II + "KN"
Else
RetTxt\i = RetTxt\i + "KN"
RetTxt\II = RetTxt\II + "KN"
EndIf
current + 2
EndIf
ElseIf strnxt1+strnxt2="LI" And slavogermanic = #False ;"tagliaro"
RetTxt\i = RetTxt\i + "KL"
RetTxt\II = RetTxt\II + "L"
current + 2
; -ges-,-gep-,-gel-, -gie- at beginning
ElseIf current = 7 And (strnxt1 = "Y" Or FindString("ES EP EB EL EY IB IL IN IE EI ER",Mid(txt$,current+1,2),1))
RetTxt\i = RetTxt\i + "K"
RetTxt\II = RetTxt\II + "J"
current + 2
; -ger-, -gy-
ElseIf (strnxt1+strnxt2="ER" Or strnxt1="Y") And FindString("DANGER RANGER MANGER",Mid(txt$,7,6),1)=0 And FindString("EI",strprv1,1)=0 And FindString("RGY OGY",Mid(txt$,current-1,3),1)=0
RetTxt\i = RetTxt\i + "K"
RetTxt\II = RetTxt\II + "J"
current + 2
; italian e.g, "biaggi"
ElseIf FindString("EIY",strnxt1,1) Or FindString("AGGI OGGI",Mid(txt$,current-1,4),1)
;obvious germanic
If (FindString("VAN VON ",Mid(txt$,7,4),1) Or Mid(txt$,7,3)="SCH" ) Or strnxt1+strnxt2="ET"
RetTxt\i = RetTxt\i + "K"
RetTxt\II = RetTxt\II + "K"
ElseIf Mid(txt$,current + 1,4)="IER " ;always soft if french ending
RetTxt\i = RetTxt\i + "J"
RetTxt\II = RetTxt\II + "J"
Else
RetTxt\i = RetTxt\i + "J"
RetTxt\II = RetTxt\II + "K"
EndIf
current + 2
ElseIf strnxt1="G"
current + 2
RetTxt\i = RetTxt\i + "K"
RetTxt\II = RetTxt\II + "K"
Else
current + 1
RetTxt\i = RetTxt\i + "K"
RetTxt\II = RetTxt\II + "K"
EndIf
Case "H";-H
If (current = 7 Or FindString(#vowels, strprv1, 1)) And FindString(#vowels, strnxt1, 1)
current + 2
RetTxt\i = RetTxt\i + "H"
RetTxt\II = RetTxt\II + "H"
Else
current + 1
EndIf
Case "J";-J
;obvious spanish, "jose", "san jacinto"
If Mid(txt$,current,4)="JOSE" Or Mid(txt$,7,4)="SAN "
If (current = 7 And Mid(txt$,current+4,1)=" ") Or Mid(txt$,7,4)="SAN "
RetTxt\i = RetTxt\i + "H"
RetTxt\II = RetTxt\II + "H"
Else
RetTxt\i = RetTxt\i + "J"
RetTxt\II = RetTxt\II + "H"
EndIf
current + 1
ElseIf current = 7 ;Yankelovich/Jankelowicz
RetTxt\i = RetTxt\i + "J"
RetTxt\II = RetTxt\II + "A"
current + 1
;spanish pron. of e.g. "bajador"
ElseIf FindString(#vowels,strprv1,1) And slavogermanic = #False And (strnxt1="A" Or strnxt1="O")
RetTxt\i = RetTxt\i + "J"
RetTxt\II = RetTxt\II + "H"
current + 1
ElseIf current = last
RetTxt\i = RetTxt\i + "J"
RetTxt\II = RetTxt\II + " "
current + 1
ElseIf FindString("LTKSNMBZ",strnxt1,1)=0 And FindString("SKL",strprv1,1)=0
RetTxt\i = RetTxt\i + "J"
RetTxt\II = RetTxt\II + "J"
current + 1
ElseIf strnxt1 = "J"
current + 2
Else
current + 1
EndIf
Case "K";-K
RetTxt\i = RetTxt\i + strcur
RetTxt\II = RetTxt\II + strcur
If strnxt1=strcur
current + 2
Else
current + 1
EndIf
Case "L";-L
If strnxt1 = "L"
;spanish e.g. "cabrillo", "gallegos"
If (current = (Length-2) And FindString("ILLO ILLA ALLE,",Mid(txt$,current-1,4),1)) Or ((FindString("AS OS,",Mid(txt$,last-1,2),1) Or FindString("A O",Mid(txt$,last,1),1)) And Mid(txt$,current-1,4)="ALLE")
RetTxt\i = RetTxt\i + "L" ;alternate is silent
Else
RetTxt\i = RetTxt\i + "L"
RetTxt\II = RetTxt\II + "L"
EndIf
current + 2
Else
current + 1
RetTxt\i = RetTxt\i + "L"
RetTxt\II = RetTxt\II + "L"
EndIf
Case "M";-M
RetTxt\i = RetTxt\i + strcur
RetTxt\II = RetTxt\II + strcur
;"dumb", "thumb"
If (Mid(txt$,current-1,3)="UMB" And (current+1=last Or Mid(txt$,current+2,2)="ER")) Or strnxt1 = "M"
current + 2
Else
current + 1
EndIf
Case "N";-N
RetTxt\i = RetTxt\i + strcur
RetTxt\II = RetTxt\II + strcur
If strnxt1=strcur Or strnxt1="Ñ"
current + 2
Else
current + 1
EndIf
Case "Ñ";-Ñ
RetTxt\i = RetTxt\i + "N"
RetTxt\II = RetTxt\II + "N"
If strnxt1="N" Or strnxt1=strcur
current + 2
Else
current + 1
EndIf
Case "P";-P
If strnxt1 = "H"
current = current + 2
RetTxt\i = RetTxt\i + "F"
RetTxt\II = RetTxt\II + "F"
ElseIf strnxt1="P" Or strnxt1="B"
; also account FOR "campbell" AND "raspberry"
current + 2
RetTxt\i = RetTxt\i + strcur
RetTxt\II = RetTxt\II + strcur
Else
current + 1
RetTxt\i = RetTxt\i + strcur
RetTxt\II = RetTxt\II + strcur
EndIf
Case "Q";-Q
RetTxt\i = RetTxt\i + "K"
RetTxt\II = RetTxt\II + "K"
If strnxt1=strcur
current + 2
Else
current + 1
EndIf
Case "R";-R
;french e.g. "rogier", but exclude "hochmeier"
If current = last And slavogermanic = 0 And Mid(txt$,current-2, 2)="IE" And FindString("ME MA",Mid(txt$,current-4,2),1)=0
RetTxt\II = RetTxt\II + strcur
Else
RetTxt\i = RetTxt\i + strcur
RetTxt\II = RetTxt\II + strcur
EndIf
If strnxt1=strcur
current + 2
Else
current + 1
EndIf
Case "S";-S
;special cases "island", "isle", "carlisle", "carlysle"
If FindString("ISL YSL",Mid(txt$, current-1, 3),1)
current + 1
ElseIf Mid(txt$, current, 2) = "SH"
;germanic
If FindString("HEIM HOEK HOLM Holz",Mid(txt$, current + 1,4),1)
RetTxt\i = RetTxt\i + strcur
RetTxt\II = RetTxt\II + strcur
Else
RetTxt\i = RetTxt\i + "X"
RetTxt\II = RetTxt\II + "X"
EndIf
current + 2
Else
; italian & armenian
If FindString("SIO SIA",Mid(txt$, current,3),1) Or Mid(txt$, current,4)="SIAN"
If slavogermanic = 0
RetTxt\i = RetTxt\i + "S"
RetTxt\II = RetTxt\II + "X"
Else
RetTxt\i = RetTxt\i + "S"
RetTxt\II = RetTxt\II + "S"
EndIf
current + 3
Else
; german & anglicisations, e.g. "smith" match "schmidt", "snider" match "schneider"
; also, -sz- in slavic language altho in hungarian it is pronounced "s"
If (current = 7 And FindString("M N L W", strnxt1,1)) Or strnxt1 ="Z"
RetTxt\i = RetTxt\i + "S"
RetTxt\II = RetTxt\II + "X"
If strnxt1 ="Z"
current + 2
Else
current + 1
EndIf
Else
If Mid(txt$, current, 2) = "SC"
; Schlesinger"s RULE
If strnxt2 = "H"
If FindString("OO ER EN UY ED EM", Mid(txt$, current+3, 2),1); dutch origin, e.g. "school", "schooner"
If FindString("ER EN", Mid(txt$, current+3, 2),1);"schermerhorn", "schenker"
RetTxt\i = RetTxt\i + "X"
RetTxt\II = RetTxt\II + "SK"
Else
RetTxt\i = RetTxt\i + "SK"
RetTxt\II = RetTxt\II + "SK"
EndIf
current + 3
Else
If current = 7 And FindString(#vowels, Mid(txt$, 10, 1),1) = 0 And Mid(txt$, current+3, 1) <> "W"
RetTxt\i = RetTxt\i + "X"
RetTxt\II = RetTxt\II + "S"
Else
RetTxt\i = RetTxt\i + "X"
RetTxt\II = RetTxt\II + "X"
EndIf
current + 3
EndIf
Else
If FindString("I E Y", strnxt2,1)
RetTxt\i = RetTxt\i + "S"
RetTxt\II = RetTxt\II + "S"
Else
RetTxt\i = RetTxt\i + "SK"
RetTxt\II = RetTxt\II + "SK"
EndIf
current + 3
EndIf
Else
If current = 7 And Mid(txt$, current, 5) = "SUGAR"; special CASE "sugar-"
RetTxt\i = RetTxt\i + "X"
RetTxt\II = RetTxt\II + "S"
current + 1
Else
; french e.g. "resnais", "artois"
If current = last And FindString("AI OI", Mid(txt$, current-2, 2),1)
RetTxt\II = RetTxt\II + "S"
Else
RetTxt\i = RetTxt\i + "S"
RetTxt\II = RetTxt\II + "S"
EndIf
If FindString("S Z", strnxt1, 1)
current + 2
Else
current + 1
EndIf
EndIf
EndIf
EndIf
EndIf
EndIf
Case "T";-T
If Mid(txt$, current, 4) = "TION"
RetTxt\i = RetTxt\i + "X"
RetTxt\II = RetTxt\II + "X"
current + 3
ElseIf FindString("TIA TCH", Mid(txt$, current, 3),1)
RetTxt\i = RetTxt\i + "X"
RetTxt\II = RetTxt\II + "X"
current + 3
ElseIf Mid(txt$, current, 2) = "TH" Or Mid(txt$, current, 3) = "TTH"
; special CASE "thomas", "thames" or germanic
If FindString("OM AM", Mid(txt$, current+2, 2),1) Or FindString("VAN VON", Mid(txt$, 7, 4),1) Or Mid(txt$, 7, 4) = "SCH"
RetTxt\i = RetTxt\i + "T"
RetTxt\II = RetTxt\II + "T"
Else
RetTxt\i = RetTxt\i + "O"
RetTxt\II = RetTxt\II + "T"
EndIf
current + 2
ElseIf FindString("T D", strnxt1,1)
RetTxt\i = RetTxt\i + "T"
RetTxt\II = RetTxt\II + "T"
current + 2
Else
RetTxt\i = RetTxt\i + "T"
RetTxt\II = RetTxt\II + "T"
current + 1
EndIf
Case "V";-V
If strnxt1=strcur
current + 2
Else
current + 1
RetTxt\i = RetTxt\i + "F"
RetTxt\II = RetTxt\II + "F"
EndIf
Case "W";-W
; can also be IN middle OF word
If Mid(txt$, current, 2) = "WR"
RetTxt\i = RetTxt\i + "R"
RetTxt\II = RetTxt\II + "R"
current + 2
Else
If current = 7 And (FindString(#vowels, strnxt1, 1) Or Mid(txt$, current, 2) = "WH")
If FindString(#vowels, strnxt1, 1); Wasserman should match Vasserman
RetTxt\i = RetTxt\i + "A"
RetTxt\II = RetTxt\II + "F"
current + 1
Else
RetTxt\i = RetTxt\i + "A"; need Uomo TO match Womo
RetTxt\II = RetTxt\II + "A"
current + 1
EndIf
Else
; Arnow should match Arnoff
If (current = last And FindString(#vowels, strprv1, 1)) Or FindString("EWSKI EWSKY OWSKI OWSKY", Mid(txt$, current-1, 5),1) Or Mid(txt$, 7, 3) = "SCH"
RetTxt\II = RetTxt\II + "F"
current + 1
Else
; polish e.g. "filipowicz"
If FindString("WICZ WITZ", Mid(txt$, current, 4),1)
RetTxt\i = RetTxt\i + "TS"
RetTxt\II = RetTxt\II + "FX"
current + 4
Else
current + 1 ;else skip it
EndIf
EndIf
EndIf
EndIf
Case "X";-X
; french e.g. breaux
If (current <> last And (FindString("WIAU EAU", Mid(txt$, current-3, 3),1)=0 Or FindString("AU OU", Mid(txt$, current-2, 3),1)=0))
RetTxt\i = RetTxt\i + "KS"
RetTxt\II = RetTxt\II + "KS"
EndIf
If FindString("C X", strnxt1,1)
current + 2
Else
current + 1
EndIf
Case "Z";-Z
If strnxt1 = strcur
RetTxt\i = RetTxt\i + "S"
RetTxt\II = RetTxt\II + "S"
current + 2
Else
If strnxt1 = "H"; chinese pinyin e.g. "zhao"
RetTxt\i = RetTxt\i + "J"
RetTxt\II = RetTxt\II + "J"
current + 2
Else
If FindString("ZO ZI ZA", Mid(txt$, current+1, 2),1) Or (slavogermanic = 1 And (current > 7 And strprv1 <> "T"))
RetTxt\i = RetTxt\i + "S"
RetTxt\II = RetTxt\II + "TS"
Else
RetTxt\i = RetTxt\i + "S"
RetTxt\II = RetTxt\II + "S"
EndIf
EndIf
current + 1
EndIf
Default
If strnxt1=strcur
current + 2
Else
current + 1
EndIf
EndSelect
Wend
RetTxt\i = Left(RetTxt\i,4)
RetTxt\II= Left(RetTxt\II,4)
RetTxt\n = current - 6
ProcedureReturn @RetTxt
EndProcedure
Procedure.l MemCharPos(*Source, strChar.s, StartPos)
; ASM-code to find one byte (character) in memory
; end of memoryblock has to be null
Result.l
MOV Ebx,strChar ; Ebx = Pointer to Char
MOV Ecx,*Source ; Ecx = Pointer to akt. Char in source
DEC StartPos ; StartPos - 1
ADD Ecx,StartPos; set source-pointer to startposition
! rpt_Src: ; startpoint for loop scanning through the source
MOV al,[Ecx]
INC Ecx ; Ecx + 1
CMP al,0 ; if null (end of source-string)
JZ endProc ; -> end Procedure, Result=0
CMP byte[Ebx],al; if found Char
JE gotit ; -> got it
JMP rpt_Src ; else go on with next (endpoint for loop scanning through the source)
! gotit: ; Result = Ecx - Source, to get the place in the source-string not the memory-address
SUB Ecx,*Source ; position of found character minus startpoint of memoryblock
MOV Result,Ecx ; copy result of subtraction to Result
! endProc:
ProcedureReturn Result
EndProcedure
;-wrapping procedures for the DoubleMetaphone proc
Procedure.b PhoneticMatch(word1.s, word2.s)
; to compare word by word
*str2Str.DblMet
strErgebnis.s
*str2Str = getTPLMTFN(word1,1,Len(word1))
strErgebnis = *str2Str\i + " " + *str2Str\II
*str2Str = getTPLMTFN(word2,1,Len(word2))
If FindString(strErgebnis, *str2Str\i, 1) Or FindString(strErgebnis, *str2Str\II, 1)
ProcedureReturn #True
Else
ProcedureReturn #False
EndIf
EndProcedure
Procedure PhoneticSearch(*scannedtxt, word2find.s)
; does split a text in memory to single words
; and comparing each phonetic charcter of this
; to the corresponding character in the phonetic
; code of the searched word.
; Words are skipped after the first character that
; does not match. For longer words this is faster
; than calculating the whole phonetic code of each
; word, cause most words do not match at the first
; two characters.
*str2Str.DblMet
PhonStr.DblMet
strResult.s
Shared Word.WordInTxt
match.b = #False
; get the phonetic code of word2find
*str2Str = getTPLMTFN(word2find, 1, Len(word2find))
If Len(*str2Str\i) < Len(*str2Str\II)
PhonStr\i = *str2Str\II
PhonStr\II = *str2Str\i
Else
PhonStr\i = *str2Str\i
PhonStr\II = *str2Str\II
EndIf
strResult = PhonStr\i + " " + PhonStr\II
LenErgebnis = Len(PhonStr\i)
WordStart = 1
WordEnd = MemCharPos(*scannedtxt, " ", WordStart)
While WordEnd And match = #False ; loop as long a space (end of word)
; or word2search is found
WordLen=WordEnd-WordStart
Word\strTxt = PeekS(*scannedtxt+WordStart-1, WordLen)
PosInWord = 1
PosInPhonWord = 1
Word1$=""
Word2$=""
Repeat ; if one character does not match, leave this loop at once.
*str2Str = getTPLMTFN(Word\strTxt, PosInWord, 1); get the phonetic code of 1 char
Word1$ + *str2Str\i
Word2$ + *str2Str\II
If Len(*str2Str\i) > Len(*str2Str\II)
PosInPhonWord + Len(*str2Str\i)
Else
PosInPhonWord + Len(*str2Str\II)
EndIf
If *str2Str\i <> "" ; phonetic code is as before
strResult=Left(PhonStr\i, PosInPhonWord-1) + " " + Left(PhonStr\II, PosInPhonWord-1)
If FindString(strResult, Word1$, 1) Or FindString(strResult, Word2$, 1)
match = #True
If PosInWord = 1: Word\Pos=WordStart: EndIf; remember the beginning of word
Else
match = #False
Word\Pos=0
EndIf
EndIf
PosInWord = *str2Str\n
Until match=#False Or PosInPhonWord > LenErgebnis Or PosInWord > WordLen
; set match back to #False if the phonetic code from text-word is shorter than from word to find
If PosInWord > WordLen And PosInPhonWord < LenErgebnis: match=#False: EndIf
; set position of next word
WordStart = WordEnd + 1
WordEnd = MemCharPos(*scannedtxt, " ", WordStart)
Wend
ProcedureReturn @Word
EndProcedure
;-Beispiele
;{-Beispiel phonetischer Code eines Wortes
*str2Str.DblMet
strResult.s
strTxt$="kurz"
*str2Str = getTPLMTFN(strTxt$, 1, Len(strTxt$))
Debug "der phonetische Code für " + strTxt$ + " ist " + *str2Str\i + " oder auch " + *str2Str\II
;}
;{-Beispiel Textsuche
*WordStruct.WordInTxt
scannedTxt.s = "das ist ein ziemlich kurtzer Text, mit nur 77 Zeichen, der zu durchsuchen ist"
;123456789 123456789 123456789 123456789 123456789 123456789 123456789 1234567
Debug "zu durchsuchender Text: "
Debug "'" + scannedTxt + "'"
*WordStruct = PhoneticSearch(@scannedTxt, strTxt$)
Debug "'" + strTxt$ + "' gefunden an Position " + Str(*WordStruct\Pos) + " als '" + *WordStruct\strTxt + "'"
strTxt$="ziehmlich"
*WordStruct = PhoneticSearch(@scannedTxt, strTxt$)
Debug "'" + strTxt$ + "' gefunden an Position " + Str(*WordStruct\Pos) + " als '" + *WordStruct\strTxt + "'"
;-a search examples in english
;text from http://www.mrcranky.com/movies/spiceworld/191/11.html :-))
scannedTxt.s = "I LUV THE SPICE GRLS... Tehy RoK!!! YOU SUCK!!!! YOU GOT NO TALINT AND Thay cin do it way better then you can. Your JUST JELOUS... becuz there the BESTEST PEOPLE IN THE WHOLE WIDE WORLD"
Debug "text to be scanned: "
Debug "'" + scannedTxt + "'"
strTxt$="love"
*WordStruct = PhoneticSearch(@scannedTxt, strTxt$)
Debug "'" + strTxt$ + "' found at position " + Str(*WordStruct\Pos) + " as '" + *WordStruct\strTxt + "'"
strTxt$="because"
*WordStruct = PhoneticSearch(@scannedTxt, strTxt$)
Debug "'" + strTxt$ + "' found at position " + Str(*WordStruct\Pos) + " as '" + *WordStruct\strTxt + "'"
strTxt$="they"
*WordStruct = PhoneticSearch(@scannedTxt, strTxt$)
Debug "'" + strTxt$ + "' found at position " + Str(*WordStruct\Pos) + " as '" + *WordStruct\strTxt + "'"
Debug ":-("
;}
;{-Beispiel Textvergleich
txt1.s = "abbrechen"
txt2.s = "Abruch"
If PhoneticMatch(txt1, txt2)
Debug "'" + txt1 + "'" + " entspricht phonetisch " + "'" + txt2 + "'"
Else
Debug "'" + txt1 + "'" + " entspricht nicht " + "'" + txt2 + "'"
EndIf
;-example english/german spelling
txt1.s = "english"
txt2.s = "englisch"
If PhoneticMatch(txt1, txt2)
Debug "'" + txt1 + "'" + " does match phonetic with " + "'" + txt2 + "'"
Else
Debug "'" + txt1 + "'" + " does not match phonetic with " + "'" + txt2 + "'"
EndIf
;}
End
ist noch mehr Arbeit.
Denke das kann der Eine oder Andere textver- bearbeitende weiterverwenden.
_________
schic