thanks!
https://drive.google.com/file/d/1sLYzrA ... sp=sharing
line: filetext$ = PeekS(*UnpackID, Result, #PB_UTF8)
Code: Select all
; code 1
; Default pattern for the keyword checks: the word SIGNED, optionally surrounded by whitespace.
#FixedRegularStr = "^\s*SIGNED\s*$"
; Sentinel string returned by checkstring() when the pattern does not match.
#KeyWordsNotFound = "keywords not found"
; Result of the most recent checkdocxkeywords() call; that procedure resets it to "" on entry.
Global resultstr.s = ""
; Tests the first non-empty line of sstr against a regular expression.
;
; sstr       - text to inspect; lines are separated by Chr(13).
; regularstr - pattern to apply (defaults to #FixedRegularStr).
;
; Returns the matched text, or #KeyWordsNotFound when the first non-empty
; line does not match, the pattern cannot be compiled, or sstr holds no
; non-empty line at all.
Procedure.s checkstring(sstr.s , regularstr.s = #FixedRegularStr)
  Protected matched.s = ""
  Protected candidate.s
  Protected regex.i
  ; n separators delimit n + 1 fields, so the last field must be visited too.
  Protected i.i, fieldcount.i = CountString(sstr, Chr(13)) + 1

  For i = 1 To fieldcount
    candidate = StringField(sstr, i, Chr(13))
    If Len(candidate) > 0
      ; #PB_Any keeps this helper from clobbering a regular expression #0
      ; that other code may own.
      regex = CreateRegularExpression(#PB_Any, regularstr)
      If regex
        If ExamineRegularExpression(regex, candidate)
          If NextRegularExpressionMatch(regex)
            matched = RegularExpressionMatchString(regex)
          EndIf
        EndIf
        FreeRegularExpression(regex)
      EndIf
      Break ; only the first non-empty line is ever tested
    EndIf
  Next

  If matched = ""
    ProcedureReturn #KeyWordsNotFound
  EndIf
  ProcedureReturn matched
EndProcedure
; Placeholder XML parser: takes a pointer to the XML text and currently
; always reports an empty first line.
Procedure.s parsedocxxml(*xmlstr)
  ProcedureReturn ""
EndProcedure
; Unpacks "word/document.xml" from a .docx (ZIP) archive and checks its
; first line for keywords.
;
; filename   - path of the .docx file; when empty, the archive is read from memory.
; *pmem      - address of an in-memory archive (used only when filename is empty).
; filesize   - size in bytes of the in-memory archive.
; regularstr - pattern handed to checkstring() (defaults to #FixedRegularStr).
;
; Returns (and stores in the global resultstr) the checkstring() result, or ""
; when the archive cannot be opened, the entry is missing or cannot be
; unpacked, or no first line could be extracted.
Procedure.s checkdocxkeywords(filename.s , *pmem = #Null, filesize = 0, regularstr.s = #FixedRegularStr)
  Protected firstlinestr.s = "", filetext$ = ""
  Protected *UnpackID
  Protected pack.i = 0, entrysize.i, Result.i

  Debug "filename.s =" + filename
  resultstr = ""
  UseZipPacker()

  If filename <> ""
    pack = OpenPack(#PB_Any, filename, #PB_PackerPlugin_Zip)
  ElseIf *pmem <> #Null And filesize > 0
    pack = CatchPack(#PB_Any, *pmem, filesize, #PB_PackerPlugin_Zip)
  EndIf
  If pack = 0
    ProcedureReturn resultstr
  EndIf

  If ExaminePack(pack)
    While NextPackEntry(pack)
      If PackEntryName(pack) = "word/document.xml"
        entrysize = PackEntrySize(pack)
        Debug "PackEntrySize =" + Str(entrysize)
        If entrysize > 0
          *UnpackID = AllocateMemory(entrysize)
          If *UnpackID
            Result = UncompressPackMemory(pack, *UnpackID, entrysize, "word/document.xml")
            Debug "Result =" + Str(Result)
            If Result > 0
              Debug "*UnpackID=" + Str(*UnpackID)
              ; Result is a BYTE count. Without #PB_ByteLength, PeekS() treats the
              ; length as a number of characters, which can run past the end of
              ; the buffer when the XML contains multi-byte UTF-8 sequences.
              filetext$ = PeekS(*UnpackID, Result, #PB_UTF8 | #PB_ByteLength)
              If filetext$ <> ""
                firstlinestr = parsedocxxml(@filetext$)
              EndIf
              If Len(firstlinestr) > 0
                resultstr = checkstring(firstlinestr, regularstr)
              EndIf
            EndIf
            ; Release the buffer on every path, including the successful one.
            FreeMemory(*UnpackID)
          EndIf
        EndIf
        Break ; the document body has been handled; no need to scan further entries
      EndIf
    Wend
  EndIf

  ClosePack(pack)
  ProcedureReturn resultstr
EndProcedure
; Reports whether a .docx file contains the keywords.
;
; filename   - path of the .docx file.
; regularstr - pattern to look for; an empty string selects #FixedRegularStr.
;
; Returns 1 when the keywords were found, 0 when checkdocxkeywords() reported
; #KeyWordsNotFound, and -1 when it returned an empty result.
Procedure.i existkeywords_docx(filename.s , regularstr.s = #FixedRegularStr)
  Protected checkresult.s

  If regularstr = ""
    regularstr = #FixedRegularStr
  EndIf

  ; Forward the caller's pattern instead of always using the default one.
  checkresult = checkdocxkeywords(filename, #Null, 0, regularstr)

  Select checkresult
    Case #KeyWordsNotFound
      ProcedureReturn 0
    Case ""
      ProcedureReturn -1
  EndSelect
  ProcedureReturn 1
EndProcedure
; Dispatches the keyword check according to the file extension.
;
; filename   - path of the file to check.
; regularstr - pattern to look for; an empty string selects #FixedRegularStr.
;
; Returns the result of the format-specific check (currently only DOCX is
; handled, via existkeywords_docx()), or -1 for any other extension.
Procedure.i existkeywords_file(filename.s , regularstr.s = #FixedRegularStr)
  Protected existkeywords.i = -1
  Protected filenameext$ = UCase(GetExtensionPart(filename))

  If regularstr = ""
    regularstr = #FixedRegularStr
  EndIf

  Select filenameext$
    Case "DOCX"
      ; Pass the caller's pattern through rather than the fixed default.
      existkeywords = existkeywords_docx(filename, regularstr)
  EndSelect

  ProcedureReturn existkeywords
EndProcedure
;-> main
;
; Reads a list of file names (one per line) from "AllResults-v2.txt", runs the
; keyword check on each of them, shows progress on the console and writes
; "<file name>;<result>" lines to "AllResults-v2-checked.txt".
Global NewList filelist.s()
Global NewList filelist_checked.s()
If OpenConsole()
  EnableGraphicalConsole(1)
  ClearConsole()

  ; ReadFile() never creates the list file when it is missing (OpenFile() would).
  If ReadFile(0, "AllResults-v2.txt", #PB_File_SharedRead | #PB_UTF8)
    ; Skip a byte order mark, if any, so it does not end up in the first file name.
    ReadStringFormat(0)
    While Not Eof(0)
      AddElement(filelist())
      filelist() = ReadString(0, #PB_UTF8)
    Wend
    CloseFile(0)
  EndIf

  Define.i i = 1, j = 0
  Define.s result = ""

  ForEach filelist()
    j = existkeywords_file(filelist(), "")
    Select j
      Case 1
        result = "warn 1"
      Case 0
        result = "warn 2"
      Case -1
        result = "warn 3"
    EndSelect

    AddElement(filelist_checked())
    filelist_checked() = filelist() + ";" + result

    ; Progress display: always overwrite the same console position.
    ConsoleLocate(2, 2)
    Print(Str(i) + Space(2) + filelist_checked() + Space(2))
    i = i + 1
  Next

  ; CreateFile() truncates an existing report, so no stale lines from an
  ; earlier, longer run survive at the end of the file.
  If CreateFile(1, "AllResults-v2-checked.txt", #PB_File_SharedWrite | #PB_UTF8)
    ForEach filelist_checked()
      WriteStringN(1, filelist_checked(), #PB_UTF8)
    Next
    CloseFile(1)
  EndIf
EndIf
When I use code 2 to test the PeekS() function (even 5000 times), the test completes successfully. ErrorHandler() is copied from the help file.
Code: Select all
; code 2
; Stand-alone test: unpacks "word/document.xml" from one fixed .docx file,
; converts it to a string with PeekS() and parses it with CatchXML(), printing
; the intermediate results to the debug output.
; Wrap the body in a For loop to repeat the test many times.
Procedure test_peeks()
  Protected pack.i, entrysize.i, Result2.i, xml.i
  Protected *p
  Protected filetext$

  UseZipPacker()
  pack = OpenPack(#PB_Any, "H:\mylib\GemBox.Document.Examples-master\C#\Advanced Features\Progress Reporting and Cancellation\Cancellation in WPF\LargeDocument.docx", #PB_PackerPlugin_Zip)
  Debug "result1 = " + Str(pack)
  If pack = 0
    ; Nothing was opened, so there is nothing to close either.
    ProcedureReturn
  EndIf

  If ExaminePack(pack)
    While NextPackEntry(pack)
      If PackEntryName(pack) = "word/document.xml"
        entrysize = PackEntrySize(pack)
        Debug "PackEntrySize = " + Str(entrysize)
        If entrysize > 0
          *p = AllocateMemory(entrysize)
          If *p
            Result2 = UncompressPackMemory(pack, *p, entrysize, "word/document.xml")
            Debug "Result2 = " + Str(Result2)
            If Result2 > 0
              ; Result2 counts bytes; #PB_ByteLength makes PeekS() interpret the
              ; length that way instead of as a number of characters.
              filetext$ = PeekS(*p, Result2, #PB_UTF8 | #PB_ByteLength)
              Debug "filetext$ len = " + Str(Len(filetext$))
              xml = CatchXML(#PB_Any, *p, Result2)
              Debug "Result4 = " + Str(xml)
              ; Only free the XML object when it was actually created.
              If xml
                FreeXML(xml)
              EndIf
            EndIf
            FreeMemory(*p)
          EndIf
        EndIf
      EndIf
    Wend
  EndIf

  ClosePack(pack)
EndProcedure
test_peeks()
The problem can be temporarily worked around by modifying the code and ignoring the large file for now. Comment out the following code lines:
Code: Select all
filetext$ = PeekS(*UnpackID, Result, #PB_UTF8)
If filetext$ <> ""
firstlinestr = parsedocxxml(@filetext$)
EndIf
Code: Select all
firstlinestr = parsedocxxml("",*UnpackID, Result)
Code: Select all
parsedocxxml("",*UnpackID, Result)
....
If XMLStatus(0) <> #PB_XML_Success
FreeXML(0)
ProcedureReturn firstlinestr
EndIf
Maybe I will try to modify the decompression scheme of the code. When PackEntrySize(11) is greater than 500000, decompress it directly to a temporary file.