Fix case sensitivity in path for html files
Posted: Fri Feb 07, 2025 5:09 am
I was trying to fix the letter case of file and folder names in paths. If we extract the CHM file, we will see that the paths written in the text use a different letter case than the real files on disk. My task is to make the letter case in the text the same as that of the real files. For example:
The real path: 2DDrawing\AlphaBlend.html
Using the ScanDir() function, I get the tree structure. Next, I need to split the path into an array, dividing the string by "/". Each element of the path has a nesting level. I'm trying to find a folder at the same nesting level using a case-insensitive comparison, and if it is found and its case does not match, I replace it with the correct name. I also need to take into account relative paths such as "../".
I'm a little confused right now; I have been trying for too long to solve this problem, which I need for the chmViewer program. But I think this is a more universal feature, as it will help to fix any help file so that it can be uploaded to a server that uses case-sensitive file names.
Code: Select all
<a href="../2ddrawing/index.html">Drawing operations</a>
Using the ScanDir() function, I get the tree structure. Next, I need to split the path into an array, dividing the string by "/". Each element of the path has a nesting level. I'm trying to find a folder at the same nesting level using a case-insensitive comparison, and if it is found and its case does not match, I replace it with the correct name. I also need to take into account relative paths such as "../".
I'm a little confused right now; I have been trying for too long to solve this problem, which I need for the chmViewer program. But I think this is a more universal feature, as it will help to fix any help file so that it can be uploaded to a server that uses case-sensitive file names.
Code: Select all
; Every variable must be declared before use.
EnableExplicit
; Double-quote character (character code 34); spliced into the href pattern built in FixPath().
#q$ = Chr(34)
; #q$ = "`"
;- ● Enumeration
; File number passed to CreateFile()/WriteString()/CloseFile() when FixPath() saves its results.
Enumeration File
#File
EndEnumeration
; Identifiers of the two regular expressions created in FixPath():
; one for the paths in the .hhc text, one for href values in the HTML text.
Enumeration RegExp
#RegExpFixPathTOC
#RegExpFixPathHTML
EndEnumeration
; Folder that FixPath() scans and that holds the .hhc file; paths are built by
; plain concatenation, so the value ends with a path separator.
Global dir0$ = "C:\html\"
; Base name of the .hhc file; FixPath() appends ".hhc" to it.
Global pathTOS$ = "Table of Contents"
; One path kept in several spellings; element type of the Files2() list in FixPath().
Structure Path2
path.s ; spelling that FixPath() writes into the text as the replacement
pathLCase.s ; spelling that FixPath() compares with the lower-cased path found in the text
file.s ; set by FixPath() to GetPathPart(path)
fileLCase.s ; set by FixPath() to GetPathPart(pathLCase)
EndStructure
; One file entry of a scanned folder; element type of Tree\Files().
Structure Files
Name.s ; File name exactly as the directory listing reported it
NameL.s ; File name (lowercase), used for case-insensitive comparison
EndStructure
; One folder of the scanned directory tree. The structure is recursive:
; every sub-folder is itself a Tree element stored in Dirs().
Structure Tree
DirName.s ; Folder name
DirNameL.s ; Folder name (lowercase), used for case-insensitive comparison
List Files.Files() ; List of files in the current folder.
List Dirs.Tree() ; List of subfolders of the current folder.
EndStructure
; Recursively records the contents of the folder AllDir in the tree node *s.
;
; *s      - node that receives the entries; nothing is done when it is 0.
; AllDir  - folder to read. Sub-folder paths are built as AllDir + name + #PS$,
;           so AllDir has to end with a path separator.
;
; Every sub-folder (except "." and "..") is appended to *s\Dirs() and scanned in
; turn. Only files whose extension is "html" or "htm" are appended to *s\Files();
; the extension is compared case-insensitively so that names such as
; "INDEX.HTML" or "Page.Htm" are recorded as well.
;
; Returns #True when AllDir could be examined, 0 otherwise.
Procedure ScanDir(*s.Tree, AllDir.s) ; Save the structure of the specified folder
  Protected id, entry.s, ext.s

  If *s = 0
    ProcedureReturn 0
  EndIf

  id = ExamineDirectory(#PB_Any, AllDir, "*.*")
  If id = 0
    ProcedureReturn 0
  EndIf

  While NextDirectoryEntry(id)
    entry = DirectoryEntryName(id)
    If DirectoryEntryType(id) = #PB_DirectoryEntry_Directory ; Folder
      If entry <> "." And entry <> ".."
        If AddElement(*s\Dirs())
          *s\Dirs()\DirName = entry
          *s\Dirs()\DirNameL = LCase(entry)
          ; The element just added is the current one, so it becomes the node of the recursive call.
          ScanDir(*s\Dirs(), AllDir + entry + #PS$)
        EndIf
      EndIf
    Else ; File
      ; Lower-case the extension first: the real files may use any letter case.
      ext = LCase(GetExtensionPart(entry))
      If (ext = "html" Or ext = "htm") And AddElement(*s\Files())
        *s\Files()\Name = entry
        *s\Files()\NameL = LCase(entry)
      EndIf
    EndIf
  Wend
  FinishDirectory(id)

  ProcedureReturn #True
EndProcedure
; Releases the lists of a tree node that was filled by ScanDir().
; The file list of *s is freed first; when *s has sub-folders, each of them is
; cleared recursively and then the sub-folder list itself is freed.
; Does nothing and returns 0 when *s is 0.
Procedure ClearFileTree(*s.Tree)
  If *s = #Null
    ProcedureReturn 0
  EndIf

  FreeList(*s\Files()) ; the files of this folder

  If ListSize(*s\Dirs()) > 0
    ; Walk the sub-folders from the first element; the current element is the node of the recursive call.
    ResetList(*s\Dirs())
    While NextElement(*s\Dirs())
      ClearFileTree(*s\Dirs())
    Wend
    FreeList(*s\Dirs()) ; the sub-folder list goes last, after its contents
  EndIf

  ProcedureReturn
EndProcedure
; Splits String$ into StringList() at every character contained in Separator$.
;
; String$      - text to split. It is a by-value parameter, so the procedure can
;                overwrite separators with 0 in its own copy.
; StringList() - receives the parts; it is resized to the index of the last part.
; Separator$   - set of single-character separators.
;
; Empty parts produced by leading or repeated separators are skipped. The text
; after the last separator is always stored as the final element, even when it
; is empty, so the array always holds at least one element.
;
; The address of an empty string can be 0, so both String$ and Separator$ are
; checked before they are dereferenced: an empty String$ yields one empty
; element, an empty Separator$ yields String$ unchanged as the only element.
Procedure SplitA2(String$, Array StringList.s(1), Separator$ = #CRLF$ + #TAB$ + #FF$ + #VT$ + " ")
  Protected *S = @String$                 ; start of the part currently being collected
  Protected *jc.Character, *c.Character = @String$
  Protected *sep = @Separator$
  Protected i

  i = 0
  If *c And *sep
    While *c\c
      *jc = *sep
      While *jc\c
        If *c\c = *jc\c
          *c\c = 0                         ; terminate the current part in place
          If *S <> *c                      ; skip empty parts
            ReDim StringList(i)
            StringList(i) = PeekS(*S)
            i + 1
          EndIf
          *S = *c + SizeOf(Character)
          Break
        EndIf
        *jc + SizeOf(Character)
      Wend
      *c + SizeOf(Character)
    Wend
  EndIf

  ReDim StringList(i)
  If *S
    StringList(i) = PeekS(*S)              ; remainder after the last separator
  Else
    StringList(i) = ""
  EndIf
EndProcedure
; Looks up one path component in the tree node *s, ignoring letter case, and
; corrects its spelling.
;
; *s      - folder node to search.
; *folder - String structure holding the component; when an entry with the same
;           lower-case name is found and its spelling differs, \s is overwritten
;           with the real name.
; IsFile  - 0: search the sub-folders in *s\Dirs(); non-zero: search *s\Files().
;
; Returns the address of the matching list element, or 0 when *s or *folder is 0
; or no entry matches. A component that is already spelled correctly counts as a
; match, so a folder caller can keep descending through it.
Procedure TestPath(*s.Tree, *folder.String, IsFile = 0)
  Protected wanted.s

  If *s = 0 Or *folder = 0
    ProcedureReturn 0
  EndIf

  wanted = LCase(*folder\s) ; computed once instead of once per list element

  If IsFile
    ForEach *s\Files()
      If *s\Files()\NameL = wanted            ; equal when letter case is ignored
        If *s\Files()\Name <> *folder\s       ; spelling differs -> take the real name
          *folder\s = *s\Files()\Name
        EndIf
        ProcedureReturn @*s\Files()           ; pointer to the list element
      EndIf
    Next
  Else
    ForEach *s\Dirs()
      If *s\Dirs()\DirNameL = wanted          ; equal when letter case is ignored
        If *s\Dirs()\DirName <> *folder\s     ; spelling differs -> take the real name
          *folder\s = *s\Dirs()\DirName
        EndIf
        ProcedureReturn @*s\Dirs()            ; pointer to the list element
      EndIf
    Next
  EndIf

  ProcedureReturn 0 ; nothing found
EndProcedure
; Returns the whole content of the file Path$ as one string.
; The string format is taken from ReadStringFormat(); #PB_File_IgnoreEOL makes
; ReadString() read across line ends instead of stopping at the first one.
; An empty string is returned when the file cannot be opened.
Procedure.s ReadFileToVar(Path$)
  Protected content$, handle, encoding

  handle = ReadFile(#PB_Any, Path$)
  If handle = 0
    ProcedureReturn content$
  EndIf

  encoding = ReadStringFormat(handle)
  content$ = ReadString(handle, encoding | #PB_File_IgnoreEOL)
  ; content$ = ReadString(handle, #PB_UTF8 | #PB_File_IgnoreEOL)
  CloseFile(handle)

  ProcedureReturn content$
EndProcedure
; Corrects the letter case of the paths referenced by the help project stored in dir0$.
;
; AndHTML - when non-zero, the pass over HTML files is executed as well.
;
; Steps:
;   1. dir0$ is scanned with ScanDir() into a folder/file tree.
;   2. dir0$ + pathTOS$ + ".hhc" is read and every <param name="Local" value="...">
;      path that FileSize() reports as missing is resolved component by component
;      against the tree, ignoring letter case. When the whole path resolves to a
;      recorded file, the real spelling replaces the old one in the text. Only
;      letters are changed: the separators and the length of the path stay as they
;      were, which is what the in-place replacement requires.
;   3. The result is written to dir0$ + "0.hhc".
;   4. The tree is released with ClearFileTree() once it is no longer needed.
;
; NOTE(review): Format is never assigned in this procedure (the call that used to
; fill it is commented out), so it always ends up as #PB_UTF8.
; NOTE(review): nothing visible here fills Files() or Files2(), so the HTML pass
; currently has no files to work on - confirm where these lists should come from.
Procedure FixPath(AndHTML = 0)
  Protected NewList Files.s()
  Protected NewList Files2.Path2()
  Protected Format, pathTosTotal$, Text$, pathLCase$, htmpath$, i
  Protected match$, fixed$, real$, matchPos, lastIndex, searchPos, compPos, resolved
  Protected *node.Tree, *next.Tree
  Protected Dim pathLCase.s(0)
  Protected s.Tree ; root of the scanned tree

  ; Scan only when the folder name is not empty and is an existing directory.
  If Asc(dir0$) And FileSize(dir0$) = -2
    s\DirName = dir0$
    ScanDir(@s, s\DirName) ; build the tree of folders and files
  EndIf

  ; Fix paths in hhc
  pathTosTotal$ = dir0$ + pathTOS$ + ".hhc"
  ; Text$ = OpenFileToGadget(pathTosTotal$, @Format) ; read hhc
  Text$ = ReadFileToVar(pathTosTotal$) ; read hhc
  If Format <> #PB_Ascii
    Format = #PB_UTF8 ; no other formats are accepted
  EndIf

  ; Collect all the paths in the hhc text.
  If CreateRegularExpression(#RegExpFixPathTOC, "<param name=.Local. value=.\K[^\r\n:*?<>|]+]*?(?=.>)", #PB_RegularExpression_NoCase)
    If ExamineRegularExpression(#RegExpFixPathTOC, Text$)
      While NextRegularExpressionMatch(#RegExpFixPathTOC)
        match$ = RegularExpressionMatchString(#RegExpFixPathTOC)
        If FileSize(dir0$ + match$) < 0 ; the path as written does not exist (e.g. wrong case on Linux)
          matchPos = RegularExpressionMatchPosition(#RegExpFixPathTOC)
          pathLCase$ = LCase(match$)

          ReDim pathLCase(0) ; drop the parts of the previous path
          SplitA2(pathLCase$, pathLCase(), "\/") ; leading and repeated separators are skipped by SplitA2
          lastIndex = ArraySize(pathLCase())

          fixed$ = match$     ; working copy that receives the corrected spelling
          searchPos = 1
          resolved = #True
          *node = @s          ; start the descent at the root

          For i = 0 To lastIndex
            real$ = ""
            If i = lastIndex
              ; The last part is a file name.
              ForEach *node\Files()
                If *node\Files()\NameL = pathLCase(i)
                  real$ = *node\Files()\Name
                  Break
                EndIf
              Next
              If real$ = ""
                resolved = #False
                Break
              EndIf
            Else
              ; Every other part is a folder; descend into it.
              *next = 0
              ForEach *node\Dirs()
                If *node\Dirs()\DirNameL = pathLCase(i)
                  *next = @*node\Dirs()
                  real$ = *node\Dirs()\DirName
                  Break
                EndIf
              Next
              If *next = 0
                resolved = #False
                Break
              EndIf
              *node = *next
            EndIf

            ; Locate this part inside the original text of the path. Parts contain no
            ; separators, so the first hit at or after searchPos is the part itself.
            compPos = FindString(fixed$, pathLCase(i), searchPos, #PB_String_NoCase)
            If compPos = 0 Or Len(real$) <> Len(pathLCase(i))
              resolved = #False
              Break
            EndIf
            ReplaceString(fixed$, Mid(fixed$, compPos, Len(real$)), real$, #PB_String_InPlace, compPos, 1)
            searchPos = compPos + Len(real$)
          Next

          ; Replace only a path that resolved completely and really changed.
          If resolved And fixed$ <> match$ And Len(fixed$) = Len(match$)
            ReplaceString(Text$, match$, fixed$, #PB_String_InPlace, matchPos, 1)
            Debug "замена в позиции: " + Str(matchPos)
          EndIf
        EndIf
      Wend
      If CreateFile(#File, dir0$ + "0.hhc", #PB_UTF8)
        WriteString(#File, Text$)
        CloseFile(#File)
      EndIf
    EndIf
    FreeRegularExpression(#RegExpFixPathTOC)
  EndIf

  If AndHTML
    Debug "——————————————————————"
    If CreateRegularExpression(#RegExpFixPathHTML, "(?<=\hhref=([" + #q$ + "']))[^\r\n<>|?*:]+?(?=\1)", #PB_RegularExpression_NoCase)
      ; Derive the folder parts of every known path. The loop runs over Files2()
      ; itself, because these are the elements being written.
      ForEach Files2()
        Files2()\file = GetPathPart(Files2()\path)
        Files2()\fileLCase = GetPathPart(Files2()\pathLCase)
      Next
      ForEach Files()
        ; Text$ = OpenFileToGadget(Files(), @Format)
        Text$ = ReadFileToVar(Files())
        If Format <> #PB_Ascii
          Format = #PB_UTF8 ; no other formats are accepted
        EndIf
        htmpath$ = GetPathPart(Files())
        If ExamineRegularExpression(#RegExpFixPathHTML, Text$)
          While NextRegularExpressionMatch(#RegExpFixPathHTML)
            match$ = RegularExpressionMatchString(#RegExpFixPathHTML)
            ; Ignore paths starting with a dot; handle only paths that do not exist as written.
            If Asc(match$) <> '.' And FileSize(htmpath$ + match$) < 0
              Debug "не найден: " + match$
              pathLCase$ = LCase(match$)
              ForEach Files2()
                If Files2()\fileLCase = pathLCase$
                  ReplaceString(Text$, match$, Files2()\file, #PB_String_InPlace, RegularExpressionMatchPosition(#RegExpFixPathHTML), 1)
                  Debug "replacement in position: " + RegularExpressionMatchPosition(#RegExpFixPathHTML)
                  Break
                EndIf
              Next
            EndIf
          Wend
        EndIf
        If CreateFile(#File, Files(), Format)
          WriteString(#File, Text$)
          CloseFile(#File)
        EndIf
      Next
      FreeRegularExpression(#RegExpFixPathHTML)
    Else
      Debug "Error"
    EndIf
    MessageRequester("", "")
  EndIf

  ; The tree is released only here, after every lookup that needs it has run.
  ClearFileTree(@s)

  ; If Asc(filepaths1$) And Asc(filepaths2$)
  ; If CreateFile(#File, dir0$ + "f1.txt", #PB_UTF8)
  ; WriteString(#File , filepaths1$)
  ; CloseFile(#File)
  ; EndIf
  ; If CreateFile(#File, dir0$ + "f2.txt", #PB_UTF8)
  ; WriteString(#File , filepaths2$)
  ; CloseFile(#File)
  ; EndIf
  ; EndIf
EndProcedure