txt -> html (chm)

Share your advanced PureBasic knowledge/code with the community.
AZJIO
Addict
Addict
Posts: 2191
Joined: Sun May 14, 2017 1:48 am

txt -> html (chm)

Post by AZJIO »

I have some work information that I find easier to read in a highlighted html file, so I made a text to html file converter. Next I can build this using the CHM compiler or in an APK

Code: Select all

; AZJIO

EnableExplicit

; https://www.purebasic.fr/english/viewtopic.php?p=575871
Structure ReplaceGr
    pos.i
    ngr.i
    group.s
EndStructure

Procedure RegexReplace2(RgEx, *Result.string, Replace0$, Escaped = 0)
    Protected i, CountGr, Pos, Offset = 1
    Protected Replace$
    Protected NewList item.s()
    Protected LenT, *Point
;     Static RE2
;     Static RE3
    Protected RE2
    Protected NewList ReplaceGr.ReplaceGr()

    CountGr = CountRegularExpressionGroups(RgEx)
    ; ограничение групп, только обратные ссылки \1 .. \9
    If CountGr > 9
        CountGr = 9
    EndIf

    If ExamineRegularExpression(RgEx, *Result\s)

        ; Поиск Esc-символов в поле замены регвыр
        If Escaped
            Replace0$ = ReplaceString(Replace0$, "\r", #CR$)
            Replace0$ = ReplaceString(Replace0$, "\n", #LF$)
            Replace0$ = ReplaceString(Replace0$, "\t", #TAB$)
            Replace0$ = ReplaceString(Replace0$, "\f", #FF$)
        EndIf

        ; Поиск ссылок на группы в поле замены регвыр
        RE2 = CreateRegularExpression(#PB_Any, "\\\d")
        If RE2
            If ExamineRegularExpression(RE2, Replace0$)
                While NextRegularExpressionMatch(RE2)
                    If AddElement(ReplaceGr())
                        ReplaceGr()\pos = RegularExpressionMatchPosition(RE2) ; позиция
                        ReplaceGr()\ngr = ValD(Right(RegularExpressionMatchString(RE2), 1)) ; номер группы
                        ReplaceGr()\group = RegularExpressionMatchString(RE2) ; текст группы
                    EndIf
                Wend
            EndIf
            FreeRegularExpression(RE2) ; убрать строку при Static
        EndIf
        If Not ListSize(ReplaceGr())
            *Result\s = ReplaceRegularExpression(RgEx, *Result\s, Replace0$)
            ProcedureReturn
        EndIf
;         Сортировка по позиции, чтобы делать замены с конца и не нарушались ранее найденные позиции
        SortStructuredList(ReplaceGr(), #PB_Sort_Descending, OffsetOf(ReplaceGr\pos), TypeOf(ReplaceGr\pos))

        While NextRegularExpressionMatch(RgEx)
            Pos = RegularExpressionMatchPosition(RgEx)
            Replace$ = Replace0$

            ForEach ReplaceGr()
                If ReplaceGr()\ngr
                    Replace$ = ReplaceString(Replace$, ReplaceGr()\group, RegularExpressionGroup(RgEx, ReplaceGr()\ngr), #PB_String_CaseSensitive, ReplaceGr()\pos, 1)
                Else
                    Replace$ = ReplaceString(Replace$, ReplaceGr()\group, RegularExpressionMatchString(RgEx), #PB_String_CaseSensitive, ReplaceGr()\pos, 1) ; обратная ссылка \0
                EndIf
            Next
            ; item() = часть строки между началом и первым совпадением или между двумя совпадениями + результат подстановки групп

            If AddElement(item())
                item() = Mid(*Result\s, Offset, Pos - Offset) + Replace$
            EndIf
            Offset = Pos + RegularExpressionMatchLength(RgEx)
        Wend
        If AddElement(item())
            item() = Mid(*Result\s, Offset)
        EndIf

        ; Формирования текстового списка
        ; Debug "Count = " + Str(ListSize(item()))
;         Count = ListSize(item())
        LenT = 0
        ForEach item()
            LenT + Len(item()) ; вычисляем длину данных для вмещения частей текста
        Next

        *Result\s = Space(LenT) ; создаём строку забивая её пробелами
        *Point = @*Result\s ; Получаем адрес строки
        ForEach item()
            CopyMemoryString(item(), @*Point) ; копируем очередной путь в указатель
        Next
        ; Конец => Формирования текстового списка

        FreeList(item()) ; удаляем список, хотя в функции наверно это не требуется
    EndIf
EndProcedure


Procedure ForceDirectories(Dir.s)
	Static tmpDir.s, Init
	Protected result
	
	If Asc(Dir) ; если что-то есть
		If Not Init
			tmpDir = Dir
			Init   = 1
		EndIf
		Dir = RTrim(Dir, #PS$) ; убираем последний слеш, не делая лишние проверки
		; если папка существует или дошли до корневой, то
		If FileSize(Dir) = -2 Or (Len(Dir) < 3) Or GetPathPart(Dir) = Dir
			If FileSize(tmpDir) = -2
				result = -1 ; попробуем использовать -1 в качестве "путь уже создан"
			EndIf
			tmpDir = ""
			Init   = 0
			ProcedureReturn result ; если это папка, то возврат будет 1
		EndIf
		ForceDirectories(GetPathPart(Dir))
		ProcedureReturn CreateDirectory(Dir)
	Else
		ProcedureReturn 0
	EndIf
EndProcedure


Procedure FileSearch(List Files.s(), dir.s, mask.s = "*", depth = 130)
	Protected Name.s, c
	Protected Dim hDir(depth)
	Protected Dim SearchPath.s(depth)

	If Right(dir, 1) <> #PS$
		dir + #PS$
	EndIf

	SearchPath(c) = dir
	hDir(c) = ExamineDirectory(#PB_Any, dir, mask)
	If Not hDir(c)
		ProcedureReturn
	EndIf

	Repeat
		While NextDirectoryEntry(hDir(c))
			Name = DirectoryEntryName(hDir(c))
			If Name = "." Or Name = ".."
				Continue
			EndIf
			If DirectoryEntryType(hDir(c)) = #PB_DirectoryEntry_Directory
				If c >= depth
					Continue
				EndIf
				dir = SearchPath(c)
				c + 1
				SearchPath(c) = dir + Name + #PS$
				hDir(c) = ExamineDirectory(#PB_Any, SearchPath(c), mask)
				If Not hDir(c)
					c - 1
				EndIf
			Else
				If AddElement(Files())
					Files() = SearchPath(c) + Name
				EndIf
			EndIf
		Wend
		FinishDirectory(hDir(c))
		c - 1
	Until c < 0
EndProcedure


Procedure.s ReadFileToVar(Path$)
	Protected id_file, Format, text$

	id_file = ReadFile(#PB_Any, Path$)
	If id_file
		Format = ReadStringFormat(id_file)
		text$ = ReadString(id_file, Format | #PB_File_IgnoreEOL)
		; 	text$ = ReadString(id_file, #PB_UTF8 | #PB_File_IgnoreEOL)
		CloseFile(id_file)
	EndIf

	ProcedureReturn text$
EndProcedure


Define NewList Files.s()
Define PathTXT$ = "C:\my_folder\" ; source path for text files (.txt)
Define PathCHM$ = "C:\my_folder2\html\" ; destination path for html files
Define length0 = Len(PathTXT$) + 1
Define length1 = Len(PathTXT$)
; Define length2 = Len(PathTXT$) - 4
Define isFile
Define Text.string
FileSearch(Files(), PathTXT$)

If ListSize(Files()) = 0
	End
EndIf

ForEach Files()
	Files() = Mid(Files(), length0)
Next

; Here are regular expressions to highlight the text, framing it in tags
CreateRegularExpression(0, "\[.+?\]",  0)
CreateRegularExpression(1, "\d\d\.\d\d\.\d\d",  0) ; date
CreateRegularExpression(2, ~"\".+?\"",  0)
CreateRegularExpression(3, "[^\x{01}-\x{FF}]+",  0) ; > Ascii

ForEach Files()
	Text\s = ReadFileToVar(PathTXT$ + Files())
; 	the file is already open, so we don’t worry about breaking the path and replace it locally
	ReplaceString(Files(), ".txt", ".htm", #PB_String_CaseSensitive | #PB_String_InPlace, 1, 1)
	
; 	Adding tags
	RegexReplace2(0, @Text, "<font color=#FF8080>\0</font>")
	RegexReplace2(1, @Text, "<font color=#00C2BB>\0</font>")
	RegexReplace2(2, @Text, "<font color=#3FBA89>\0</font>")
	RegexReplace2(3, @Text, "<font color=#E3CEAB>\0</font>")
; 	RegexReplace2(4, @Text, "<font color=#AFE7B3>\0</font>")
; 	RegexReplace2(5, @Text, "<font color=#8CD0D3>\0</font>")
; 	RegexReplace2(6, @Text, "<font color=#FCAF3E>\0</font>")
	
; 	Adding line breaks
	Text\s = ReplaceString(Text\s, #CRLF$, "<br />")
	If FindString(Text\s, #LF$)
		Text\s = ReplaceString(Text\s, #LF$, "<br />" + #CRLF$)
	EndIf
	If FindString(Text\s, #CR$)
		Text\s = ReplaceString(Text\s, #CR$, "<br />" + #CRLF$)
	EndIf
	Text\s = ReplaceString(Text\s, "<br />", "<br />" + #CRLF$)
; 	Debug Files()
	#File = 0
	If CreateFile(#File, PathCHM$ + Files(), #PB_Ascii)
		isFile = 1
	ElseIf ForceDirectories(GetPathPart(PathCHM$ + Files())) And CreateFile(#File, PathCHM$ + Files(), #PB_Ascii)
		isFile = 1
	Else
		Debug "file not created: " + Files()
	EndIf
	If isFile
		; 		WriteStringFormat(#File, #PB_Ascii)
		WriteStringN(#File, ~"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">")
		WriteStringN(#File, "<html>")
		WriteStringN(#File, "<head>")
		WriteStringN(#File, ~"<meta content=\"text/html; charset=UTF-8\" http-equiv=\"Content-Type\">")
		WriteStringN(#File, ~"<link type=\"text/css\" rel=\"stylesheet\" href=\"../styles.css\">")
		WriteStringN(#File, "</head>")
		WriteStringN(#File, "<body>")
		WriteStringN(#File, Text\s)
		WriteStringN(#File, "<br /><br /><br /><br />")
		WriteString(#File, "</body></html>")
		CloseFile(#File)
	EndIf
Next

FreeRegularExpression(#PB_All)

; Creating a list of files

;- toc.htm
If CreateFile(#File, PathCHM$ + "toc.htm", #PB_UTF8)
	WriteStringN(#File, ~"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">")
	WriteStringN(#File, "<html>")
	WriteStringN(#File, "<head>")
	WriteStringN(#File, ~"<meta content=\"text/html; charset=UTF-8\" http-equiv=\"Content-Type\">")
	WriteStringN(#File, ~"<link type=\"text/css\" rel=\"stylesheet\" href=\"../styles.css\">")
	WriteStringN(#File, "</head>")
	WriteStringN(#File, "<body>")
	WriteStringN(#File, "<ul>")

	ForEach Files()
		WriteStringN(#File, ~"<li><a href=\"" +  Files() + ~"\">" +  GetFilePart(Files(), #PB_FileSystem_NoExtension) + "</a></li>")
	Next
	WriteStringN(#File, "</ul>")
	WriteStringN(#File, "</body>")
	WriteString(#File, "</html>")
	CloseFile(#File)
EndIf

; Creating files for CHM (contents and indexes)
;- _TOC.hhc
If CreateFile(#File, GetPathPart(PathCHM$) + "_TOC.hhc", #PB_Ascii)
	WriteStringN(#File, ~"<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML//EN\"><HTML><HEAD>")
	WriteStringN(#File, ~"<meta name=\"GENERATOR\" content=\"Hand\">")
	WriteStringN(#File, "<!-- Sitemap 1.0 -->")
	WriteStringN(#File, "</HEAD><BODY>")
	WriteStringN(#File, #CRLF$)
	WriteStringN(#File, ~"<OBJECT type=\"text/site properties\"><param name=\"Window Styles\" value=\"0x25\"><param name=\"ImageType\" value=\"Folder\"></OBJECT><UL>")
	WriteStringN(#File, #CRLF$)

	ForEach Files()
		WriteString(#File, ~"<LI><OBJECT type=\"text/sitemap\"><param name=\"Name\" value=\"" +  GetFilePart(Files(), #PB_FileSystem_NoExtension))
		WriteString(#File, ~"\"><param name=\"Local\" value=\"html\\" +  Files())
		WriteStringN(#File, ~"\"></OBJECT>")
	Next
	WriteString(#File, "</UL></BODY></HTML>")
	CloseFile(#File)
EndIf

;- _Index.hhk
If CreateFile(#File, GetPathPart(PathCHM$) + "_Index.hhk", #PB_Ascii)
	WriteStringN(#File, ~"<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML//EN\"><HTML><HEAD>")
	WriteStringN(#File, ~"<meta name=\"GENERATOR\" content=\"Hand\">")
	WriteStringN(#File, "<!-- Sitemap 1.0 -->")
	WriteStringN(#File, "</HEAD><BODY>")
	WriteStringN(#File, "<UL>")
	WriteStringN(#File, #CRLF$)

	ForEach Files()
		WriteString(#File, ~"<LI><OBJECT type=\"text/sitemap\"><param name=\"Name\" value=\"" +  GetFilePart(Files(), #PB_FileSystem_NoExtension))
		WriteString(#File, ~"\"><param name=\"Local\" value=\"html\\" +  Files())
		WriteStringN(#File, ~"\"></OBJECT>")
	Next
	WriteString(#File, "</UL></BODY></HTML>")
	CloseFile(#File)
EndIf

; Project.hhp
; ====================
; [OPTIONS]
; Compatibility=1.1 Or later
; Compiled file=Project.chm
; Contents file=_TOC.hhc
; Default Window=Help Window
; Default topic=html\TOC.htm
; Display compile progress=Yes
; Enhanced decompilation=Yes
; Error log file=_errorlog.txt
; Full-text search=Yes
; Index file=_Index.hhk
; Language=0x419
; Title=Неисправности (15.08.2024)

; [WINDOWS]
; Help Window="Project  (15.08.2024)","_TOC.hhc","_Index.hhk","html\TOC.htm",,,,,,0x63520,,0x10000c,[0,0,1024,768],0x80000,,,,,,0


; [INFOTYPES]
threedslider
Enthusiast
Enthusiast
Posts: 397
Joined: Sat Feb 12, 2022 7:15 pm

Re: txt -> html (chm)

Post by threedslider »

Nice try and good job too :D

I am looking forward to this :wink:

Good luck and happy coding !
Post Reply