Page 1 of 1

HTML to RTF Lib

Posted: Fri Apr 28, 2023 8:23 am
by Oliver13
Hello,
I found a free DLL to convert HTML to RTF (https://sautinsoft.com/products/convert ... to-rtf.php). The conversion just needs one API call, sample code (C) is included in the download package.

I was able to translate it to PB and call the DLL: conversion of the file works, but the given parameters (passed as a struct) are obviously ignored. When compiling the C source, everything works well. I'm probably doing something wrong when converting the struct to PB; here is my code (PB 32-bit):

Code: Select all

;-> relevant C-code

; struct ConvertSettings
; {		
; 	int PreserveTables;				//1 - preserve tables, 0 - transfer to text
; 	int PreserveImages;				//1 - preserve images,  0 - skip
; 	int PreserveHyperlinks;			//1 - preserve hyperlinks, 0 - skip
; 	int PreserveFontFace;			//1 - preserve font faces as in HTML, 0 - all font faces will be as in 'FontFace'
; 	int PreserveFontSize;			//1 - preserve font sizes as in HTML, 0 - all font sizes will be as in 'FontSize'
; 	int PreserveFontColor;			//1 - preserve font colors as in HTML, 0 - black font color
; 	int PreserveBackgroundColor;	//1 - preserve background colors as in HTML, 0 - skip
; 	int PreserveAlignment;			//1 - preserve alignment as in HTML, 0 - all text will have align as in 'PageAlignment'
; 	int PreserveTableWidth;			//1 - preserve width of columns
; 	int PreserveNestedTables;		//1 - preserve nested tables, 0 - translate nested tables to plain tables
; 	int PageMarginLeft;				//page margin left, mm. For example 10
; 	int PageMarginRight;			//page margin right, mm. For example 10
; 	int PageMarginTop;				//page margin top, mm. For example 10
; 	int PageMarginBottom;			//page margin bottom, mm. For example 10
; 	int BorderVisibility;			//table borders: 1 - visible borders, 0 - hidden borders, 2 - as in HTML
; 	int PageOrientation;			//page orientation: 0 - Portrait, 1 - Landscape
; 	int PageSize;					//page size: 0 - A4, 1 - A3, 2 - A5, 3 - B5, 4 - Letter, 5 - Legal, 6 - Executive, 7 - Monarch
; 	int FontFace;					//default font face: Arial - 0, Times New Roman - 1, Verdana - 2, Helvetica - 3, Courier - 4, Courier New - 5, Times - 6, Georgia - 7, MS Sans Serif - 8,
; 									//Futura - 9, Arial Narrow - 10, Garamond - 11, Impact - 12, Lucida Console - 13, Tahoma - 14, Inform - 15, Symbol - 16, WingDings - 17, Traditional Arabic - 18
; 	int FontSize;					//default font size, any value from 6 to 72
; 	int PageAlignment;				//page alignment: 0 - left, 1 - center, 2 - right, 3 - justify
; 	int RtfLanguage;				//RTF language: English - 1033, Albanian - 1052, Belgian - 2067, Bulgarian - 1026, Hungarian - 1038, Danish - 1030, Spanish - 3082, Latvian - 1062, Lithuanian - 1063,
; 									//German - 1031, Netherlands - 1043, Norwegian - 2068, Portuguese - 2070, Romanian - 1048, Russian - 1049, Ukrainian - 1058, Finnish - 1035, French - 1036,
; 									//Czech - 1029, Swedish - 1053, Arabic - 1053, Turkish - 1055, Japanese - 932, SimplifiedChinese - 936, TraditionalChinese - 950, Korean - 949, Thai = 874
; 	int Encoding;					//AutoSelect - 0, ISO-8859-1 - 1, ISO-8859-5 - 2, KOI8-R - 3, Windows-1251 - 4, UTF-8 - 5, Windows-1254 - 6, Windows-1256 - 7,
; 									//Windows-1250 - 8, Windows-1252 - 9, Windows-1253 - 10, Windows-1255 - 11, Windows-1257 - 12, Windows-1258 - 13
; 	int OutputTextFormat;			//Output Format: Rtf - 0, Text - 1, Doc - 2 (only file with .doc extension)  
; 	int PreservePageBreaks;			//1 - preserve page-breaks
; 	int ImageCompatible;			//type of produced images: Word - 0, WordPad - 1
; 	int PageNumbers;				//page numbers: 0 - disable, 1 - numbers from first page, 2 - from second page
; 	char PageHeader[150];			//page header, any string
; 	char PageFooter[150];			//page footer, any string
; 	char HtmlPath[650];				//html path for method 'htmltortf_string', will be used for find images (now is not used)
; 	int PageNumbersAlignV;			//page numbers vertical align: Top - 4, Bottom - 5
; 	int PageNumbersAlignH;			//page numbers horizontal align: 0 - left, 1 - center, 2 - right, 3 - justify
; 	int PreserveHR;					//1 - preserve <hr>, 0 - skip
; 	int RtfParts;					//0 - rtf completely, 1 - only rtf body (to insert inside another rtf files)
; 	int CreateTraceFile;			//1 - the component will create trace file, it helps to see how converting goes and shows errors
; 	char TraceFilePath[650];		//specifies path for trace file, for example "c:\\Trace.txt"
; 	int TableCellPadding;			//specifies table cell padding in pixels, any value from 0 to 10
; 	int PreserveHttpImages;			//1- download http images, 0 - skip them
; 	}                     ;
; 	


; //SET PARAMETERS
; struct ConvertSettings cs;
; 
; cs.BorderVisibility = SameAsOriginalHtml;
; cs.Encoding = 0                         ;
; cs.FontFace = 0                         ;
; cs.FontSize = 9                         ;
; strcpy(cs.HtmlPath,"")                  ; 
; cs.ImageCompatible = 0                  ;
; cs.OutputTextFormat = Rtf               ;
; cs.PageAlignment = 0                    ;
; strcpy(cs.PageHeader,"")                ;
; strcpy(cs.PageFooter,"")                ;
; cs.PageMarginBottom = 10                ;
; cs.PageMarginLeft = 15                  ;
; cs.PageMarginRight = 15                 ;
; cs.PageMarginTop = 15                   ;
; cs.PageNumbers = 0                      ;
; cs.PageNumbersAlignH = 1                ;
; cs.PageNumbersAlignV = 5                ;
; cs.PageOrientation = 0                  ;
; cs.PageSize = 4                         ;
; cs.PreserveAlignment = true             ;
; cs.PreserveBackgroundColor = true       ;
; cs.PreserveFontColor = true             ;
; cs.PreserveFontFace = true              ;
; cs.PreserveFontSize = true              ;
; cs.PreserveHR = 0                       ;
; cs.PreserveHyperlinks = true            ;
; cs.PreserveNestedTables = true          ;
; cs.PreservePageBreaks = true            ;
; cs.PreserveTables = true                ;
; cs.PreserveTableWidth = true            ;
; cs.RtfLanguage = 1033                   ;
; cs.RtfParts = 0                         ;
; cs.PreserveImages = 1                   ;
; cs.TableCellPadding = 0                 ;
; cs.PreserveHttpImages = 1               ;
; 
; //CONVERT
; retValue=htmltortf_file((char*)HtmlFile.c_str(),(char*)RtfFile.c_str(),cs);


; Settings record handed to htmltortf_file(). Field order and sizes follow the
; C "struct ConvertSettings" from the DLL's sample code.
; Every C "int" is declared as .l (32-bit). Do not use .i here: it is
; pointer-sized and becomes 8 bytes on an x64 compiler, which would shift every
; following field.
; Align #PB_Structure_AlignC requests C-style member alignment; it matters for
; the integers that follow the char arrays, whose combined sizes are not
; multiples of 4.
Structure ConvertSettings Align #PB_Structure_AlignC
  PreserveTables.l          ; 1 - preserve tables, 0 - transfer to text
  PreserveImages.l          ; 1 - preserve images, 0 - skip
  PreserveHyperlinks.l      ; 1 - preserve hyperlinks, 0 - skip
  PreserveFontFace.l        ; 1 - keep font faces from HTML, 0 - use FontFace for everything
  PreserveFontSize.l        ; 1 - keep font sizes from HTML, 0 - use FontSize for everything
  PreserveFontColor.l       ; 1 - keep font colors from HTML, 0 - black text
  PreserveBackgroundColor.l ; 1 - keep background colors from HTML, 0 - skip
  PreserveAlignment.l       ; 1 - keep alignment from HTML, 0 - use PageAlignment for everything
  PreserveTableWidth.l      ; 1 - preserve width of table columns
  PreserveNestedTables.l    ; 1 - preserve nested tables, 0 - flatten to plain tables
  PageMarginLeft.l          ; left page margin in mm
  PageMarginRight.l         ; right page margin in mm
  PageMarginTop.l           ; top page margin in mm
  PageMarginBottom.l        ; bottom page margin in mm
  BorderVisibility.l        ; table borders: 0 - hidden, 1 - visible, 2 - as in HTML
  PageOrientation.l         ; 0 - portrait, 1 - landscape
  PageSize.l                ; 0 - A4, 1 - A3, 2 - A5, 3 - B5, 4 - Letter, 5 - Legal, 6 - Executive, 7 - Monarch
  FontFace.l                ; default font face index (0 - Arial, 1 - Times New Roman, ...)
  FontSize.l                ; default font size, 6 to 72
  PageAlignment.l           ; 0 - left, 1 - center, 2 - right, 3 - justify
  RtfLanguage.l             ; RTF language id, e.g. 1033 - English, 1031 - German
  Encoding.l                ; 0 - auto select, 1 - ISO-8859-1, ... 5 - UTF-8, ...
  OutputTextFormat.l        ; 0 - RTF, 1 - text, 2 - DOC
  PreservePageBreaks.l      ; 1 - preserve page breaks
  ImageCompatible.l         ; produced image type: 0 - Word, 1 - WordPad
  PageNumbers.l             ; 0 - disabled, 1 - from first page, 2 - from second page
  sPageHeader.a[150]        ; page header text (inline char array, zero-terminated)
  sPageFooter.a[150]        ; page footer text (inline char array, zero-terminated)
  sHtmlPath.a[650]          ; HTML base path for htmltortf_string (documented as currently unused)
  PageNumbersAlignV.l       ; page number vertical position: 4 - top, 5 - bottom
  PageNumbersAlignH.l       ; page number horizontal alignment: 0 - left, 1 - center, 2 - right, 3 - justify
  PreserveHR.l              ; 1 - preserve <hr>, 0 - skip
  RtfParts.l                ; 0 - complete RTF file, 1 - RTF body only
  CreateTraceFile.l         ; 1 - write a trace (debug) file
  sTraceFilePath.a[650]     ; path of the trace file (inline char array, zero-terminated)
  TableCellPadding.l        ; table cell padding in pixels, 0 to 10
  PreserveHttpImages.l      ; 1 - download http images, 0 - skip them
EndStructure



; C-calling-convention signatures of the two DLL exports used below.
; Both path arguments are converted to ASCII by the p-ascii pseudotype.
; NOTE(review): the C sample calls htmltortf_file(..., cs) with the struct
; itself rather than &cs, which suggests the DLL expects ConvertSettings BY
; VALUE. A single integer argument can only carry the struct's address, which
; would explain why the settings appear to be ignored - confirm against the
; DLL's header before relying on this prototype.
PrototypeC htmltortf_file(sourcePath.p-ascii, destPath.p-ascii, settings)
PrototypeC flushlist()

; Function pointers bound by LoadDLL(); they stay 0 until the library is loaded.
Global htmltortf_file.htmltortf_file, flushlist.flushlist



; Opens htmltortf_sautinsoft.dll and binds its exports to the global prototype
; variables htmltortf_file and flushlist.
; Returns the #PB_Any library number on success, or #False when the DLL cannot
; be opened or one of the exports is missing. In the latter case the library is
; closed again and both pointers are reset, so callers never see a
; half-initialised state.
Procedure.i LoadDLL()
  Protected hDLL.i

  hDLL = OpenLibrary(#PB_Any, "htmltortf_sautinsoft.dll")
  If hDLL = 0
    ProcedureReturn #False
  EndIf

  htmltortf_file = GetFunction(hDLL, "htmltortf_file")
  flushlist      = GetFunction(hDLL, "flushlist")

  ; GetFunction() yields 0 for an unknown export; calling through a null
  ; function pointer would crash, so treat it as a failed load.
  If htmltortf_file = 0 Or flushlist = 0
    htmltortf_file = 0
    flushlist      = 0
    CloseLibrary(hDLL)
    ProcedureReturn #False
  EndIf

  ProcedureReturn hDLL
EndProcedure


; Demo: convert result.htm to result.rtf using a custom ConvertSettings record.
iDLL = LoadDLL()
If IsLibrary(iDLL)
  Define cs.ConvertSettings

  ; The text members (sPageHeader, sPageFooter, sHtmlPath, sTraceFilePath) are
  ; inline char arrays, not string pointers, so they cannot be assigned with
  ; "=". To fill one, copy the bytes into it, for example:
  ;   PokeS(@cs\sPageHeader[0], "Test", 149, #PB_Ascii)

  ; Content to carry over from the HTML
  cs\PreserveTables     = 1
  cs\PreserveImages     = 1
  cs\PreserveHttpImages = 1
  cs\PreserveFontSize   = 1
  cs\PreserveTableWidth = 1
  cs\PreservePageBreaks = 1
  cs\PreserveHR         = 1

  ; Page layout
  cs\PageSize         = 0      ; A4
  cs\PageOrientation  = 0      ; portrait
  cs\PageMarginTop    = 20
  cs\PageMarginBottom = 20
  cs\PageMarginLeft   = 20
  cs\PageMarginRight  = 20

  ; Output options
  cs\RtfLanguage     = 1031    ; German
  cs\ImageCompatible = 0       ; Word
  cs\CreateTraceFile = 1

  ssource$ = "result.htm"
  sdest$   = "result.rtf"
  DeleteFile(sdest$)           ; never mistake a stale file for fresh output

  ; NOTE(review): this hands the DLL the ADDRESS of cs. The C sample passes the
  ; struct itself (by value), so the DLL probably reads its settings from the
  ; wrong place - the conversion runs, but the values set above are ignored.
  result = htmltortf_file(ssource$, sdest$, @cs)

  ; Only open the document when the converter actually produced one.
  If FileSize(sdest$) >= 0
    RunProgram(sdest$)
  EndIf
  Debug result

  flushlist()
  CloseLibrary(iDLL)
EndIf
  
  
Thank you for any hints... I'm using PB 6.01 32bit

Re: HTML to RTF Lib

Posted: Fri Apr 28, 2023 8:36 am
by PeDe
int in C is long in PureBasic, otherwise it can't work with the PB 64-bit version if you use it.

Peter

Re: HTML to RTF Lib

Posted: Fri Apr 28, 2023 11:31 am
by Oliver13
int in C is long in PureBasic, otherwise it can't work with the PB 64-bit version if you use it.
Thanks - sorry, I forgot to mention: I'm using the 32-bit version of PB.

Re: HTML to RTF Lib

Posted: Fri Apr 28, 2023 12:56 pm
by infratec
Try this:

Code: Select all

result=htmltortf_file(ssource$,sdest$,@cs)
But it is not a free dll according to the web page.
And you will see:
Created by unlicensed version of RTF to HTML .Net
And, as already mentioned, you should use .l in the structure and not .i

Re: HTML to RTF Lib

Posted: Fri Apr 28, 2023 2:45 pm
by Oliver13
Try this:
result=htmltortf_file(ssource$,sdest$,@cs)
Thanks, I already tried that, but it does not work either.
I also tried with .l instead of .i (btw, is there even a difference on 32bit ?).

It is really strange, for the C sample code works.
But it is not a free dll according to the web page.
It's free (I've asked them...), but I'm NOT talking about the .NET version, but about the Win32 DLL.

I just saw: the developer seems to have removed the download page in the meantime, because this old DLL is no longer maintained.

Re: HTML to RTF Lib

Posted: Fri Apr 28, 2023 3:48 pm
by PeDe
I specified absolute paths as a test, and for me your code works without any further change.

Code: Select all

	ssource$="T:\TEMP\Testing HTMLs\sample1.htm"
	sdest$="T:\TEMP\result.rtf"
Edit: Sorry no, changes to values in the structure are ignored, only the RTF file is created.

Peter

Re: HTML to RTF Lib

Posted: Fri Apr 28, 2023 4:07 pm
by Kiffi
Just putting it out there as an alternative: Pandoc is a universal document converter which is available for every important operating system.

Code: Select all

pandoc -s -f html -t rtf your.html -o your.rtf
https://pandoc.org/try/

Re: HTML to RTF Lib

Posted: Fri Apr 28, 2023 6:02 pm
by RASHAD
Hi
Work around

Code: Select all

; OLECMDID command identifiers, used as the first argument of
; IWebBrowser2::ExecWB. Values are assigned by the enumeration counter; an
; explicit "= n" restarts the counter after each gap (31, 34, 53 and 54 are not
; defined here). The trailing comment on each line gives the resulting value.
Enumeration 1
  #OLECMDID_OPEN                           ;  1
  #OLECMDID_NEW                            ;  2
  #OLECMDID_SAVE                           ;  3
  #OLECMDID_SAVEAS                         ;  4
  #OLECMDID_SAVECOPYAS                     ;  5
  #OLECMDID_PRINT                          ;  6
  #OLECMDID_PRINTPREVIEW                   ;  7
  #OLECMDID_PAGESETUP                      ;  8
  #OLECMDID_SPELL                          ;  9
  #OLECMDID_PROPERTIES                     ; 10
  #OLECMDID_CUT                            ; 11
  #OLECMDID_COPY                           ; 12
  #OLECMDID_PASTE                          ; 13
  #OLECMDID_PASTESPECIAL                   ; 14
  #OLECMDID_UNDO                           ; 15
  #OLECMDID_REDO                           ; 16
  #OLECMDID_SELECTALL                      ; 17
  #OLECMDID_CLEARSELECTION                 ; 18
  #OLECMDID_ZOOM                           ; 19
  #OLECMDID_GETZOOMRANGE                   ; 20
  #OLECMDID_UPDATECOMMANDS                 ; 21
  #OLECMDID_REFRESH                        ; 22
  #OLECMDID_STOP                           ; 23
  #OLECMDID_HIDETOOLBARS                   ; 24
  #OLECMDID_SETPROGRESSMAX                 ; 25
  #OLECMDID_SETPROGRESSPOS                 ; 26
  #OLECMDID_SETPROGRESSTEXT                ; 27
  #OLECMDID_SETTITLE                       ; 28
  #OLECMDID_SETDOWNLOADSTATE               ; 29
  #OLECMDID_STOPDOWNLOAD                   ; 30

  #OLECMDID_FIND = 32                      ; 32 (31 skipped)
  #OLECMDID_DELETE                         ; 33

  #OLECMDID_HTTPEQUIV_DONE = 35            ; 35 (34 skipped)
  #OLECMDID_ENABLE_INTERACTION             ; 36
  #OLECMDID_ONUNLOAD                       ; 37
  #OLECMDID_PROPERTYBAG2                   ; 38
  #OLECMDID_PREREFRESH                     ; 39
  #OLECMDID_SHOWSCRIPTERROR                ; 40
  #OLECMDID_SHOWMESSAGE                    ; 41
  #OLECMDID_SHOWFIND                       ; 42
  #OLECMDID_SHOWPAGESETUP                  ; 43
  #OLECMDID_SHOWPRINT                      ; 44
  #OLECMDID_CLOSE                          ; 45
  #OLECMDID_ALLOWUILESSSAVEAS              ; 46
  #OLECMDID_DONTDOWNLOADCSS                ; 47
  #OLECMDID_UPDATEPAGESTATUS               ; 48
  #OLECMDID_PRINT2                         ; 49
  #OLECMDID_PRINTPREVIEW2                  ; 50
  #OLECMDID_SETPRINTTEMPLATE               ; 51
  #OLECMDID_GETPRINTTEMPLATE               ; 52

  #OLECMDID_PAGEACTIONBLOCKED = 55         ; 55 (53, 54 skipped)
  #OLECMDID_PAGEACTIONUIQUERY              ; 56
  #OLECMDID_FOCUSVIEWCONTROLS              ; 57
  #OLECMDID_FOCUSVIEWCONTROLSQUERY         ; 58
  #OLECMDID_SHOWPAGEACTIONMENU             ; 59
  #OLECMDID_ADDTRAVELENTRY                 ; 60
  #OLECMDID_UPDATETRAVELENTRY              ; 61
  #OLECMDID_UPDATEBACKFORWARDSTATE         ; 62
  #OLECMDID_OPTICAL_ZOOM                   ; 63
  #OLECMDID_OPTICAL_GETZOOMRANGE           ; 64
  #OLECMDID_WINDOWSTATECHANGED             ; 65
  #OLECMDID_ACTIVEXINSTALLSCOPE            ; 66
  #OLECMDID_UPDATETRAVELENTRY_DATARECOVERY ; 67
EndEnumeration

; OLECMDEXECOPT execution options, used as the second argument of
; IWebBrowser2::ExecWB. Values are spelled out so they can be checked at a glance.
Enumeration 0
  #OLECMDEXECOPT_DODEFAULT      = 0
  #OLECMDEXECOPT_PROMPTUSER     = 1
  #OLECMDEXECOPT_DONTPROMPTUSER = 2
  #OLECMDEXECOPT_SHOWHELP       = 3
EndEnumeration

; Variant stuff

; Variant type tags (#VT_*). Consecutive values come from the enumeration
; counter; an explicit "= n" restarts it after each gap. Mask and flag values
; at the end are always written out in hex.
Enumeration
  #VT_EMPTY = 0          ;  0
  #VT_NULL               ;  1
  #VT_I2                 ;  2
  #VT_I4                 ;  3
  #VT_R4                 ;  4
  #VT_R8                 ;  5
  #VT_CY                 ;  6
  #VT_DATE               ;  7
  #VT_BSTR               ;  8
  #VT_DISPATCH           ;  9
  #VT_ERROR              ; 10
  #VT_BOOL               ; 11
  #VT_VARIANT            ; 12
  #VT_UNKNOWN            ; 13
  #VT_DECIMAL            ; 14

  #VT_I1 = 16            ; 16 (15 skipped)
  #VT_UI1                ; 17
  #VT_UI2                ; 18
  #VT_UI4                ; 19
  #VT_I8                 ; 20
  #VT_UI8                ; 21
  #VT_INT                ; 22
  #VT_UINT               ; 23
  #VT_VOID               ; 24
  #VT_HRESULT            ; 25
  #VT_PTR                ; 26
  #VT_SAFEARRAY          ; 27
  #VT_CARRAY             ; 28
  #VT_USERDEFINED        ; 29
  #VT_LPSTR              ; 30
  #VT_LPWSTR             ; 31

  #VT_RECORD = 36        ; 36

  #VT_FILETIME = 64      ; 64
  #VT_BLOB               ; 65
  #VT_STREAM             ; 66
  #VT_STORAGE            ; 67
  #VT_STREAMED_OBJECT    ; 68
  #VT_STORED_OBJECT      ; 69
  #VT_BLOB_OBJECT        ; 70
  #VT_CF                 ; 71
  #VT_CLSID              ; 72

  ; Masks and modifier flags
  #VT_BSTR_BLOB     = $0FFF
  #VT_VECTOR        = $1000
  #VT_ARRAY         = $2000
  #VT_BYREF         = $4000
  #VT_RESERVED      = $8000
  #VT_ILLEGAL       = $FFFF
  #VT_ILLEGALMASKED = $0FFF
  #VT_TYPEMASK      = $0FFF
EndEnumeration

; Sample HTML shown in the WebGadget and exported as RTF.
; The heading's closing tag is written as "</h1>" (it was missing its ">").
Text$ = "<div style=background-color:SeaShell;border-style:solid;border-left-width:2px;>This <b> is </b> a real <i> test </i> with HTML <font size=5 color=red>revolutionary</font>."
Text$ + "<h1>Heading</h1></div> "

; EDITSTREAM descriptor (cookie + callback) passed with #EM_STREAMOUT when saving.
Global EStream.EDITSTREAM

; Stream-out callback installed in EStream\pfnCallback for #EM_STREAMOUT.
;   dwCookie - file handle to write to (set from FileID() by the caller)
;   pbBuff   - buffer holding the data to write
;   cb       - number of bytes in pbBuff
;   pcb      - receives the number of bytes actually written
; Returns 0 when WriteFile_() succeeds and 1 when it fails.
Procedure SStreamCB(dwCookie, pbBuff, cb, pcb)
  ; WriteFile_() signals success with any non-zero value, so test for truth
  ; instead of XOR-ing the result with 1 (that is 0 only for exactly 1).
  If WriteFile_(dwCookie, pbBuff, cb, pcb, 0)
    ProcedureReturn 0
  EndIf
  ProcedureReturn 1
EndProcedure

; Demo window: gadget 1 (WebGadget) renders Text$, gadget 2 (EditorGadget) is
; switched to rich-text mode, gadget 3 is the "Save as RTF" button.
; Saving copies the rendered page to the clipboard, pastes it into the editor
; and streams the editor content to the chosen file via SStreamCB().
; Note: this overwrites whatever the user currently has on the clipboard.
If OpenWindow(0, 0, 0, 600, 400, "", #PB_Window_ScreenCentered | #PB_Window_SystemMenu | #PB_Window_MinimizeGadget)

  WebGadget(1, 10, 10, 550, 150, "")
  WebObject.IWebBrowser2 = GetWindowLongPtr_(GadgetID(1), #GWL_USERDATA)
  SetGadgetItemText(1, #PB_Web_HtmlCode, Text$)

  EditorGadget(2, 10, 180, 580, 165)
  SendMessage_(GadgetID(2), #EM_SETTEXTMODE, #TM_RICHTEXT, 0)

  ButtonGadget(3, 10, 350, 160, 20, "Save as RTF")

  Repeat
    Select WaitWindowEvent()
      Case #PB_Event_CloseWindow
        Quit = 1

      Case #PB_Event_Gadget
        If EventGadget() = 3
          ; Browser -> clipboard -> editor
          WebObject\ExecWB(#OLECMDID_SELECTALL, #OLECMDEXECOPT_DONTPROMPTUSER, 0, 0)
          WebObject\ExecWB(#OLECMDID_COPY, #OLECMDEXECOPT_DONTPROMPTUSER, 0, 0)
          WebObject\ExecWB(#OLECMDID_CLEARSELECTION, #OLECMDEXECOPT_DONTPROMPTUSER, 0, 0)
          ClearGadgetItems(2)
          SendMessage_(GadgetID(2), #WM_PASTE, 0, 0)

          ; The pattern is a list of "description|mask" pairs.
          sFile$ = SaveFileRequester("Please choose file to save", GetTemporaryDirectory() + "test.rtf", "RTF (*.rtf)|*.rtf", 0)

          ; An empty name means the requester was cancelled.
          If sFile$ <> ""
            If CreateFile(0, sFile$)
              EStream\dwCookie    = FileID(0)
              EStream\pfnCallback = @SStreamCB()
              SendMessage_(GadgetID(2), #EM_STREAMOUT, #SF_RTF, @EStream)
              CloseFile(0)
            EndIf
          EndIf
        EndIf
    EndSelect
  Until Quit = 1

EndIf