Page 2 of 2

Re: [Solved] Read a String inside a PDF file

Posted: Sun Apr 26, 2020 6:27 pm
by infratec
You need ImportC:

Code: Select all

;
; https://github.com/bblanchon/pdfium-binaries
;

; Selects which demo the CompilerSelect block further down compiles.
; That block handles the values 1 and 2.
#Example = 2

; EnableExplicit is only turned on when this file is compiled as the main
; file, so including it from another source does not change that source's
; variable-declaration rules.
CompilerIf #PB_Compiler_IsMainFile
  EnableExplicit
CompilerEndIf

; PDF object types.
; The enumeration starts at 0 and counts up by one, so the values are 0..9.
Enumeration FPDF_OBJECT
  #FPDF_OBJECT_UNKNOWN     ; 0
  #FPDF_OBJECT_BOOLEAN     ; 1
  #FPDF_OBJECT_NUMBER      ; 2
  #FPDF_OBJECT_STRING      ; 3
  #FPDF_OBJECT_NAME        ; 4
  #FPDF_OBJECT_ARRAY       ; 5
  #FPDF_OBJECT_DICTIONARY  ; 6
  #FPDF_OBJECT_STREAM      ; 7
  #FPDF_OBJECT_NULLOBJ     ; 8
  #FPDF_OBJECT_REFERENCE   ; 9
EndEnumeration

; PDF text rendering modes.
; The "unknown" marker is negative, so it is defined on its own; the real
; modes are numbered 0..7 by the enumeration.
#FPDF_TEXTRENDERMODE_UNKNOWN = -1
Enumeration FPDF_TEXTRENDERMODE
  #FPDF_TEXTRENDERMODE_FILL              ; 0
  #FPDF_TEXTRENDERMODE_STROKE            ; 1
  #FPDF_TEXTRENDERMODE_FILL_STROKE       ; 2
  #FPDF_TEXTRENDERMODE_INVISIBLE         ; 3
  #FPDF_TEXTRENDERMODE_FILL_CLIP         ; 4
  #FPDF_TEXTRENDERMODE_STROKE_CLIP       ; 5
  #FPDF_TEXTRENDERMODE_FILL_STROKE_CLIP  ; 6
  #FPDF_TEXTRENDERMODE_CLIP              ; 7
EndEnumeration
; Alias for the highest valid rendering mode.
#FPDF_TEXTRENDERMODE_LAST = #FPDF_TEXTRENDERMODE_CLIP

; Duplex types, numbered 0..3.
Enumeration FPDF_DUPLEXTYPE
  #DuplexUndefined      ; 0
  #Simplex              ; 1
  #DuplexFlipShortEdge  ; 2
  #DuplexFlipLongEdge   ; 3
EndEnumeration


; Bindings for the PDFium functions used by the examples, linked through the
; import library "pdfium.dll.lib". Document, page and text-page handles are
; passed as plain integers. Return types are written out explicitly; where
; the result is never used by the examples it is still declared as integer,
; which is what an untyped declaration defaults to.
ImportC "pdfium.dll.lib"
  ; --- library and document ---
  FPDF_InitLibrary.i()
  FPDF_LoadDocument.i(file_path.p-utf8, password.p-utf8)  ; strings are converted to UTF-8 for the call
  FPDF_CloseDocument.i(document.i)
  FPDF_GetPageCount.i(document.i)
  
  ; --- page ---
  FPDF_LoadPage.i(document.i, page_index.i)
  FPDF_ClosePage.i(page.i)
  FPDF_GetPageWidthF.f(page.i)
  FPDF_GetPageHeightF.f(page.i)
  
  ; --- text extraction ---
  FPDFText_LoadPage.i(page.i)
  FPDFText_ClosePage.i(text_page.i)
  FPDFText_CountChars.i(text_page.i)
  FPDFText_GetText.i(text_page.i, start_index.i, count.i, *result)  ; *result: caller-allocated output buffer
EndImport


CompilerIf #PB_Compiler_IsMainFile
  
  CompilerSelect #Example
    CompilerCase 1
      
      ; Example 1: let the user pick a PDF, then walk through it page by page.
      ; For each page the size is written to the debug output and the page
      ; text is shown in a Yes/No requester; "No" stops the walk.
      Define.i Doc, Pages, PageNo, PageHandle, TextHandle, CharCount
      Define PdfFile$, Body$, Title$
      Define *Utf16
      
      PdfFile$ = OpenFileRequester("Choose a PDF file", "", "PDF|*.pdf", 0)
      If PdfFile$ <> ""
        
        FPDF_InitLibrary()
        Doc = FPDF_LoadDocument(PdfFile$, "")
        If Doc
          Pages = FPDF_GetPageCount(Doc)
          Debug "PDF contains " + Str(Pages) + " pages."
          
          PageNo = 0
          While PageNo < Pages
            PageHandle = FPDF_LoadPage(Doc, PageNo)
            If PageHandle
              ; Page size is scaled by 0.3528 and reported as millimetres.
              Debug "Page " + Str(PageNo + 1) + " : " + StrF(FPDF_GetPageWidthF(PageHandle) * 0.3528, 0) + "x" + StrF(FPDF_GetPageHeightF(PageHandle) * 0.3528, 0) + "mm"
              
              TextHandle = FPDFText_LoadPage(PageHandle)
              If TextHandle
                CharCount = FPDFText_CountChars(TextHandle)
                
                ; Two bytes per character plus room for one extra character.
                *Utf16 = AllocateMemory((CharCount + 1) * 2)
                If *Utf16
                  If FPDFText_GetText(TextHandle, 0, CharCount, *Utf16) <> 0
                    Body$ = PeekS(*Utf16) + #LF$ + #LF$ + "Next page?"
                    Title$ = GetFilePart(PdfFile$) + " page " + Str(PageNo + 1)
                    If MessageRequester(Title$, Body$, #PB_MessageRequester_YesNo) = #PB_MessageRequester_No
                      ; Push the counter past the end instead of leaving the
                      ; loop right here, so the cleanup below still runs.
                      PageNo = Pages
                    EndIf
                  EndIf
                  FreeMemory(*Utf16)
                EndIf
                
                FPDFText_ClosePage(TextHandle)
              EndIf
              
              FPDF_ClosePage(PageHandle)
            EndIf
            
            PageNo + 1
          Wend
          
          FPDF_CloseDocument(Doc)
        EndIf
      EndIf
      
    CompilerCase 2
      
      ; Menu item numbers.
      #MenuOpen = 0
      #MenuExit = 1
      
      ; Gadget numbers (continuing the same numbering as the menu items).
      #Editor         = 2
      #FirstButton    = 3
      #ForwardButton  = 4
      #BackwardButton = 5
      #LastButton     = 6
      
      
      
      ; Shows one page of an open PDF document in the #Editor gadget and
      ; updates the status bar.
      ;
      ; PDF         - document handle returned by FPDF_LoadDocument()
      ; PageCount   - total number of pages, only used for the "n / total" display
      ; CurrentPage - zero-based index of the page to show
      ;
      ; Status bar field 0 gets "page / total", field 1 the page size in mm.
      ; The editor is always rewritten: if the page has no text, or the page
      ; or its text cannot be loaded, the editor is cleared so that the text
      ; of the previously shown page does not stay on screen.
      Procedure ShowTextOfPage(PDF.i, PageCount.i, CurrentPage.i)
        
        Protected.i Page, TextPage, TextPageCharCount
        Protected *Buffer
        Protected Text$, Size$
        
        
        StatusBarText(0, 0, Str(CurrentPage + 1) + " / " + Str(PageCount), #PB_StatusBar_Center)
        
        Page = FPDF_LoadPage(PDF, CurrentPage)
        If Page
          
          ; Page size is scaled by 0.3528 and shown as millimetres.
          Size$ = StrF(FPDF_GetPageWidthF(Page) * 0.3528, 0) + " x " + StrF(FPDF_GetPageHeightF(Page) * 0.3528, 0) + "mm"
          
          TextPage = FPDFText_LoadPage(Page)
          If TextPage
            TextPageCharCount = FPDFText_CountChars(TextPage)
            
            ; Nothing to fetch for an empty page (or an error count below zero).
            If TextPageCharCount > 0
              ; Two bytes per character plus room for the terminator.
              *Buffer = AllocateMemory((TextPageCharCount + 1) * 2)
              If *Buffer
                If FPDFText_GetText(TextPage, 0, TextPageCharCount, *Buffer)
                  ; The buffer holds 2-byte characters, so read it as UTF-16
                  ; regardless of the compiler's default string format.
                  Text$ = PeekS(*Buffer, -1, #PB_Unicode)
                EndIf
                FreeMemory(*Buffer)
              EndIf
            EndIf
            
            FPDFText_ClosePage(TextPage)
          EndIf
          
          FPDF_ClosePage(Page)
        EndIf
        
        ; Size$ and Text$ are still empty here if loading failed above.
        StatusBarText(0, 1, Size$, #PB_StatusBar_Center)
        SetGadgetText(#Editor, Text$)
        
      EndProcedure
      
      
      
      ; Example 2: a small text viewer. A window with a read-only editor,
      ; four navigation buttons and a two-field status bar; File/Open loads
      ; a PDF and the buttons page through it via ShowTextOfPage().
      Define.i PDF, PageCount, CurrentPage, Exit, Event
      Define FilenameToLoad$, Filename$
      
      
      FPDF_InitLibrary()
      
      ; Without a window none of the gadget/menu calls below can work.
      If OpenWindow(0, 0, 0, 800, 600, "PDFium TextView", #PB_Window_MinimizeGadget|#PB_Window_ScreenCentered) = 0
        MessageRequester("PDFium TextView", "Could not open the main window.")
        End
      EndIf
      
      CreateMenu(0, WindowID(0))
      MenuTitle("File")
      MenuItem(#MenuOpen, "Open")
      MenuBar()
      MenuItem(#MenuExit, "Exit")
      
      
      EditorGadget(#Editor, 10, 10, 780, 480, #PB_Editor_ReadOnly)
      
      ButtonGadget(#FirstButton, 10, 510, 40, 30, "|<")
      ButtonGadget(#BackwardButton, 60, 510, 40, 30, "<")
      ButtonGadget(#ForwardButton, 110, 510, 40, 30, ">")
      ButtonGadget(#LastButton, 160, 510, 40, 30, ">|")
      
      
      CreateStatusBar(0, WindowID(0))
      AddStatusBarField(90)
      AddStatusBarField(120)
      
      
      ; Bring up the file requester right after start.
      PostEvent(#PB_Event_Menu, 0, #MenuOpen)
      
      Repeat
        
        Event = WaitWindowEvent()
        
        Select Event
          Case #PB_Event_Menu
            Select EventMenu()
              Case #MenuOpen
                FilenameToLoad$ = OpenFileRequester("Choose a PDF file", "", "PDF|*.pdf", 0)
                If FilenameToLoad$
                  
                  ; Close the previous document based on its handle, not on
                  ; its page count, so a document with 0 pages is released too.
                  If PDF
                    FPDF_CloseDocument(PDF)
                    PDF = 0
                  EndIf
                  
                  ; Start from a clean view so nothing of the old document
                  ; remains if the new one fails to load or has no pages.
                  PageCount = 0
                  CurrentPage = 0
                  SetGadgetText(#Editor, "")
                  StatusBarText(0, 0, "")
                  StatusBarText(0, 1, "")
                  
                  PDF = FPDF_LoadDocument(FilenameToLoad$, "")
                  If PDF
                    
                    Filename$ = FilenameToLoad$
                    SetWindowTitle(0, GetFilePart(Filename$))
                    
                    PageCount = FPDF_GetPageCount(PDF)
                    If PageCount > 0
                      ShowTextOfPage(PDF, PageCount, CurrentPage)
                    EndIf
                  Else
                    Filename$ = ""
                    SetWindowTitle(0, "PDFium TextView")
                    MessageRequester("PDFium TextView", "Could not open " + GetFilePart(FilenameToLoad$))
                  EndIf
                  
                EndIf
                
              Case #MenuExit
                PostEvent(#PB_Event_CloseWindow)
            EndSelect
            
          Case #PB_Event_Gadget
            ; With no document loaded PageCount and CurrentPage are both 0,
            ; so none of the conditions below is true and the buttons do nothing.
            Select EventGadget()
              Case #FirstButton
                If CurrentPage > 0
                  CurrentPage = 0
                  ShowTextOfPage(PDF, PageCount, CurrentPage)
                EndIf
                
              Case #BackwardButton
                If CurrentPage > 0
                  CurrentPage - 1
                  ShowTextOfPage(PDF, PageCount, CurrentPage)
                EndIf
                
              Case #ForwardButton
                If CurrentPage < PageCount - 1
                  CurrentPage + 1
                  ShowTextOfPage(PDF, PageCount, CurrentPage)
                EndIf
                
              Case #LastButton
                If CurrentPage < PageCount - 1
                  CurrentPage = PageCount - 1
                  ShowTextOfPage(PDF, PageCount, CurrentPage)
                EndIf
                
            EndSelect
            
          Case #PB_Event_CloseWindow
            Exit = #True
            
        EndSelect
        
      Until Exit
      
      ; Release the document that is still open, if any.
      If PDF
        FPDF_CloseDocument(PDF)
      EndIf
    
CompilerEndSelect

CompilerEndIf

Re: [Solved] Read a String inside a PDF file

Posted: Sun Apr 26, 2020 6:32 pm
by Fig
Ouch ! :?
I have no idea how you got all these constants...

So, to get my text, what would it be ?

Re: [Solved] Read a String inside a PDF file

Posted: Sun Apr 26, 2020 6:44 pm
by infratec
From the header files :wink:

I added the GetText stuff in the listing above.

Re: [Solved] Read a String inside a PDF file

Posted: Sun Apr 26, 2020 6:55 pm
by infratec
Now it's a simple PDF Text Viewer :wink:

Re: [Solved] Read a String inside a PDF file

Posted: Sun Apr 26, 2020 7:09 pm
by Fig
That's gold ! Image

Re: [Solved] Read a String inside a PDF file

Posted: Sun Apr 26, 2020 7:16 pm
by loulou2522
This is strange: on x86 (32-bit) the lib doesn't work and causes an error, even if I change Import to ImportC.
Did you try with PB 5.72 32-bit or not?
Note that in 64-bit the program works well.

Re: [Solved] Read a String inside a PDF file

Posted: Sun Apr 26, 2020 8:16 pm
by infratec
I use PB 5.72 x86 on Windows 10 x64

You have to use also the lib file from the x86 zip file.

Re: [Solved] Read a String inside a PDF file

Posted: Sun Apr 26, 2020 8:18 pm
by Paul
Fig wrote:Ouch ! :?
I have no idea how you got all these constants...

So, to get my text, what would it be ?
Download the precompiled binary from here...
https://github.com/bblanchon/pdfium-binaries

You will find the Constants he used (as well as reference to all the Functions) in the Include folder under fpdfview.h

Re: [Solved] Read a String inside a PDF file

Posted: Mon Apr 27, 2020 7:34 am
by infratec
Added an example with an EditorGadget()

Re: [Solved] Read a String inside a PDF file

Posted: Tue Apr 28, 2020 6:21 pm
by morosh
Hello:
trying to run Infratec example, I got error:
The procedure entry point FPDF_InitLibrary could not be located in the dynamic link library.
I downloaded the library from:
https://github.com/bblanchon/pdfium-bin ... ws-x86.zip

running PB5.72-x86, windows10

any help is appreciated

Re: [Solved] Read a String inside a PDF file

Posted: Tue Apr 28, 2020 6:30 pm
by RASHAD

Re: [Solved] Read a String inside a PDF file

Posted: Tue Apr 28, 2020 6:51 pm
by morosh
@Rashad:
I tried your link, same result. It seems that the lib file isn't the right one.

thanks anyway

Re: [Solved] Read a String inside a PDF file

Posted: Tue Apr 28, 2020 8:25 pm
by infratec
I use the same environment:

PB 5.72 x86 on Win10 x64

The link from Rashad is the right one.

You have to copy x86\bin\pdfium.dll and x86\lib\pdfium.dll.lib

Both files are needed for compilation.
If you want to use the compiled exe you need only the dll file and not the lib file.

Re: [Solved] Read a String inside a PDF file

Posted: Wed Apr 29, 2020 7:06 am
by morosh
Thanks Infratec.
Yes, I didn't copy "pdfium.dll" to the current folder, that was the problem
Thank you

Re: [Solved] Read a String inside a PDF file

Posted: Thu Jan 11, 2024 10:56 am
by dige
I only tried it just for fun, and Pdf2Text works nicely! Thanks Infratec!!

What other possibilities are there with PDFium, now that it has already been tapped a little here?