WebGadget : get HTML text

Share your advanced PureBasic knowledge/code with the community.
gnozal
PureBasic Expert
PureBasic Expert
Posts: 4229
Joined: Sat Apr 26, 2003 8:27 am
Location: Strasbourg / France
Contact:

WebGadget : get HTML text

Post by gnozal »

Code updated for PureBasic 5.20+

Code: Select all

;
; Get HTML Text In WebGadget
;
; NOT COMPATIBLE WITH FREAK'S WebGadgetExtras.pb
;
; Replacement declaration of the MSHTML IHTMLDocument2 COM interface.
; The methods are listed in call-table order, so entries must not be reordered,
; added or removed: each method is dispatched by its position in this list.
; Parameters are untyped placeholders; only their count matters here.
; NOTE(review): setters declared with (a1,a2,a3,a4) presumably take a VARIANT
; passed by value, split into four 32-bit stack slots (x86 layout) - confirm
; before using those setters in a 64-bit build.
Interface IHTMLDocument2_FIXED
  ; IUnknown
  QueryInterface(a,b)
  AddRef()
  Release()
  ; IDispatch
  GetTypeInfoCount(a)
  GetTypeInfo(a,b,c)
  GetIDsOfNames(a,b,c,d,e)
  Invoke(a,b,c,d,e,f,g,h)
  ; IHTMLDocument2 methods
  get_Script(a)
  get_all(a)
  get_body(a)
  get_activeElement(a)
  get_images(a)
  get_applets(a)
  get_links(a)
  get_forms(a)
  get_anchors(a)
  put_title(a)
  get_title(a)
  get_scripts(a)
  put_designMode(a)
  get_designMode(a)
  get_selection(a)
  get_readyState(a)
  get_frames(a)
  get_embeds(a)
  get_plugins(a)
  put_alinkColor(a)
  get_alinkColor(a)
  put_bgColor(a)
  get_bgColor(a)
  put_fgColor(a)
  get_fgColor(a)
  put_linkColor(a)
  get_linkColor(a)
  put_vlinkColor(a)
  get_vlinkColor(a)
  get_referrer(a)
  get_location(a)
  get_lastModified(a)
  put_URL(a)
  get_URL(a)
  put_domain(a)
  get_domain(a)
  put_cookie(a)
  get_cookie(a)
  put_expando(a)
  get_expando(a)
  put_charset(a)
  get_charset(a)
  put_defaultCharset(a)
  get_defaultCharset(a)
  get_mimeType(a)
  get_fileSize(a)
  get_fileCreatedDate(a)
  get_fileModifiedDate(a)
  get_fileUpdatedDate(a)
  get_security(a)
  get_protocol(a)
  get_nameProp(a)
  write(a)
  writeln(a)
  ; NOTE(review): b1-b4, c1-c4 and d1-d4 look like three by-value VARIANTs - confirm
  open(a,b1,b2,b3,b4,c1,c2,c3,c4,d1,d2,d3,d4,e)
  close()
  clear()
  queryCommandSupported(a,b)
  queryCommandEnabled(a,b)
  queryCommandState(a,b)
  queryCommandIndeterm(a,b)
  queryCommandText(a,b)
  queryCommandValue(a,b)
  execCommand(a,b,c,d)
  execCommandShowHelp(a,b)
  createElement(a,b)
  ; Event handler properties
  put_onhelp(a1,a2,a3,a4)
  get_onhelp(a)
  put_onclick(a1,a2,a3,a4)
  get_onclick(a)
  put_ondblclick(a1,a2,a3,a4)
  get_ondblclick(a)
  put_onkeyup(a1,a2,a3,a4)
  get_onkeyup(a)
  put_onkeydown(a1,a2,a3,a4)
  get_onkeydown(a)
  put_onkeypress(a1,a2,a3,a4)
  get_onkeypress(a)
  put_onmouseup(a1,a2,a3,a4)
  get_onmouseup(a)
  put_onmousedown(a1,a2,a3,a4)
  get_onmousedown(a)
  put_onmousemove(a1,a2,a3,a4)
  get_onmousemove(a)
  put_onmouseout(a1,a2,a3,a4)
  get_onmouseout(a)
  put_onmouseover(a1,a2,a3,a4)
  get_onmouseover(a)
  put_onreadystatechange(a1,a2,a3,a4)
  get_onreadystatechange(a)
  put_onafterupdate(a1,a2,a3,a4)
  get_onafterupdate(a)
  put_onrowexit(a1,a2,a3,a4)
  get_onrowexit(a)
  put_onrowenter(a1,a2,a3,a4)
  get_onrowenter(a)
  put_ondragstart(a1,a2,a3,a4)
  get_ondragstart(a)
  put_onselectstart(a1,a2,a3,a4)
  get_onselectstart(a)
  elementFromPoint(a,b,c)
  get_parentWindow(a)
  get_styleSheets(a)
  put_onbeforeupdate(a1,a2,a3,a4)
  get_onbeforeupdate(a)
  put_onerrorupdate(a1,a2,a3,a4)
  get_onerrorupdate(a)
  toString(a)
  createStyleSheet(a,b,c)
EndInterface

; Replacement declaration of the MSHTML IHTMLElement COM interface.
; The methods are listed in call-table order, so entries must not be reordered,
; added or removed: each method is dispatched by its position in this list.
; Parameters are untyped placeholders; only their count matters here.
; NOTE(review): setters declared with (a1,a2,a3,a4) presumably take a VARIANT
; passed by value, split into four 32-bit stack slots (x86 layout) - confirm
; before using those setters in a 64-bit build.
Interface IHTMLElement_FIXED
  ; IUnknown
  QueryInterface(a,b)
  AddRef()
  Release()
  ; IDispatch
  GetTypeInfoCount(a)
  GetTypeInfo(a,b,c)
  GetIDsOfNames(a,b,c,d,e)
  Invoke(a,b,c,d,e,f,g,h)
  ; IHTMLElement methods
  setAttribute(a,b,c)
  getAttribute(a,b,c)
  removeAttribute(a,b,c)
  put_className(a)
  get_className(a)
  put_id(a)
  get_id(a)
  get_tagName(a)
  get_parentElement(a)
  get_style(a)
  put_onhelp(a1,a2,a3,a4)
  get_onhelp(a)
  put_onclick(a1,a2,a3,a4)
  get_onclick(a)
  put_ondblclick(a1,a2,a3,a4)
  get_ondblclick(a)
  put_onkeydown(a1,a2,a3,a4)
  get_onkeydown(a)
  put_onkeyup(a1,a2,a3,a4)
  get_onkeyup(a)
  put_onkeypress(a1,a2,a3,a4)
  get_onkeypress(a)
  put_onmouseout(a1,a2,a3,a4)
  get_onmouseout(a)
  put_onmouseover(a1,a2,a3,a4)
  get_onmouseover(a)
  put_onmousemove(a1,a2,a3,a4)
  get_onmousemove(a)
  put_onmousedown(a1,a2,a3,a4)
  get_onmousedown(a)
  put_onmouseup(a1,a2,a3,a4)
  get_onmouseup(a)
  get_document(a)
  put_title(a)
  get_title(a)
  put_language(a)
  get_language(a)
  put_onselectstart(a1,a2,a3,a4)
  get_onselectstart(a)
  scrollIntoView(a)
  contains(a,b)
  get_sourceIndex(a)
  get_recordNumber(a)
  put_lang(a)
  get_lang(a)
  get_offsetLeft(a)
  get_offsetTop(a)
  get_offsetWidth(a)
  get_offsetHeight(a)
  get_offsetParent(a)
  ; Markup / text accessors; get_innerHTML is the one used by WebGadget_GextHTMLText
  put_innerHTML(a)
  get_innerHTML(a)
  put_innerText(a)
  get_innerText(a)
  put_outerHTML(a)
  get_outerHTML(a)
  put_outerText(a)
  get_outerText(a)
  insertAdjacentHTML(a,b)
  insertAdjacentText(a,b)
  get_parentTextEdit(a)
  get_isTextEdit(a)
  click()
  get_filters(a)
  put_ondragstart(a1,a2,a3,a4)
  get_ondragstart(a)
  toString(a)
  put_onbeforeupdate(a1,a2,a3,a4)
  get_onbeforeupdate(a)
  put_onafterupdate(a1,a2,a3,a4)
  get_onafterupdate(a)
  put_onerrorupdate(a1,a2,a3,a4)
  get_onerrorupdate(a)
  put_onrowexit(a1,a2,a3,a4)
  get_onrowexit(a)
  put_onrowenter(a1,a2,a3,a4)
  get_onrowenter(a)
  put_ondatasetchanged(a1,a2,a3,a4)
  get_ondatasetchanged(a)
  put_ondataavailable(a1,a2,a3,a4)
  get_ondataavailable(a)
  put_ondatasetcomplete(a1,a2,a3,a4)
  get_ondatasetcomplete(a)
  put_onfilterchange(a1,a2,a3,a4)
  get_onfilterchange(a)
  get_children(a)
  get_all(a)
EndInterface
;
; Size query: asks WideCharToMultiByte_() how many bytes the null-terminated
; wide string at 'bstr' needs once converted to the ANSI code page.
; No output buffer is passed, so nothing is converted; the API's result
; (terminating null included, 0 on failure) is returned unchanged.
Procedure.l GetBSTRLength(bstr)
  ProcedureReturn WideCharToMultiByte_(#CP_ACP, 0, bstr, -1, #Null, 0, #Null, #Null)
EndProcedure
;
; Converts the null-terminated wide string at 'bstr' to the ANSI code page
; and stores the result in *Buffer, which holds 'Length' bytes.
; Returns the value reported by WideCharToMultiByte_(): the number of bytes
; written (terminating null included), or 0 if the conversion failed.
Procedure.l ReadBSTRMem(*Buffer, bstr, Length)
  Protected written
  written = WideCharToMultiByte_(#CP_ACP, 0, bstr, -1, *Buffer, Length, #Null, #Null)
  ProcedureReturn written
EndProcedure
;
; Prints the inner HTML of the <body> of the page shown in a WebGadget
; to the debug output.
;
;   WebGadget : PureBasic gadget number of the WebGadget.
;
; Returns nothing. If no document or body is available, or any COM call
; fails, the procedure returns without printing anything.
;
; Every COM interface obtained here is released again on all paths, and the
; BSTR returned by get_innerHTML() is freed with SysFreeString_().
Procedure WebGadget_GextHTMLText(WebGadget.l)
  Protected WebObject.IWebBrowser2, HTMLDoc.IHTMLDocument2_FIXED, DocDispatch.IDispatch, HTMLElement.IHTMLElement_FIXED
  Protected bstr_code, bstr_len, *Buffer
  
  ; The browser object pointer is kept in the gadget window's user data.
  ; Read it pointer-sized so it is not truncated in 64-bit builds.
  WebObject = GetWindowLongPtr_(GadgetID(WebGadget), #GWL_USERDATA)
  If WebObject = 0
    ProcedureReturn
  EndIf
  
  ; A getter may report #S_OK and still hand back a null object (e.g. no page
  ; loaded yet), so each out-pointer is checked before it is used.
  If WebObject\get_document(@DocDispatch) = #S_OK And DocDispatch
    If DocDispatch\QueryInterface(?IID_IHTMLDocument2, @HTMLDoc) = #S_OK And HTMLDoc
      If HTMLDoc\get_body(@HTMLElement) = #S_OK And HTMLElement
        If HTMLElement\get_innerHTML(@bstr_code) = #S_OK And bstr_code
          
          bstr_len = GetBSTRLength(bstr_code) ; bytes needed, terminating null included
          If bstr_len
            *Buffer = AllocateMemory(bstr_len)
            If *Buffer
              If ReadBSTRMem(*Buffer, bstr_code, bstr_len) ; Get HTML code in *Buffer
                ; The buffer holds ANSI text: read it as such, whatever the
                ; string mode of the executable is.
                Debug PeekS(*Buffer, -1, #PB_Ascii) ; [warning string size limitation, better use memory functions !]
              EndIf
              FreeMemory(*Buffer)
            EndIf
          EndIf
          
          SysFreeString_(bstr_code)
        EndIf
        HTMLElement\Release()
      EndIf
      HTMLDoc\Release()
    EndIf
    ; Released whether or not QueryInterface succeeded
    DocDispatch\Release()
  EndIf
  ProcedureReturn
  
EndProcedure

; Interface identifier passed to QueryInterface() to request IHTMLDocument2.
; The 16 bytes are laid out as one long, two words and eight bytes, matching
; the groups of the GUID text below.
DataSection
IID_IHTMLDocument2: ; {332C4425-26CB-11D0-B483-00C04FD90119}
Data.l $332C4425
Data.w $26CB, $11D0
Data.b $B4, $83, $00, $C0, $4F, $D9, $01, $19        
EndDataSection
Last edited by gnozal on Mon Oct 31, 2005 10:06 am, edited 1 time in total.
For free libraries and tools, visit my web site (also home of jaPBe V3 and PureFORM).
User avatar
oryaaaaa
Addict
Addict
Posts: 825
Joined: Mon Jan 12, 2004 11:40 pm
Location: Okazaki, JAPAN

Post by oryaaaaa »

Great! :D
It downloaded the page in the background. Thanks.
Blade
Enthusiast
Enthusiast
Posts: 362
Joined: Wed Aug 06, 2003 2:49 pm
Location: Venice - Italy, Japan when possible.
Contact:

Post by Blade »

Error: Label not found (iid_ihtmldocument2)

Some userlibrary is needed?
gnozal
PureBasic Expert
PureBasic Expert
Posts: 4229
Joined: Sat Apr 26, 2003 8:27 am
Location: Strasbourg / France
Contact:

Post by gnozal »

Blade wrote:Error: Label not found (iid_ihtmldocument2)
Some userlibrary is needed?
No, the data section was missing (sorry).
Fixed.

Code: Select all

; Interface identifier referenced as ?IID_IHTMLDocument2 by the code in the
; first post. The 16 bytes are laid out as one long, two words and eight
; bytes, matching the groups of the GUID text below.
DataSection
IID_IHTMLDocument2: ; {332C4425-26CB-11D0-B483-00C04FD90119}
Data.l $332C4425
Data.w $26CB, $11D0
Data.b $B4, $83, $00, $C0, $4F, $D9, $01, $19        
EndDataSection
For free libraries and tools, visit my web site (also home of jaPBe V3 and PureFORM).
Pantcho!!
Enthusiast
Enthusiast
Posts: 538
Joined: Tue Feb 24, 2004 3:43 am
Location: Israel
Contact:

Post by Pantcho!! »

Thank you very much!! this code is very very handy! :)

keep up the good work.
Nik
Addict
Addict
Posts: 1017
Joined: Fri May 13, 2005 11:45 pm
Location: Germany
Contact:

Post by Nik »

Does anyone know how to suppress the right-click menu but still allow Ctrl+C?
Post Reply