Internet Searching with Altavista and Google
Posted: Mon Feb 11, 2008 12:48 am
Enjoy,
peterb
peterb
Code: Select all
;- Author : Petr Vavrin (peterb)
;- Location : Czech Republic
;- Email : pb.pb@centrum.cz
Structure search_item
source.s
link.s
name.s
description.s
encoding.s
EndStructure
Enumeration
#Window
#input
#text
#enter_button
EndEnumeration
#WS_EX_LAYERED = $00080000
Global close_button.RECT
Global move_button.RECT
Global settings_button.RECT
Global execute_button.RECT
Global window_width.l = 450
Global text_width.l = 120
Global move_button_width = 15
Global settings_button_width = 20
Global close_button_width = 20
Global execute_button_width = 20
Global main_window_height = 24
Global background_color.l = RGB ( 255, 220, 50 )
Global move_color.l = RGB ( 100, 100, 255 )
Global execute_color.l = RGB ( 50, 255, 50 )
Global settings_color.l = RGB ( 100, 100, 255 )
Global close_color.l = RGB ( 255, 50, 50 )
Global opacity.l = 255
Define hwnd.l = 0
Define ClassLong.l
Define EventID.l
Define mx.l
Define my.l
Define Result.l
Define input_with.l
Global NewList search_result.search_item()
Global temp_file$ = "searcher.tmp"
Procedure.s xURLEncode ( String.s )
Protected string_buffer.s, char.s, Encoded.s
string_buffer = "1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
For x = 0 To Len ( String.s ) - 1
char = PeekS ( @String + x, 1 )
If Not FindString ( string_buffer, char, 1 )
Encoded + "%" + RSet ( Hex ( Asc ( Char.s ) ), 2, "0" )
ElseIf char = Chr ( 32 )
Encoded.s + "+"
Else
Encoded.s + char
EndIf
Next
ProcedureReturn Encoded.s
EndProcedure
Procedure.s xUrlDecode ( String.s )
Protected char.s, Decoded.s, x.l
For x = 0 To Len ( String.s ) - 1
char = PeekS ( @String + x, 1 )
If char = "%"
Decoded + Chr ( Hex2Dec( PeekS ( @String + x + 1, 2 ) ) )
x + 2
ElseIf char = "+"
Decoded + Chr ( 32 )
Else
Decoded + char
EndIf
Next
ProcedureReturn Decoded
EndProcedure
Procedure.s regex_replace ( string$, pattern$, new_string$ )
If CreateRegularExpression ( 0, pattern$ ) : string$ = ReplaceRegularExpression ( 0, string$, new_string$ ) : EndIf
ProcedureReturn string$
EndProcedure
Procedure.s regex_param_replacer ( string$, param_name$, new_string$ )
Protected pattern$, reg_param_name$, i.l, char$
For i = 0 To Len ( param_name$ ) - 1
char$ = PeekS ( @param_name$ + i, 1 )
reg_param_name$ + "[" + UCase ( char$ ) + LCase ( char$ ) + "]"
Next i
pattern$ = reg_param_name$ + "=" + Chr(34) + "[^" + Chr(34) + "]*" + Chr(34)
string$ = regex_replace ( string$, pattern$, new_string$ )
ProcedureReturn string$
EndProcedure
Procedure.s regex_tag_replacer ( string$, tag_name$, new_string$, with_content.l = 0, with_end_tag.l = 0 )
Protected pattern$, reg_tag_name$, char$, i.l
For i = 0 To Len ( tag_name$ ) - 1
char$ = PeekS ( @tag_name$ + i, 1 )
reg_tag_name$ + "[" + UCase ( char$ ) + LCase ( char$ ) + "]"
Next i
If with_content = 0
If with_end_tag = 0
pattern$ = "<" + reg_tag_name$ + "[^>]*>"
Else
pattern$ = "<" + reg_tag_name$ + "[^>]*>|</" + reg_tag_name$ + "[^>]*>"
EndIf
Else
pattern$ = "<" + reg_tag_name$ + "[^>]*>.*?</" + reg_tag_name$ + ">"
EndIf
string$ = regex_replace ( string$, pattern$, new_string$ )
ProcedureReturn string$
EndProcedure
Procedure.s regex_extract_href ( string$ )
Dim result$(0)
If CreateRegularExpression(0, "[Hh][Rr][Ee][Ff]=" + Chr(34) + "[^" + Chr(34) + "]*" + Chr(34) )
ExtractRegularExpression ( 0, string$, result$() )
string$ = Result$(0)
EndIf
string$ = regex_replace ( string$, "[Hh][Rr][Ee][Ff]=", "" )
string$ = regex_replace ( string$, Chr ( 34 ), "" )
string$ = regex_replace ( string$, Chr ( 39 ), "" )
ProcedureReturn string$
EndProcedure
Procedure.s regex_extract_tag ( string$, tag_name$ )
Protected reg_tag_name$, char$, i.l
Dim result$(0)
For i = 0 To Len ( tag_name$ ) - 1
char$ = PeekS ( @tag_name$ + i, 1 )
reg_tag_name$ + "[" + UCase ( char$ ) + LCase ( char$ ) + "]"
Next i
If CreateRegularExpression(0, "<" + reg_tag_name$ + "[^>]*>.*?</" + reg_tag_name$ + ">" )
ExtractRegularExpression ( 0, string$, result$() )
string$ = Result$(0)
EndIf
string$ = regex_tag_replacer ( string$, tag_name$, "", 0, 1 )
ProcedureReturn string$
EndProcedure
Procedure.s DownloadFile ( url$ )
Debug url$
URLDownloadToFile_(0, url$, temp_file$, 0, 0)
DeleteUrlCacheEntry_(url$)
If ReadFile ( 0, temp_file$ )
length = Lof ( 0 )
*MemoryID = AllocateMemory ( length )
If *MemoryID : bytes = ReadData ( 0, *MemoryID, length ) : EndIf
CloseFile(0)
EndIf
If *MemoryID And length
page$ = PeekS ( *MemoryID, length )
page$ = regex_replace ( page$, "\n", "" )
page$ = regex_replace ( page$, "\t", " " )
Else
page$ = ""
EndIf
DeleteFile ( temp_file$ )
ProcedureReturn page$
EndProcedure
Procedure ChangeTitle ( text$ )
SetGadgetText ( #text, text$ )
EndProcedure
Procedure.s GoogleTextSearchQuery ( url$ )
page$ = DownloadFile ( url$ )
If Not page$ = ""
Dim result$(0)
If CreateRegularExpression(0, "<td nowrap class=b><a href=" + Chr(34) + "[^" + Chr(34) + "]*" + Chr(34) + ">" )
a = ExtractRegularExpression(0, page$, result$())
next_page$ = Result$(0)
EndIf
next_page$ = regex_extract_href ( next_page$ )
page$ = regex_tag_replacer ( page$, "script", "", 1 )
page$ = regex_tag_replacer ( page$, "table", "", 0, 1 )
page$ = regex_tag_replacer ( page$, "tr", "", 0, 1 )
page$ = regex_tag_replacer ( page$, "td", "", 0, 1 )
page$ = regex_tag_replacer ( page$, "b", "", 0, 1 )
page$ = regex_tag_replacer ( page$, "div", "", 0, 1 )
page$ = regex_tag_replacer ( page$, "span", "", 1 )
ReDim result$(0)
If CreateRegularExpression ( 0, "<h2 class=r><a[^>]*>.*?</a></h2><font[^>]*>.*?</font>" )
a = ExtractRegularExpression ( 0, page$, result$() )
For k = 0 To a - 1
line$ = Result$ ( k )
line$ = regex_tag_replacer ( line$, "h2", "", 0, 1 )
line$ = regex_tag_replacer ( line$, "font", "", 0, 1 )
AddElement ( search_result() )
search_result()\source = "Google"
search_result()\link = regex_extract_href ( line$ )
search_result()\name = regex_extract_tag ( line$, "a" )
search_result()\encoding = "UTF8"
line$ = regex_tag_replacer ( line$, "a", "", 1 )
search_result()\description = line$
Next
EndIf
ProcedureReturn next_page$
Else
ProcedureReturn ""
EndIf
EndProcedure
Procedure GoogleTextSearch ( query$, search_pages.l )
ChangeTitle ( "Searching - Google [" + Str ( search_pages ) + "]" )
Dim Result$(0)
page$ = DownloadFile ( "http://www.google.com" )
If Not page$ = ""
If CreateRegularExpression(0, "var a=" + Chr(34) + "[^" + Chr(34) + "]*" + Chr(34) )
a = ExtractRegularExpression(0, page$, result$())
url$ = Result$(0)
EndIf
url$ = regex_replace ( url$, "var a=", "" )
url$ = regex_replace ( url$, Chr ( 34 ), "" )
url$ = regex_replace ( url$, Chr ( 39 ), "" )
If Mid ( url$, Len ( url$ ), 1 ) = "/"
url$ = Mid ( url$, 1, Len ( url$ ) - 1 )
EndIf
If url$ = ""
url$ = "http://www.google.com"
EndIf
next_link$ = GoogleTextSearchQuery ( url$ + "/search?q=" + query$ + "&num=100&start=0" )
search_pages - 1
While Not next_link$ = "" And search_pages > 0
ChangeTitle ( "Searching - Google [" + Str ( search_pages ) + "]" )
next_link$ = GoogleTextSearchQuery ( url$ + next_link$ )
search_pages - 1
Wend
EndIf
ChangeTitle ( "Internet Search" )
EndProcedure
Procedure.s AltavistaTextSearchQuery ( url$ )
page$ = DownloadFile ( url$ )
If Not page$ = ""
Dim Result$(0)
If CreateRegularExpression(0, "<a[^>]*>Next.*?</a>" )
a = ExtractRegularExpression(0, page$, Result$())
next_page$ = Result$(0)
EndIf
next_page$ = regex_extract_href ( next_page$ )
If CreateRegularExpression(0, "<div id=" + Chr ( 34 ) + "results" + Chr ( 34 ) + ">.*?<table" )
a = ExtractRegularExpression(0, page$, Result$())
page$ = Result$(0)
EndIf
page$ = regex_tag_replacer ( page$, "b", "", 0, 1 )
page$ = regex_tag_replacer ( page$, "br", "", 0, 1 )
page$ = regex_replace ( page$, "<a[^>]*>More pages from.*?</a>", "" )
ReDim result$(0)
;
If CreateRegularExpression ( 0, "<a class[^>]*>.*? " )
a = ExtractRegularExpression ( 0, page$, result$() )
For k = 0 To a - 1
line$ = Result$ ( k )
AddElement ( search_result() )
search_result()\source = "Altavista"
search_result()\name = regex_extract_tag ( line$, "a" )
search_result()\encoding = "UTF8"
line$ = regex_tag_replacer ( line$, "a", "", 1 )
line$ = regex_replace ( line$, "<span class=rgy>.*?</span>", "" )
line1$ = line$
line$ = regex_replace ( line$, "<span class=ngrn>.*?</span>", "" )
line1$ = regex_replace ( line1$, "<span class=s>.*?</span>", "" )
search_result()\link = "http://" + regex_extract_tag ( line1$, "span" )
search_result()\description = regex_extract_tag ( line$, "span" )
Next
EndIf
ProcedureReturn next_page$
Else
ProcedureReturn ""
EndIf
EndProcedure
Procedure AltavistaTextSearch ( query$, search_pages.l )
ChangeTitle ( "Searching - Altavista [" + Str ( search_pages ) + "]" )
next_link$ = AltavistaTextSearchQuery ( "http://www.altavista.com/web/results?itag=ody&q=" + query$ + "&kgs=0&kls=0" )
search_pages - 1
While Not next_link$ = "" And search_pages > 0
ChangeTitle ( "Searching - Altavista [" + Str ( search_pages ) + "]" )
next_link$ = AltavistaTextSearchQuery ( next_link$ )
search_pages - 1
Wend
ChangeTitle ( "Internet Search" )
EndProcedure
Procedure ParseResults ()
Protected count_items.l, item_id.l, link.s, link_md5.s
count_items = CountList ( search_result() )
If count_items > 0
*md5_list = AllocateMemory ( count_items * 32 )
EndIf
ForEach search_result()
link = Trim ( search_result()\link )
search_result()\source = Trim ( search_result()\source )
search_result()\link = link
search_result()\name = Trim ( search_result()\name )
search_result()\description = Trim ( search_result()\description )
search_result()\encoding = Trim ( search_result()\encoding )
If count_items > 0
link_md5 = MD5Fingerprint ( @link, Len ( link ) )
If item_id = 0
PokeS ( *md5_list + ( item_id * 32 ), link_md5, 32 )
item_id = 1
Else
found.l = #False
For id = 1 To item_id
If CompareMemory ( @link_md5, *md5_list + ( id * 32 ), 32 ) = 1
DeleteElement ( search_result() )
found = #True
Break
EndIf
Next
If found = #False
PokeS ( *md5_list + ( item_id * 32 ), link_md5, 32 )
item_id + 1
EndIf
EndIf
EndIf
Next
EndProcedure
Procedure GenerateHtmlExport ( query$ )
query$ = xUrlDecode ( query$ )
html$ = "<html>" + Chr(13) + Chr (10)
html$ + "<head>" + Chr(13) + Chr (10)
html$ + "<title>Search Result - " + query$ + "</title>" + Chr(13) + Chr (10)
html$ + "<meta http-equiv='content-type' content='text/html; charset=UTF-8'>" + Chr(13) + Chr (10)
html$ + "</head>"
html$ + "<body>"
html$ + "<h1>Search Result - " + query$ + "</h1>"
count_items = CountList ( search_result() )
If count_items > 0
html$ + "<table cellapdding='0' cellspacing='0' border='0' width='900'>" + Chr(13) + Chr (10)
html$ + "<tr>" + Chr(13) + Chr (10)
html$ + "<td style='border-bottom: 1px solid #AAAAFF' width='130'>Results</td>" + Chr(13) + Chr (10)
html$ + "</tr>" + Chr(13) + Chr (10)
bg_color$ = "#ddddff"
ForEach search_result()
html$ + "<tr>" + Chr(13) + Chr (10)
html$ + "<td style='background-color: " + bg_color$ + "'><b>" + search_result()\name + "</b><br>"
html$ + "<a href='" + search_result()\link + "' target='_blank'>" + search_result()\link + "</a><br>"
html$ + search_result()\description + "<br>"
html$ + "[" + search_result()\source + "]</td>" + Chr(13) + Chr (10)
html$ + "</tr>" + Chr(13) + Chr (10)
If bg_color$ = "#ddddff"
bg_color$ = "#ddffdd"
Else
bg_color$ = "#ddddff"
EndIf
Next
html$ + "</table>" + Chr(13) + Chr (10)
Else
html$ + "<h3>0 results</h3>"
EndIf
html$ + "</body>"
html$ + "</html>"
If CreateFile(0, "links.htm")
x = 1
WriteString ( 0, html$ )
CloseFile ( 0 )
EndIf
RunProgram ( "links.htm" )
EndProcedure
Procedure SetWinOpacity ( hWin.l, Opacity.l )
SetWindowLong_ ( hWin, #GWL_EXSTYLE, #WS_EX_LAYERED )
SetLayeredWindowAttributes_ ( hWin, 0, Opacity, 2 )
EndProcedure
Procedure RestoreImageAndPlot ( image_label_name.s, offset_x.l, offset_y.l, img_color.l, back_color.l )
Select image_label_name
Case "move_image"
Restore move_image
Case "search_image"
Restore search_image
Case "settings_image"
Restore settings_image
Case "exit_image"
Restore exit_image
EndSelect
For y = 1 To 9
For x = 1 To 9
Read point.b
If point = 1
point_color = img_color
Else
point_color = back_color
EndIf
Plot ( x + offset_x, y + offset_y, point_color )
Next x
Next y
EndProcedure
Procedure PaintWindowBackground ()
Protected hwnd.l
Protected ww.l
Protected wh.l
Protected background
Protected window_region
Protected hDC
Protected hBrushBackground
Protected window_id
hwnd = WindowID ( #Window )
ww = WindowWidth ( #Window )
wh = WindowHeight ( #Window )
If ww > 0 And wh > 0
background.l = CreateImage ( 0, ww, wh )
window_region.l = CreateRoundRectRgn_ ( 0, 0, ww, wh, 3, 3 )
If background
hDC = StartDrawing ( ImageOutput ( 0 ) )
If hDC
; background
Box ( 0, 0, ww, wh, background_color )
; move button
move_button\left = 0
move_button\top = 0
move_button\right = move_button\left + move_button_width
move_button\bottom = move_button\top + main_window_height
Box ( move_button\left, move_button\top, move_button\right - move_button\left, move_button\bottom - move_button\top, move_color )
RestoreImageAndPlot ( "move_image", move_button\left, move_button\top, RGB ( 255, 255, 255 ), move_color )
; execute button
execute_button\left = ww - close_button_width - settings_button_width - execute_button_width
execute_button\top = 0
execute_button\right = execute_button\left + execute_button_width
execute_button\bottom = execute_button\top + main_window_height
Box ( execute_button\left, execute_button\top, execute_button\right - execute_button\left, execute_button\bottom - execute_button\top, execute_color )
RestoreImageAndPlot ( "search_image", execute_button\left, execute_button\top, RGB ( 255, 255, 255 ), execute_color )
; settings button
settings_button\left = ww - close_button_width - settings_button_width
settings_button\top = 0
settings_button\right = settings_button\left + settings_button_width
settings_button\bottom = settings_button\top + main_window_height
Box ( settings_button\left, settings_button\top, settings_button\right - settings_button\left, settings_button\bottom - settings_button\top, settings_color )
RestoreImageAndPlot ( "setting_image", settings_button\left, settings_button\top, RGB ( 255, 255, 255 ), settings_color )
; close button
close_button\left = ww - close_button_width
close_button\top = 0
close_button\right = close_button\left + close_button_width
close_button\bottom = close_button\top + main_window_height
Box ( close_button\left, close_button\top, close_button\right - close_button\left, close_button\bottom - close_button\top, close_color )
RestoreImageAndPlot ( "exit_image", close_button\left, close_button\top, RGB ( 255, 255, 255 ), close_color )
StopDrawing()
EndIf
hBrushBackground.l = CreatePatternBrush_ ( background )
SetClassLong_ ( hwnd, #GCL_HBRBACKGROUND, hBrushBackground )
InvalidateRect_ ( hwnd, #Null, #True )
SetWindowRgn_ ( hwnd, window_region, #True )
DeleteObject_ ( hBrushBackground )
EndIf
DeleteObject_ ( window_region )
FreeImage ( 0 )
EndIf
EndProcedure
If OpenWindow ( #Window, 300, 0, 1, 1, "Internet Search", #PB_Window_BorderLess ) And CreateGadgetList ( WindowID ( #Window ) )
StickyWindow(#Window, 1)
hwnd.l = WindowID ( #Window )
ClassLong = GetClassLong_ ( hwnd, #GCL_STYLE ) | #CS_DBLCLKS
SetClassLong_ ( hwnd, #GCL_STYLE, ClassLong )
SetWinOpacity ( hwnd, opacity )
ResizeWindow ( #Window, #PB_Ignore, #PB_Ignore, window_width, main_window_height )
PaintWindowBackground()
input_with = window_width - text_width - move_button_width - settings_button_width - close_button_width - execute_button_width - 20
TextGadget ( #text, 20, 2, text_width, 20, "Internet Search" )
StringGadget ( #input, 150, 2, input_with, 20, "" )
SetGadgetColor ( #text, #PB_Gadget_BackColor, background_color )
ButtonGadget ( #enter_button, 0, 0, 0, 0, "" )
HideGadget ( #enter_button, 1 )
SetActiveGadget ( #input )
CreatePopupMenu ( 0 )
MenuItem ( 1, "Altavista [Enabled]" )
MenuItem ( 3, "Google [Enabled]" )
search_altavista = #True
search_google = #True
Repeat
EventID.l = WaitWindowEvent()
mx.l = WindowMouseX ( #Window )
my.l = WindowMouseY ( #Window )
Select EventID
Case #PB_Event_Menu
Select EventMenu()
Case 1
If search_altavista = #True
search_altavista = #False
SetMenuItemText(0, 1, "Altavista [Disabled]")
Else
search_altavista = #True
SetMenuItemText(0, 1, "Altavista [Enabled]")
EndIf
Case 3
If search_google = #True
search_google = #False
SetMenuItemText(0, 3, "Google [Disabled]")
Else
search_google = #True
SetMenuItemText(0, 3, "Google [Enabled]")
EndIf
EndSelect
Case #WM_PAINT
PaintWindowBackground()
While WindowEvent() : Wend
Case #WM_LBUTTONDBLCLK
If PtInRect_ ( @move_button, mx, my )
ShowWindow_ ( hwnd, #SW_MINIMIZE )
EndIf
Case #WM_LBUTTONDOWN
If PtInRect_ ( @move_button, mx, my )
ReleaseCapture_()
SendMessage_ ( WindowID ( #Window ), #WM_NCLBUTTONDOWN, #HTCAPTION, #Null )
EndIf
If PtInRect_ ( @execute_button, mx, my )
query$ = GetGadgetText ( #input )
If Not query$ = ""
query$ = xURLEncode ( query$ )
If search_altavista = #True : AltavistaTextSearch ( query$, 10 ) : EndIf
If search_google = #True : GoogleTextSearch ( query$, 2 ) : EndIf
ParseResults ()
GenerateHtmlExport ( query$ )
EndIf
EndIf
If PtInRect_ ( @settings_button, mx, my )
DisplayPopupMenu ( 0, WindowID ( #Window ) )
EndIf
If PtInRect_ ( @close_button, mx, my )
Result.l = MessageRequester ( "Close?", "", #PB_MessageRequester_YesNo )
If Result = #IDYES
EventID = #PB_Event_CloseWindow
EndIf
EndIf
EndSelect
Until EventID = #PB_Event_CloseWindow
EndIf
End
DataSection
move_image:
Data.b 0, 0, 0, 0, 1, 0, 0, 0, 0
Data.b 0, 0, 0, 1, 1, 1, 0, 0, 0
Data.b 0, 0, 0, 0, 1, 0, 0, 0, 0
Data.b 0, 1, 0, 0, 1, 0, 0, 1, 0
Data.b 1, 1, 1, 1, 1, 1, 1, 1, 1
Data.b 0, 1, 0, 0, 1, 0, 0, 1, 0
Data.b 0, 0, 0, 0, 1, 0, 0, 0, 0
Data.b 0, 0, 0, 1, 1, 1, 0, 0, 0
Data.b 0, 0, 0, 0, 1, 0, 0, 0, 0
search_image:
Data.b 0, 0, 0, 0, 1, 0, 0, 0, 0
Data.b 0, 0, 0, 0, 1, 1, 0, 0, 0
Data.b 0, 0, 0, 0, 1, 1, 1, 0, 0
Data.b 1, 1, 1, 1, 1, 1, 1, 1, 0
Data.b 1, 1, 1, 1, 1, 1, 1, 1, 1
Data.b 1, 1, 1, 1, 1, 1, 1, 1, 0
Data.b 0, 0, 0, 0, 1, 1, 1, 0, 0
Data.b 0, 0, 0, 0, 1, 1, 0, 0, 0
Data.b 0, 0, 0, 0, 1, 0, 0, 0, 0
settings_image:
Data.b 0, 1, 1, 1, 1, 1, 1, 1, 0
Data.b 1, 0, 0, 0, 0, 0, 0, 0, 1
Data.b 1, 0, 1, 1, 1, 1, 1, 0, 1
Data.b 1, 0, 1, 0, 0, 0, 1, 0, 1
Data.b 1, 0, 1, 0, 0, 0, 1, 0, 1
Data.b 1, 0, 1, 0, 0, 0, 1, 0, 1
Data.b 1, 0, 1, 1, 1, 1, 1, 0, 1
Data.b 1, 0, 0, 0, 0, 0, 0, 0, 1
Data.b 0, 1, 1, 1, 1, 1, 1, 1, 0
exit_image:
Data.b 0, 1, 1, 1, 1, 1, 1, 1, 0
Data.b 1, 0, 0, 0, 0, 0, 0, 0, 1
Data.b 1, 0, 1, 0, 0, 0, 1, 0, 1
Data.b 1, 0, 0, 1, 0, 1, 0, 0, 1
Data.b 1, 0, 0, 0, 1, 0, 0, 0, 1
Data.b 1, 0, 0, 1, 0, 1, 0, 0, 1
Data.b 1, 0, 1, 0, 0, 0, 1, 0, 1
Data.b 1, 0, 0, 0, 0, 0, 0, 0, 1
Data.b 0, 1, 1, 1, 1, 1, 1, 1, 0
EndDataSection