Google PageRank, Alexa Rank and google search results
Posted: Thu Mar 29, 2007 11:58 am
Code updated for 5.20+
Hi all, as discussed here http://www.purebasic.fr/english/viewtopic.php?t=26351 I wanted to know the Google PR checksum and other stuff, so here is the code I did to get
PageRank, Alexa rank and Google search results in a kind of bulk mode.
Again, thanks to clutch.
P.S. Take note, this is probably not the best coding there is! :roll:

Code: Select all
;#########################################################################################
; Page Rank Check
; Alexa Rank Check
; Google Results for search term
;
; Code by Pantcho (Arye)
; Pagerank Hash and example modified by me, original code by Clutch - thanks man!
;#########################################################################################
; Google Hash stuff for PR calculation
Macro Hex8(expr)
  RSet(Hex(expr), 2, "0")
EndMacro
;
Macro Hex32(expr)
  Hex8((expr) >> 24 & $FF) + Hex8((expr) >> 16 & $FF) + Hex8((expr) >> 8 & $FF) + Hex8((expr) & $FF)
EndMacro
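;
; Quick sanity check of the hex helpers (illustrative values only):
Debug Hex8(42)  ; -> "2A"
Debug Hex32(42) ; -> "0000002A"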
;
Procedure.s GPRHash(url$)
  seed$ = "Mining PageRank is AGAINST GOOGLE'S TERMS OF SERVICE. Yes, I'm talking to you, scammer."
  hash.q = 16909125
  For i = 0 To Len(url$) - 1
    *s.Character = @seed$ + (i % Len(seed$))
    *u.Character = @url$ + i
    hash ! *s\c ! *u\c
    hash >> 23 | (hash << 9 & $FFFFFFFF)
  Next i
  ProcedureReturn "8" + Hex32(hash)
EndProcedure
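;
; Sketch of how the checksum feeds the toolbar query used below
; (www.example.com is just a placeholder URL, no particular checksum
; value is claimed here):
DemoHash$ = GPRHash("www.example.com") ; "8" followed by eight hex digits
Debug "http://www.google.com/search?client=navclient-auto&ch=" + DemoHash$ + "&features=Rank&q=info:www.example.com"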
;#############################################
; Ranking check
Procedure.s GetPageRank(page$)
  #GOOGLE_AGENT = "PBAGENT/1.0"
  hInet.l = InternetOpen_(#GOOGLE_AGENT, 0, #Null, #Null, 0)
  RawPage$ = "-1" ; Default if no connect
  If (hInet)
    query$ = "http://www.google.com/search?client=navclient-auto&ch="
    query$ + GPRHash(page$) + "&features=Rank&q=info:" + page$
    hFile = InternetOpenUrl_(hInet, @query$, #Null, 0, $80000000, #Null)
    If (hFile)
      chunk$ = Space(1024)
      Repeat
        ret = InternetReadFile_(hFile, @chunk$, 1024, @numRead)
        buffer$ + Left(chunk$, numRead)
      Until (numRead = 0)
      InternetCloseHandle_(hFile)
      RawPage$ = StringField(buffer$, 3, #LF$)
    EndIf
    InternetCloseHandle_(hInet)
    hInet = 0
    RawPage$ = StringField(RawPage$, 3, ":")
    If Trim(RawPage$) = "" : RawPage$ = "-1" : EndIf ; in case of any error
  EndIf
  ProcedureReturn RawPage$
EndProcedure
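;
; The toolbar endpoint is assumed to reply with a line along the lines of
; "Rank_1:1:6", with the rank on the third LF-delimited line of the body
; (judging by the parsing above); the second StringField() call then pulls
; out the last colon-separated field:
Debug StringField("Rank_1:1:6", 3, ":") ; -> "6"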
;
Procedure.s GetAlexaRank(page$)
  #ALEXA_AGENT = "PBAGENT/1.0"
  AlexaXML$ = "http://xml.alexa.com/data?cli=10&dat=nsa&url="
  page$ = AlexaXML$ + page$
  RawRank$ = "-1" ; Default if no connect
  hInet.l = InternetOpen_(#ALEXA_AGENT, 0, #Null, #Null, 0)
  If (hInet)
    hFile = InternetOpenUrl_(hInet, @page$, #Null, 0, $80000000, #Null)
    If (hFile)
      chunk$ = Space(1024)
      Repeat
        ret = InternetReadFile_(hFile, @chunk$, 1024, @numRead)
        buffer$ + Left(chunk$, numRead)
      Until (numRead = 0)
      InternetCloseHandle_(hFile)
    EndIf
    InternetCloseHandle_(hInet)
    hInet = 0
    ; Grab Rank
    RankPlace.l = FindString(buffer$, "<POPULARITY", 1)
    If RankPlace > 0
      RankPlace = FindString(buffer$, "TEXT=" + Chr(34), RankPlace)
      EndRank.l = FindString(buffer$, "/>", RankPlace)
      RankPlace + 6
      RawRank$ = Mid(buffer$, RankPlace, EndRank - 1 - RankPlace)
    EndIf
  EndIf
  ProcedureReturn RawRank$
EndProcedure
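;
; The Alexa XML reply is assumed to carry the rank in a tag such as
;   <POPULARITY URL="example.com/" TEXT="12345"/>
; (illustrative snippet, not captured output). The parser above locates
; TEXT=" and copies everything up to the closing "/>; a quick demo of that
; extraction on a sample string:
AlexaSample$ = "<POPULARITY URL=" + Chr(34) + "example.com/" + Chr(34) + " TEXT=" + Chr(34) + "12345" + Chr(34) + "/>"
AlexaPos = FindString(AlexaSample$, "TEXT=" + Chr(34), 1) + 6
Debug Mid(AlexaSample$, AlexaPos, FindString(AlexaSample$, "/>", AlexaPos) - 1 - AlexaPos) ; -> "12345"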
;#############################################
; Google Search
Procedure.s GetGoogleResult(SearchKeyword$, NumResults.l, LineNumbering=0)
  GoogleResult$ = "" ; Default if no connect
  ; Recursion: Google doesn't display more than 100 results on one page
  If NumResults > 100
    Delay(150) ; yes, Google BANS your IP if you hammer their service
    GoogleResult$ = GetGoogleResult(SearchKeyword$, NumResults-100, LineNumbering)
  EndIf
  ; A double CRLF from the last recursive call means Google has no more results
  ; (our requested results number was too high for the search term)
  If Right(GoogleResult$, 4) = #CRLF$ + #CRLF$
    ProcedureReturn GoogleResult$
  EndIf
  ; Search query
  ; note: using google.com/ie? for quick results!
  #GOOGLE_SEARCH_AGENT = "PBAGENT/1.0"
  SearchQuery$ = "http://www.google.com/ie?q=" + SearchKeyword$ + "&num=" + Str(NumResults) + "&hl=en&start=" + Str(NumResults-100) + "&sa=n"
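  ; Note on the query above (a reading of the code, not verified against any
  ; Google documentation): num= asks for up to NumResults hits and start=
  ; offsets the page by NumResults-100, so with at most 100 results per page
  ; each recursion level effectively fetches the next block of 100.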
  ; Network open
  hInet.l = InternetOpen_(#GOOGLE_SEARCH_AGENT, 0, #Null, #Null, 0)
  If (hInet)
    ; request file
    hFile = InternetOpenUrl_(hInet, @SearchQuery$, #Null, 0, $80000000, #Null)
    If (hFile)
      chunk$ = Space(1024)
      Repeat
        ret = InternetReadFile_(hFile, @chunk$, 1024, @numRead)
        buffer$ + Left(chunk$, numRead)
      Until (numRead = 0)
      InternetCloseHandle_(hFile)
    EndIf
    ; close
    InternetCloseHandle_(hInet)
    hInet = 0
    ; Request Info
    NumGoogleResults.l = CountString(buffer$, "<nobr>")
    Debug "Query: " + SearchQuery$
    Debug "num res: " + Str(NumGoogleResults) + " num wanted: " + Str(NumResults)
    Debug ""
    ; Grab Links (Parse)
    ; vars
    TagPlace.l = 1
    LinkStart.l = 1
    LinkEnd.l
    ; main parsing
    For x = 1 To NumGoogleResults
      ; the <nobr> tag is used to separate the links (just like StringField() in PB)
      TagPlace = FindString(buffer$, "<nobr>", TagPlace+6) ; tag len
      ; Find the title - this is needed for correct link parsing; you can modify it to grab the title as well
      TitlePlaceStart = FindString(buffer$, Chr(34), TagPlace+6)
      TitlePlaceEnd = FindString(buffer$, Chr(34), TitlePlaceStart+1)
      LinkStart = FindString(buffer$, "href=", TitlePlaceEnd+1) ; link len
      LinkStart + 5
      LinkEnd = FindString(buffer$, ">", LinkStart)
      ; line numbering
      If LineNumbering = 1 : GoogleResult$ + Str((NumResults-100) + x) + ". " : EndIf
      ; Update our results
      GoogleResult$ + Mid(buffer$, LinkStart, LinkEnd-LinkStart) + #CRLF$
    Next
    ; Tell the recursive caller that Google has no more results
    ; (again: for example you requested 1000 results but there are only 400)
    If NumGoogleResults < 100
      GoogleResult$ + #CRLF$
    EndIf
  EndIf
  ProcedureReturn GoogleResult$
EndProcedure
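;
; Usage note: requests above 100 results are handled by the recursion above
; (one query per block of 100, with a Delay() between them), and the start
; offset is computed as NumResults-100, so NumResults is best kept a multiple
; of 100. Example call, commented out to avoid an extra round of requests:
; links$ = GetGoogleResult("purebasic forum", 200, 1)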
;#############################################
; Test Stuff
Debug "The site pagerank is: "+ GetPageRank("www.cellphoto.net")
Debug "The site Alexa rank is:" +GetAlexaRank("www.cellphoto.net")
results$ = GetGoogleResult("purebasic",1000,1)
Debug "Google search results - copy from debugger and paste to notepad :)"
Debug results$