Page 1 of 1

PB6.20 | PAC file parser

Posted: Fri Feb 14, 2025 9:05 am
by Kukulkan
Hi,

this is a PAC file parser that uses the new WebViewGadget as its JavaScript engine. These are the features:
  • compiles to a single file (needed js functions are included in the compiled binary)
  • handles the provided PAC file content (you need to load/read the pac file by yourself)
  • tested with PB 6.20 x64 on Windows 10 and Ubuntu 24.04, very likely also working on MacOS?
  • the WebViewGadget has some drawback causing crashes in the worst case.
    • I worked around this by giving the gadget time to process its internal events before closing it, but you might need to increase the waiting time to make it more reliable (if you experience crashes).
    • See code comment, search for "crash".
  • the alert() function is not working.
    • I encapsulated and forwarded to console.log to prevent any error messages because, as the WebViewGadget is invisible, the alert is never visible to the user.
    • In theory, it is possible to implement using a callback, but this likely depends on your environment.
  • no guarantee that the contained JavaScript functions work in all environments and are error free. I have not tested all of them in detail. If you find errors or made enhancements, please share here below in this thread!
The PB include:

Code: Select all

; Parse PAC file using WebViewGadget
;
; PAC reference: 
; https://developer.mozilla.org/en-US/docs/Web/HTTP/Proxy_servers_and_tunneling/Proxy_Auto-Configuration_PAC_file
;
; (c) 2025 / Volker Schmid
;
; PureBasic 6.20

EnableExplicit

DataSection
  ; Include the needed JS code into the generated binary so we do not
  ; create any file dependency for the runtime executable.
  ; The file is looked up next to this source file at compile time
  ; (#PB_Compiler_FilePath), and the label below marks where its bytes start.
  pac_include_js:
  IncludeBinary #PB_Compiler_FilePath + "pac_include.js"
  ; Trailing zero byte: ParsePACFile() reads this data with PeekS(..., -1, #PB_UTF8),
  ; which needs a terminator to find the end of the text.
  Data.a 0
EndDataSection

; State shared between ParsePACFile() and the WebView callbacks.
Structure pacInstance
  winId.i       ; the window id (dummy window that hosts the WebViewGadget)
  webViewId.i   ; the WebViewGadget id
  call.s        ; the JavaScript call to execute once the DOM has loaded
  result.s      ; receives the result reported by the PAC script
EndStructure

; Single shared instance; it is filled by _createPACEnvironment() and
; reset by _closePACEnvironment().
Global globalPAC.pacInstance

; Creates the zero-sized dummy window and the WebViewGadget inside it and
; stores both ids in globalPAC. The program ends if either cannot be created.
Procedure _createPACEnvironment()
  Protected dummyWindow.i = OpenWindow(#PB_Any, 0, 0, 0, 0, "", #PB_Window_NoActivate)
  globalPAC\winId = dummyWindow
  If dummyWindow = 0
    Debug "PAC: Failed to create dummy window"
    End
  EndIf

  ; Add the #PB_WebView_Debug flag here if the JS side needs to be inspected.
  Protected engine.i = WebViewGadget(#PB_Any, 0, 0, 0, 0)
  globalPAC\webViewId = engine
  If engine = 0
    Debug "PAC: Failed to create dummy WebViewGadget. Missing libraries/dependencies?"
    End
  EndIf
EndProcedure

; Frees the WebViewGadget and the dummy window (when they still exist) and
; resets every field of globalPAC to its empty state.
Procedure _closePACEnvironment()
  With globalPAC
    ; The gadget is released before the window that contains it.
    If IsGadget(\webViewId)
      FreeGadget(\webViewId)
    EndIf
    If IsWindow(\winId)
      CloseWindow(\winId)
    EndIf

    \call = ""
    \result = ""
    \webViewId = 0
    \winId = 0
  EndWith
EndProcedure
 
 ; Triggered upon DOM loaded. Then triggers PAC file execution.
Procedure _pacTrigger(jsonParam.s)
  ; Bound as "pacTrigger" and called by the page's DOMContentLoaded handler.
  ; jsonParam.s is not used; the script to run is taken from globalPAC\call.
  Protected pendingScript.s = globalPAC\call
  WebViewExecuteScript(globalPAC\webViewId, pendingScript.s)
  ProcedureReturn #Null
EndProcedure
 
; Receiving the result of PAC file execution
Procedure _pacCallback(jsonParam.s)
  ; Bound as "pacCallback". jsonParam.s is the JSON array of the arguments the
  ; page passed to window.pacCallback(); exactly one element is expected: the
  ; string returned by FindProxyForURL(). It is stored in globalPAC\result.
  Protected json.i = ParseJSON(#PB_Any, jsonParam.s)
  If json.i = 0
    ; ParseJSON() returns 0 on malformed input; using that id would be invalid.
    Debug "PAC: _pacCallback received invalid JSON: " + JSONErrorMessage()
    ProcedureReturn #Null
  EndIf

  Protected root.i = JSONValue(json.i)
  If JSONType(root.i) <> #PB_JSON_Array Or JSONArraySize(root.i) <> 1
    Debug("PAC: _pacCallback had wrong parameter count!")
    FreeJSON(json.i)
    ProcedureReturn #Null
  EndIf

  Protected v.i = GetJSONElement(root.i, 0) ; 0 = result of FindProxyForURL()
  Protected pacRes.s = ""
  If v.i And JSONType(v.i) = #PB_JSON_String
    pacRes.s = GetJSONString(v.i)
  Else
    ; e.g. the PAC function returned null/undefined or a non-string value
    Debug "PAC: _pacCallback result is not a string"
  EndIf
  FreeJSON(json.i)

  globalPAC\result = pacRes.s
  ProcedureReturn #Null
EndProcedure
 
; Returns value.s as a double-quoted JavaScript string literal. The JSON
; encoder is used so quotes, backslashes and control characters are escaped.
Procedure.s _pacJsStringLiteral(value.s)
  Protected literal.s = ~"\"\""
  Protected json.i = CreateJSON(#PB_Any)
  If json.i
    SetJSONString(JSONValue(json.i), value.s)
    literal.s = ComposeJSON(json.i)
    FreeJSON(json.i)
  EndIf
  ProcedureReturn literal.s
EndProcedure

; Evaluates a PAC script for one request.
;   PACfileContent.s - JavaScript source that defines FindProxyForURL(url, host)
;   url.s, host.s    - values passed to FindProxyForURL()
; Returns the string reported by the script, or "" if no result arrived
; within 5 seconds.
Procedure.s ParsePACFile(PACfileContent.s, url.s, host.s)
  _createPACEnvironment()
  Protected includes.s = PeekS(?pac_include_js, -1, #PB_UTF8) ; the needed JS code
  Protected html.s = ~"<!DOCTYPE html>\n<html><body><script>\n" + includes.s +
                     ~"\n\n// ----------- user PAC function ------------\n" + PACfileContent.s + 
                     ~"\n\n// ----------- PB Helper ------------\n" +
                     ~"function runPAC(url, host) {\n" +
                     ~"    var result = FindProxyForURL(url, host);\n" +
                     ~"    window.pacCallback(result); // run PB callback function\n" +
                     ~"}\n" +
                     ~"document.addEventListener(\"DOMContentLoaded\", () => {\n" +
                     ~"    window.pacTrigger();\n" +
                     ~"});\n" +
                     ~"</script></body></html>"
  
  ; Prepare the call before the page is loaded, so _pacTrigger() never sees an
  ; empty script. url/host are encoded as string literals: a quote or backslash
  ; in them can neither break the call nor inject script.
  globalPAC\call = "runPAC(" + _pacJsStringLiteral(url.s) + ", " + _pacJsStringLiteral(host.s) + ");"
  
  BindWebViewCallback(globalPAC\webViewId, "pacCallback", @_pacCallback())
  BindWebViewCallback(globalPAC\webViewId, "pacTrigger", @_pacTrigger())
  
  SetGadgetItemText(globalPAC\webViewId, #PB_Web_HtmlCode, html.s)
  
  ; Wait for max 5 seconds for result
  Protected startTime.i = ElapsedMilliseconds()
  Protected event.i
  Repeat
    event = WindowEvent()
    Delay(10)
  Until globalPAC\result <> "" Or ElapsedMilliseconds() > startTime.i + 5000
  Protected result.s = globalPAC\result ; keep because _closePACEnvironment() cleans up
  
  ; Keep pumping events for a short while before tearing the gadget down.
  startTime.i = ElapsedMilliseconds()
  Repeat
    event = WindowEvent()
    Delay(1)
  Until ElapsedMilliseconds() > startTime.i + 100 ; increase this time if you encounter a crash
  
  UnbindWebViewCallback(globalPAC\webViewId, "pacCallback")
  UnbindWebViewCallback(globalPAC\webViewId, "pacTrigger")
  
  _closePACEnvironment()
  ProcedureReturn result.s
EndProcedure

; RUNS ONLY IF YOU RUN THIS INCLUDE AS MAIN FILE
CompilerIf #PB_Compiler_IsMainFile = 1

  ; Helpful file loader for testing the PAC library
  ; Reads the whole file as UTF-8 text. Returns "" if the file cannot be
  ; opened, is empty, or cannot be buffered.
  Procedure.s _testLoadFile(filename.s)
    Protected content.s = ""
    Protected fl.i, bytesRead.i
    Protected *mem
    Protected fh.i = ReadFile(#PB_Any, filename.s, #PB_File_SharedRead)
    If fh.i = 0
      Debug "PAC: Failed to load [" + filename.s + "]"
      ProcedureReturn ""
    EndIf
    
    fl.i = Lof(fh.i)
    If fl.i > 0 ; AllocateMemory() cannot provide a zero-sized buffer
      *mem = AllocateMemory(fl.i)
      If *mem
        bytesRead.i = ReadData(fh.i, *mem, fl.i)
        If bytesRead.i > 0
          ; Decode only the bytes that were actually read.
          content.s = PeekS(*mem, bytesRead.i, #PB_UTF8 | #PB_ByteLength)
        EndIf
        FreeMemory(*mem)
      Else
        Debug "PAC: Out of memory while loading [" + filename.s + "]"
      EndIf
    EndIf
    
    CloseFile(fh.i)
    ProcedureReturn content.s
  EndProcedure

  ; Evaluate the sample PAC file "pac_testfile.pac" (given as a relative path)
  ; for two different URL/host pairs and print each result to the debug output.
  Debug "PAC-Parser Result 1: [" + ParsePACFile(_testLoadFile("pac_testfile.pac"), "https://test.mozilla.org", "test.mozilla.org") + "]"
  Debug "PAC-Parser Result 2: [" + ParsePACFile(_testLoadFile("pac_testfile.pac"), "https://www.google.com", "www.google.com") + "]"
CompilerEndIf
The required "pac_include.js" must be saved in the same location as the include above:

Code: Select all

// Shared state for the PAC helper functions below.
var weekdayMap = {'SUN': 0, 'MON': 1, 'TUE': 2, 'WED': 3, 'THU': 4, 'FRI': 5, 'SAT': 6}; // Map weekday strings to numbers (SUN=0 to SAT=6)
var dnsCache = {}; // Cache DNS lookups (host name -> value returned by helper_resolveDNS)
var ipCache = new Map(); // For isInNet caching ("host:pattern:mask" -> boolean)
var timezoneOffset = new Date().getTimezoneOffset(); // Time zone offset in minutes, read once at load time

// True when the host name contains no dot, i.e. it has no domain part.
function isPlainHostName(host) {
    const hasDot = host.includes('.');
    return !hasDot;
}

// True when host ends with the given domain. The comparison ignores case,
// and a domain given without a leading dot is treated as if it had one.
function dnsDomainIs(host, domain) {
    const dotted = domain.startsWith('.') ? domain : '.' + domain;
    const suffix = dotted.toLowerCase();
    return host.toLowerCase().endsWith(suffix);
}

// True when host equals hostDom exactly, or when host has no dot and equals
// the first label of hostDom.
function localHostOrDomainIs(host, hostDom) {
    if (host === hostDom) {
        return true;
    }
    if (host.includes('.')) {
        return false;
    }
    const [firstLabel] = hostDom.split('.');
    return host === firstLabel;
}

// Resolves a host name through helper_resolveDNS() and caches successful
// lookups in dnsCache. Returns the resolved value, or the helper's falsy
// result (currently "") when the lookup failed.
function dnsResolve(host) {
    // Only own properties count as cache hits; a plain `dnsCache[host]` lookup
    // would return inherited members for hosts such as "toString" or "constructor".
    if (Object.prototype.hasOwnProperty.call(dnsCache, host)) {
        return dnsCache[host];
    }
    const address = helper_resolveDNS(host);
    if (address) {
        // Failed lookups are not cached, so they are retried on the next call.
        dnsCache[host] = address;
    }
    return address;
}

// Synchronously resolves a host name to an IPv4 address using the
// dns.google JSON resolver. Returns the address as a string, or "" when the
// request fails or the reply contains no A record.
function helper_resolveDNS(host) {
    try {
        const xhr = new XMLHttpRequest();
        // Third argument false = synchronous request; PAC helpers must return a value directly.
        xhr.open('GET', 'https://dns.google/resolve?type=A&name=' + encodeURIComponent(host), false);
        xhr.send();
        if (xhr.status != 200) {
            console.error(`Error ${xhr.status}: ${xhr.statusText}`);
            return "";
        }
        const reply = JSON.parse(xhr.response);
        const answers = reply && Array.isArray(reply.Answer) ? reply.Answer : [];
        // The answer list may start with CNAME records (type 5) whose data is a
        // host name; only an A record (type 1) carries the IPv4 address.
        const record = answers.find(entry => entry && entry.type === 1 && typeof entry.data === 'string');
        return record ? record.data : "";
    } catch (err) { // instead of onerror
        return "";
    }
}

// True when dnsResolve() yields a non-empty value for the host; any error
// raised during the lookup counts as "not resolvable".
function isResolvable(host) {
    try {
        const resolved = dnsResolve(host);
        if (resolved === null) {
            return false;
        }
        return resolved.length > 0;
    } catch (error) {
        return false;
    }
}

// Returns the local IPv4 address as a string. PAC scripts call this
// synchronously, so a string is always returned immediately: the last
// address discovered by helper_getLocalIP(), or '127.0.0.1' while none is
// known (discovery may complete asynchronously after the first call).
function myIpAddress() {
    const FALLBACK = '127.0.0.1';
    const isIPv4 = value => typeof value === 'string' && /^(\d{1,3}\.){3}\d{1,3}$/.test(value);

    if (isIPv4(myIpAddress.cachedAddress)) {
        return myIpAddress.cachedAddress;
    }
    if (!myIpAddress.lookupStarted) {
        myIpAddress.lookupStarted = true; // start discovery only once
        try {
            const found = helper_getLocalIP();
            if (isIPv4(found)) {
                myIpAddress.cachedAddress = found;
            } else if (found && typeof found.then === 'function') {
                // Asynchronous discovery: remember the address for later calls.
                found.then(ip => {
                    if (isIPv4(ip)) {
                        myIpAddress.cachedAddress = ip;
                    }
                }, () => {});
            }
        } catch (err) {
            // Discovery is unavailable in this environment; use the fallback.
        }
    }
    return isIPv4(myIpAddress.cachedAddress) ? myIpAddress.cachedAddress : FALLBACK;
}

// Tries to discover the local IPv4 address through WebRTC ICE candidates.
// Returns the string '127.0.0.1' when RTCPeerConnection is unavailable;
// otherwise returns a Promise that resolves to the first IPv4 candidate
// address, or to '127.0.0.1' when gathering ends or fails without one.
function helper_getLocalIP() {
    const FALLBACK = '127.0.0.1';
    const PeerConnection = window.RTCPeerConnection ||
                           window.mozRTCPeerConnection ||
                           window.webkitRTCPeerConnection;
    if (!PeerConnection) {
        return FALLBACK;
    }
    const pc = new PeerConnection();
    pc.createDataChannel('');
    return new Promise(resolve => {
        let settled = false;
        const finish = address => {
            if (settled) return;
            settled = true;
            pc.onicecandidate = null;
            pc.close();
            resolve(address);
        };
        pc.onicecandidate = event => {
            if (!event.candidate) {
                // A null candidate marks the end of gathering.
                finish(FALLBACK);
                return;
            }
            // Candidate fields are space separated:
            // "candidate:<foundation> <component> <protocol> <priority> <address> <port> typ <type> ..."
            const address = event.candidate.candidate.split(' ')[4];
            if (address && /^(\d{1,3}\.){3}\d{1,3}$/.test(address)) {
                finish(address);
            }
        };
        pc.createOffer(pc.setLocalDescription.bind(pc), () => finish(FALLBACK));
    });
}

// True when ip is a dotted-quad IPv4 address whose four parts are each
// between 0 and 255.
function helper_isValidIP(ip) {
    const dottedQuad = /^(\d{1,3}\.){3}\d{1,3}$/;
    if (!dottedQuad.test(ip)) {
        return false;
    }
    for (const part of ip.split('.')) {
        const value = parseInt(part);
        if (!(value >= 0 && value <= 255)) {
            return false;
        }
    }
    return true;
}

// Packs the first four dot-separated parts of ip into one unsigned 32-bit
// integer, with the first part in the most significant byte.
function helper_ipToInteger(ip) {
    const parts = ip.split('.');
    const packed = [0, 1, 2, 3].reduce(
        (acc, index) => (acc << 8) | parseInt(parts[index]),
        0
    );
    // >>> 0 reinterprets the signed 32-bit result as unsigned.
    return packed >>> 0;
}

// True when the host's IPv4 address lies in the network given by pattern and
// mask. Host names are resolved with dnsResolve(); computed results are
// cached in ipCache under "host:pattern:mask".
function isInNet(host, pattern, mask) {
    const cacheKey = `${host}:${pattern}:${mask}`;
    if (ipCache.has(cacheKey)) {
        return ipCache.get(cacheKey);
    }

    // Use the host as-is when it already is an IPv4 literal, otherwise resolve it.
    const address = helper_isValidIP(host) ? host : dnsResolve(host);
    if (!address) {
        return false; // unresolved hosts are not cached
    }

    const maskBits = helper_ipToInteger(mask);
    const hostNetwork = helper_ipToInteger(address) & maskBits;
    const patternNetwork = helper_ipToInteger(pattern) & maskBits;
    const matches = hostNetwork === patternNetwork;

    ipCache.set(cacheKey, matches);
    return matches;
}

// Returns the number of dots in the host name.
function dnsDomainLevels(host) {
    const dots = host.match(/\./g);
    return dots ? dots.length : 0;
}

// Matches str against a shell-style glob pattern, where '*' matches any run
// of characters and '?' matches exactly one character. Every other character
// in the pattern is matched literally. The whole string must match.
function shExpMatch(str, pattern) {
    const regexSource = pattern
        // Neutralise regex metacharacters in the PATTERN (not in the subject
        // string), leaving the two glob wildcards untouched for the next steps.
        .replace(/[-\/\\^$+.()|[\]{}]/g, '\\$&')
        .replace(/\*/g, '.*')
        .replace(/\?/g, '.');
    return new RegExp('^' + regexSource + '$').test(str);
}

// True when the current weekday is wd1 (single-day form) or lies in the
// inclusive range wd1..wd2. Ranges may wrap around the week ("FRI".."MON").
// Weekdays are 'SUN'..'SAT' in any letter case. Passing "GMT" as the last
// argument evaluates the weekday in UTC instead of local time.
// Unknown weekday names yield false.
function weekdayRange(wd1, wd2, gmt) {
    const weekdays = ['SUN', 'MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT'];
    const isGmtFlag = value => typeof value === 'string' && value.toUpperCase() === 'GMT';

    let useGmt = gmt === true || isGmtFlag(gmt);
    if (isGmtFlag(wd2)) {
        // weekdayRange("MON", "GMT"): single-day form with the GMT flag.
        useGmt = true;
        wd2 = undefined;
    }

    const now = new Date();
    const today = useGmt ? now.getUTCDay() : now.getDay();

    const first = weekdays.indexOf(String(wd1).toUpperCase());
    if (first === -1) {
        return false;
    }
    if (!wd2) {
        return today === first;
    }
    const last = weekdays.indexOf(String(wd2).toUpperCase());
    if (last === -1) {
        return false;
    }
    // Compare day numbers, not day names: 'WED' <= 'FRI' is false as strings.
    if (first > last) {
        return today >= first || today <= last;
    }
    return today >= first && today <= last;
}

// True when the current time of day lies in the given inclusive range.
// Ranges whose end is before their start wrap around midnight.
//
// Supported forms (each may be followed by "GMT" to evaluate in UTC):
//   timeRange(hour)
//   timeRange(hour1, hour2)
//   timeRange(hour1, min1, hour2, min2)
//   timeRange(hour1, min1, sec1, hour2, min2, sec2)
//   timeRange("HH:MM", "HH:MM")   - string form, inclusive of the end minute
// Any other argument combination yields false.
function timeRange(...args) {
    const isGmtFlag = value => typeof value === 'string' && value.toUpperCase() === 'GMT';
    const isClock = value => typeof value === 'string' && value.includes(':');

    let useGmt = false;
    let startSec;
    let endSec;

    if (isClock(args[0]) && isClock(args[1])) {
        // String form. The optional third argument may be omitted; it used to
        // be a destructured parameter, which threw when it was missing.
        const flag = args[2];
        useGmt = flag === true || isGmtFlag(flag) || (Array.isArray(flag) && Boolean(flag[0]));
        const toSeconds = text => {
            const [hours, minutes = 0] = text.split(':').map(Number);
            return hours * 3600 + minutes * 60;
        };
        startSec = toSeconds(args[0]);
        endSec = toSeconds(args[1]) + 59;
    } else {
        const values = args.slice();
        if (values.length > 0 && isGmtFlag(values[values.length - 1])) {
            useGmt = true;
            values.pop();
        }
        const n = values.map(Number);
        if (n.length === 0 || n.some(Number.isNaN)) {
            return false;
        }
        switch (n.length) {
            case 1: // the whole given hour
                startSec = n[0] * 3600;
                endSec = startSec + 3599;
                break;
            case 2: // from the start of hour1 to the end of hour2
                startSec = n[0] * 3600;
                endSec = n[1] * 3600 + 3599;
                break;
            case 4: // inclusive of the end minute
                startSec = n[0] * 3600 + n[1] * 60;
                endSec = n[2] * 3600 + n[3] * 60 + 59;
                break;
            case 6:
                startSec = n[0] * 3600 + n[1] * 60 + n[2];
                endSec = n[3] * 3600 + n[4] * 60 + n[5];
                break;
            default:
                return false;
        }
    }

    const now = new Date();
    const nowSec = useGmt
        ? now.getUTCHours() * 3600 + now.getUTCMinutes() * 60 + now.getUTCSeconds()
        : now.getHours() * 3600 + now.getMinutes() * 60 + now.getSeconds();

    if (endSec < startSec) {
        return nowSec >= startSec || nowSec <= endSec;
    }
    return nowSec >= startSec && nowSec <= endSec;
}

// True when the current moment lies between date1 and date2 (both
// inclusive). Each bound may be a date string, which is parsed with
// new Date(), or a value that is compared as given. When gmt is truthy the
// current time is shifted by the cached timezoneOffset (in minutes) first.
function dateRange(date1, date2, gmt = false) {
    const toBound = value => (typeof value === 'string' ? new Date(value) : value);

    const current = new Date();
    if (gmt) {
        const shiftMs = timezoneOffset * 60000;
        current.setTime(current.getTime() + shiftMs);
    }

    const lower = toBound(date1);
    const upper = toBound(date2);
    return current >= lower && current <= upper;
}

// Overwrite alert function because this does not
// work for our PB implementation
function alert(message) {
    // Replacement for the built-in alert(): the message goes to the console.
    const sink = console;
    sink.log(message);
}
Some test PAC file:

Code: Select all

// Sample PAC entry point: plain host names and hosts under .mozilla.org go
// direct, except for www and merchant, which use the proxy like every other
// host (with DIRECT as the fallback).
function FindProxyForURL(url, host) {
  const viaProxy = "PROXY w3proxy.mozilla.org:8080; DIRECT";

  if (!isPlainHostName(host) && !dnsDomainIs(host, ".mozilla.org")) {
    return viaProxy;
  }
  if (localHostOrDomainIs(host, "www.mozilla.org")) {
    return viaProxy;
  }
  if (localHostOrDomainIs(host, "merchant.mozilla.org")) {
    return viaProxy;
  }
  return "DIRECT";
}
See the end of the include for a test with given pac file.