From f6396c1e81033292cee9b1cc85eefbc7190ad4a2 Mon Sep 17 00:00:00 2001 From: daijro Date: Sun, 29 Sep 2024 03:18:08 -0500 Subject: [PATCH] pythonlib: Add locale, geolocation/locale from IP, & more 0.2.0 - Added `geoip` parameter to automatically find the target IP's longitude, latitude, timezone, country, language/region locale, & spoof the WebRTC IP address. #5 - Added parameter to set the locale #16 - Automatically calculate the locale based on the distribution of speakers in the target geoip region. - Added allow_webgl option #17 - By default, use the current version of Camoufox instead of Browserforge's Firefox versions #14. --- .gitignore | 2 + pythonlib/README.md | 81 +- pythonlib/camoufox/__main__.py | 18 +- pythonlib/camoufox/async_api.py | 101 +- pythonlib/camoufox/browserforge.yml | 37 +- pythonlib/camoufox/exceptions.py | 48 + pythonlib/camoufox/fingerprints.py | 24 +- pythonlib/camoufox/fonts.json | 2 +- pythonlib/camoufox/ip.py | 100 ++ pythonlib/camoufox/locale.py | 274 ++++ pythonlib/camoufox/pkgman.py | 8 +- pythonlib/camoufox/sync_api.py | 101 +- pythonlib/camoufox/territoryInfo.xml | 2024 ++++++++++++++++++++++++++ pythonlib/camoufox/utils.py | 106 +- pythonlib/pyproject.toml | 8 +- 15 files changed, 2770 insertions(+), 164 deletions(-) create mode 100644 pythonlib/camoufox/ip.py create mode 100644 pythonlib/camoufox/locale.py create mode 100644 pythonlib/camoufox/territoryInfo.xml diff --git a/.gitignore b/.gitignore index f4f3fbf..4dfa7fe 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,5 @@ __pycache__/ wget-log *.kate-swp *.log +test.py +*.mmdb diff --git a/pythonlib/README.md b/pythonlib/README.md index ab23aad..2d98ef2 100644 --- a/pythonlib/README.md +++ b/pythonlib/README.md @@ -13,10 +13,12 @@ First, install the `camoufox` package: ```bash -pip install -U camoufox +pip install -U camoufox[geoip] ``` -Then, download the Camoufox browser: +The `geoip` parameter is optional, but heavily recommended if you are using proxies. 
It will download an extra dataset to determine the user's longitude, latitude, timezone, country, & locale. + +Next, download the Camoufox browser: **Windows** @@ -85,7 +87,7 @@ Accepts all Playwright Firefox launch options, along with the following: Parameters: config (Optional[Dict[str, Any]]): - Camoufox properties to use. (read https://github.com/daijro/camoufox/blob/main/README.md) + Camoufox properties to use. os (Optional[ListOrString]): Operating system to use for the fingerprint generation. Can be "windows", "macos", or "linux", or a list of these to choose from randomly. @@ -94,28 +96,40 @@ Parameters: Whether to block all images. block_webrtc (Optional[bool]): Whether to block WebRTC entirely. - firefox_user_prefs (Optional[Dict[str, Any]]): - Firefox user preferences to set. + allow_webgl (Optional[bool]): + Whether to allow WebGL. To prevent leaks, only use this for special cases. + geoip (Optional[Union[str, bool]]): + Calculate longitude, latitude, timezone, country, & locale based on the IP address. + Pass the target IP address to use, or `True` to find the IP address automatically. + locale (Optional[str]): + Locale to use in Camoufox. addons (Optional[List[str]]): - List of Firefox addons to use. - fingerprint (Optional[Fingerprint]): - BrowserForge fingerprint to use. - If not provided, a random fingerprint will be generated based on the provided os & user_agent. - exclude_addons (Optional[List[DefaultAddons]]): - Default addons to exclude. Passed as a list of camoufox.DefaultAddons enums. - user_agent (Optional[ListOrString]): - User agent to use for the fingerprint generation. Either a string or a list of strings. - Note: This must be a valid BrowserForge User-Agent string. - To use a different user agent, set the "navigator.userAgent" preference in `config`. + List of Firefox addons to use. fonts (Optional[List[str]]): Fonts to load into Camoufox (in addition to the default fonts for the target `os`). 
Takes a list of font family names that are installed on the system. + exclude_addons (Optional[List[DefaultAddons]]): + Default addons to exclude. Passed as a list of camoufox.DefaultAddons enums. + fingerprint (Optional[Fingerprint]): + Use a custom BrowserForge fingerprint. Note: Not all values will be implemented. + If not provided, a random fingerprint will be generated based on the provided os & user_agent. + screen (Optional[Screen]): + NOT YET IMPLEMENTED: Constrains the screen dimensions of the generated fingerprint. + Takes a browserforge.fingerprints.Screen instance. + executable_path (Optional[str]): + Custom Camoufox browser executable path. + firefox_user_prefs (Optional[Dict[str, Any]]): + Firefox user preferences to set. + proxy (Optional[Dict[str, str]]): + Proxy to use for the browser. + Note: If geoip is True, a request will be sent through this proxy to find the target IP. + ff_version (Optional[int]): + Firefox version to use. Defaults to the current Camoufox version. + To prevent leaks, only use this for special cases. args (Optional[List[str]]): Arguments to pass to the browser. env (Optional[Dict[str, Union[str, float, bool]]]): Environment variables to set. - executable_path (Optional[str]): - Custom Camoufox browser executable path. **launch_options (Dict[str, Any]): Additional Firefox launch options. ``` @@ -143,6 +157,39 @@ with Camoufox(
+### GeoIP & Proxy Support + +By passing `geoip=True`, or passing in a target IP address, Camoufox will automatically use the target IP's longitude, latitude, timezone, country, locale, & spoof the WebRTC IP address. + +It will also calculate and spoof the browser's language based on the distribution of language speakers in the target region. + +#### Installation + +Install Camoufox with the `geoip` extra: + +```bash +pip install -U camoufox[geoip] +``` + +#### Usage + +Pass in the proxy dictionary as you would with Playwright's `proxy` parameter: + +```python +with Camoufox( + geoip=True, + proxy={ + 'server': 'http://example.com:8080', + 'username': 'username', + 'password': 'password' + } +) as browser: + page = browser.new_page() + page.goto("https://www.browserscan.net") +``` + +
+ ### BrowserForge Integration Camoufox is compatible with [BrowserForge](https://github.com/daijro/browserforge) fingerprints. diff --git a/pythonlib/camoufox/__main__.py b/pythonlib/camoufox/__main__.py index 5c3dc0e..6249548 100644 --- a/pythonlib/camoufox/__main__.py +++ b/pythonlib/camoufox/__main__.py @@ -6,11 +6,13 @@ Adapted from https://github.com/daijro/hrequests/blob/main/hrequests/__main__.py from importlib.metadata import PackageNotFoundError from importlib.metadata import version as pkg_version +from os import environ from typing import Optional import click -from .pkgman import CamoufoxFetcher, installed_verstr, rprint +from .locale import download_mmdb, remove_mmdb +from .pkgman import INSTALL_DIR, CamoufoxFetcher, installed_verstr, rprint class CamoufoxUpdate(CamoufoxFetcher): @@ -71,6 +73,8 @@ def fetch() -> None: Fetch the latest version of Camoufox """ CamoufoxUpdate().update() + # Fetch the GeoIP database + download_mmdb() @cli.command(name='remove') @@ -80,6 +84,8 @@ def remove() -> None: """ if not CamoufoxUpdate().cleanup(): rprint("Camoufox binaries not found!", fg="red") + # Remove the GeoIP database + remove_mmdb() @cli.command(name='test') @@ -90,13 +96,21 @@ def test(url: Optional[str] = None) -> None: """ from .sync_api import Camoufox - with Camoufox(headless=False) as browser: + with Camoufox(headless=False, env=environ) as browser: page = browser.new_page() if url: page.goto(url) page.pause() # Open the Playwright inspector +@cli.command(name='path') +def path() -> None: + """ + Display the path to the Camoufox executable + """ + rprint(INSTALL_DIR, fg="green") + + @cli.command(name='version') def version() -> None: """ diff --git a/pythonlib/camoufox/async_api.py b/pythonlib/camoufox/async_api.py index cb41b9d..3876288 100644 --- a/pythonlib/camoufox/async_api.py +++ b/pythonlib/camoufox/async_api.py @@ -33,72 +33,79 @@ async def AsyncNewBrowser( playwright: Playwright, *, config: Optional[Dict[str, Any]] = None, - addons: 
Optional[List[str]] = None, - fingerprint: Optional[Fingerprint] = None, - exclude_addons: Optional[List[DefaultAddons]] = None, - screen: Optional[Screen] = None, os: Optional[ListOrString] = None, - user_agent: Optional[ListOrString] = None, - fonts: Optional[List[str]] = None, - args: Optional[List[str]] = None, - executable_path: Optional[str] = None, block_images: Optional[bool] = None, block_webrtc: Optional[bool] = None, + allow_webgl: Optional[bool] = None, + geoip: Optional[Union[str, bool]] = None, + locale: Optional[str] = None, + addons: Optional[List[str]] = None, + fonts: Optional[List[str]] = None, + exclude_addons: Optional[List[DefaultAddons]] = None, + fingerprint: Optional[Fingerprint] = None, + screen: Optional[Screen] = None, + executable_path: Optional[str] = None, firefox_user_prefs: Optional[Dict[str, Any]] = None, + proxy: Optional[Dict[str, str]] = None, + ff_version: Optional[int] = None, + args: Optional[List[str]] = None, env: Optional[Dict[str, Union[str, float, bool]]] = None, **launch_options: Dict[str, Any] ) -> Browser: """ Launches a new browser instance for Camoufox. + Accepts all Playwright Firefox launch options, along with the following: Parameters: - playwright (Playwright): - Playwright instance to use. config (Optional[Dict[str, Any]]): - Configuration to use. - addons (Optional[List[str]]): - Addons to use. - fingerprint (Optional[Fingerprint]): - BrowserForge fingerprint to use. - exclude_addons (Optional[List[DefaultAddons]]): - Default addons to exclude. Passed as a list of camoufox.DefaultAddons enums. - screen (Optional[browserforge.fingerprints.Screen]): - BrowserForge screen constraints to use. + Camoufox properties to use. (read https://github.com/daijro/camoufox/blob/main/README.md) os (Optional[ListOrString]): - Operating system to use for the fingerprint. Either a string or a list of strings. - user_agent (Optional[ListOrString]): - User agent to use for the fingerprint. Either a string or a list of strings. 
- fonts (Optional[List[str]]): - Fonts to load into Camoufox, in addition to the default fonts. - args (Optional[List[str]]): - Arguments to pass to the browser. + Operating system to use for the fingerprint generation. + Can be "windows", "macos", or "linux", or a list of these to choose from randomly. + Default: ["windows", "macos", "linux"] block_images (Optional[bool]): Whether to block all images. block_webrtc (Optional[bool]): Whether to block WebRTC entirely. - firefox_user_prefs (Optional[Dict[str, Any]]): - Firefox user preferences to set. - env (Optional[Dict[str, Union[str, float, bool]]]): - Environment variables to set. + allow_webgl (Optional[bool]): + Whether to allow WebGL. To prevent leaks, only use this for special cases. + geoip (Optional[Union[str, bool]]): + Calculate longitude, latitude, timezone, country, & locale based on the IP address. + Pass the target IP address to use, or `True` to find the IP address automatically. + locale (Optional[str]): + Locale to use in Camoufox. + addons (Optional[List[str]]): + List of Firefox addons to use. + fonts (Optional[List[str]]): + Fonts to load into Camoufox (in addition to the default fonts for the target `os`). + Takes a list of font family names that are installed on the system. + exclude_addons (Optional[List[DefaultAddons]]): + Default addons to exclude. Passed as a list of camoufox.DefaultAddons enums. + fingerprint (Optional[Fingerprint]): + Use a custom BrowserForge fingerprint. Note: Not all values will be implemented. + If not provided, a random fingerprint will be generated based on the provided os & user_agent. + screen (Optional[Screen]): + NOT YET IMPLEMENTED: Constrains the screen dimensions of the generated fingerprint. + Takes a browserforge.fingerprints.Screen instance. executable_path (Optional[str]): Custom Camoufox browser executable path. + firefox_user_prefs (Optional[Dict[str, Any]]): + Firefox user preferences to set. 
+ proxy (Optional[Dict[str, str]]): + Proxy to use for the browser. + Note: If geoip is True, a request will be sent through this proxy to find the target IP. + ff_version (Optional[int]): + Firefox version to use. Defaults to the current Camoufox version. + To prevent leaks, only use this for special cases. + args (Optional[List[str]]): + Arguments to pass to the browser. + env (Optional[Dict[str, Union[str, float, bool]]]): + Environment variables to set. **launch_options (Dict[str, Any]): Additional Firefox launch options. """ - opt = get_launch_options( - config=config, - addons=addons, - fingerprint=fingerprint, - exclude_addons=exclude_addons, - screen=screen, - os=os, - user_agent=user_agent, - fonts=fonts, - args=args, - executable_path=executable_path, - env=env, - block_images=block_images, - block_webrtc=block_webrtc, - firefox_user_prefs=firefox_user_prefs, - ) - return await playwright.firefox.launch(**opt, **launch_options) + data = locals() + data.pop('playwright') + + opt = get_launch_options(**data) + return await playwright.firefox.launch(**opt) diff --git a/pythonlib/camoufox/browserforge.yml b/pythonlib/camoufox/browserforge.yml index a52d0f5..a62b040 100644 --- a/pythonlib/camoufox/browserforge.yml +++ b/pythonlib/camoufox/browserforge.yml @@ -1,7 +1,8 @@ # Mappings of Browserforge fingerprints to Camoufox config properties. navigator: - # Note: Browserforge tends to have outdated UAs + # Note: Browserforge tends to have outdated UAs. + # The version will be replaced in Camoufox. 
userAgent: navigator.userAgent # userAgentData not in Firefox doNotTrack: navigator.doNotTrack @@ -10,8 +11,9 @@ navigator: appVersion: navigator.appVersion oscpu: navigator.oscpu # webdriver is always True - language: navigator.language - languages: navigator.languages + # Locale is now implemented separately: + # language: navigator.language + # languages: navigator.languages platform: navigator.platform # deviceMemory not in Firefox hardwareConcurrency: navigator.hardwareConcurrency @@ -26,26 +28,27 @@ navigator: screen: # hasHDR is not implemented in Camoufox - availHeight: screen.availHeight - availWidth: screen.availWidth - availTop: screen.availTop - availLeft: screen.availLeft - height: screen.height - width: screen.width + # Screen size values seem to be inconsistent, and will not be implemented for the time being. + # availHeight: screen.availHeight + # availWidth: screen.availWidth + # availTop: screen.availTop + # availLeft: screen.availLeft + # height: screen.height + # width: screen.width colorDepth: screen.colorDepth pixelDepth: screen.pixelDepth # devicePixelRatio is not recommended. Any value other than 1.0 is suspicious. pageXOffset: screen.pageXOffset pageYOffset: screen.pageYOffset # Disable viewport hijacking temporarily. 
- # outerHeight: window.outerHeight - # outerWidth: window.outerWidth - # innerHeight: window.innerHeight - # innerWidth: window.innerWidth + # outerHeight: window.outerHeight + # outerWidth: window.outerWidth + # innerHeight: window.innerHeight + # innerWidth: window.innerWidth screenX: window.screenX - # These seem to not be generating properly in Browserforge: - # clientWidth: document.body.clientWidth - # clientHeight: document.body.clientHeight + # Tends to generate out of bounds (network inconsistencies): + # clientWidth: document.body.clientWidth + # clientHeight: document.body.clientHeight videoCard: renderer: webGl:renderer @@ -53,7 +56,7 @@ videoCard: headers: # headers.User-Agent is redundant with navigator.userAgent - Accept-Language: headers.Accept-Language + # headers.Accept-Language is redundant with locale:* Accept-Encoding: headers.Accept-Encoding battery: diff --git a/pythonlib/camoufox/exceptions.py b/pythonlib/camoufox/exceptions.py index 3f817d7..e646475 100644 --- a/pythonlib/camoufox/exceptions.py +++ b/pythonlib/camoufox/exceptions.py @@ -52,3 +52,51 @@ class MissingDebugPort(ValueError): """ ... + + +class LocaleError(Exception): + """ + Raised when the locale is invalid. + """ + + ... + + +class InvalidIP(Exception): + """ + Raised when an IP address is invalid. + """ + + ... + + +class InvalidProxy(Exception): + """ + Raised when a proxy is invalid. + """ + + ... + + +class UnknownIPLocation(LocaleError): + """ + Raised when the location of an IP is unknown. + """ + + ... + + +class UnknownTerritory(LocaleError): + """ + Raised when the territory is unknown. + """ + + ... + + +class NotInstalledGeoIPExtra(ImportError): + """ + Raised when the geoip2 module is not installed. + """ + + ... 
diff --git a/pythonlib/camoufox/fingerprints.py b/pythonlib/camoufox/fingerprints.py index 4963936..f870f49 100644 --- a/pythonlib/camoufox/fingerprints.py +++ b/pythonlib/camoufox/fingerprints.py @@ -1,5 +1,7 @@ import os.path +import re from dataclasses import asdict +from typing import Optional from browserforge.fingerprints import Fingerprint, FingerprintGenerator from yaml import CLoader, load @@ -11,7 +13,9 @@ with open(os.path.join(os.path.dirname(__file__), 'browserforge.yml'), 'r') as f FP_GENERATOR = FingerprintGenerator(browser='firefox', os=('linux', 'macos', 'windows')) -def _cast_to_properties(camoufox_data: dict, cast_enum: dict, bf_dict: dict) -> None: +def _cast_to_properties( + camoufox_data: dict, cast_enum: dict, bf_dict: dict, ff_version: Optional[str] = None +) -> None: """ Casts Browserforge fingerprints to Camoufox config properties. """ @@ -25,26 +29,34 @@ def _cast_to_properties(camoufox_data: dict, cast_enum: dict, bf_dict: dict) -> continue # If the value is a dictionary, recursively recall if isinstance(data, dict): - _cast_to_properties(camoufox_data, type_key, data) + _cast_to_properties(camoufox_data, type_key, data, ff_version) continue # Fix values that are out of bounds if type_key.startswith("screen.") and isinstance(data, int) and data < 0: data = 0 + # Replace the Firefox versions with ff_version + if ff_version and isinstance(data, str): + data = re.sub(r'(? dict: +def from_browserforge(fingerprint: Fingerprint, ff_version: Optional[str] = None) -> dict: camoufox_data = {} - _cast_to_properties(camoufox_data, cast_enum=BROWSERFORGE_DATA, bf_dict=asdict(fingerprint)) + _cast_to_properties( + camoufox_data, + cast_enum=BROWSERFORGE_DATA, + bf_dict=asdict(fingerprint), + ff_version=ff_version, + ) return camoufox_data -def generate(**config) -> dict: +def generate(ff_version: Optional[str] = None, **config) -> dict: """ Generates a Firefox fingerprint. 
""" data = FP_GENERATOR.generate(**config) - return from_browserforge(data) + return from_browserforge(data, ff_version=ff_version) if __name__ == "__main__": diff --git a/pythonlib/camoufox/fonts.json b/pythonlib/camoufox/fonts.json index dc92e4e..8662b4c 100644 --- a/pythonlib/camoufox/fonts.json +++ b/pythonlib/camoufox/fonts.json @@ -1,5 +1,5 @@ { - "win": [ + "win": [ "Arial", "Arial Black", "Bahnschrift", "Calibri", "Calibri Light", "Cambria", "Cambria Math", "Candara", "Candara Light", "Comic Sans MS", "Consolas", "Constantia", "Corbel", "Corbel Light", "Courier New", "Ebrima", "Franklin Gothic Medium", "Gabriola", "Gadugi", "Georgia", "HoloLens MDL2 Assets", "Impact", "Ink Free", "Javanese Text", "Leelawadee UI", "Leelawadee UI Semilight", "Lucida Console", "Lucida Sans Unicode", "MS Gothic", "MS PGothic", "MS UI Gothic", "MV Boli", "Malgun Gothic", "Malgun Gothic Semilight", "Marlett", "Microsoft Himalaya", "Microsoft JhengHei", "Microsoft JhengHei Light", "Microsoft JhengHei UI", "Microsoft JhengHei UI Light", "Microsoft New Tai Lue", "Microsoft PhagsPa", "Microsoft Sans Serif", "Microsoft Tai Le", "Microsoft YaHei", "Microsoft YaHei Light", "Microsoft YaHei UI", "Microsoft YaHei UI Light", "Microsoft Yi Baiti", "MingLiU-ExtB", "MingLiU_HKSCS-ExtB", "Mongolian Baiti", "Myanmar Text", "NSimSun", "Nirmala UI", "Nirmala UI Semilight", "PMingLiU-ExtB", "Palatino Linotype", "Segoe Fluent Icons", "Segoe MDL2 Assets", "Segoe Print", "Segoe Script", "Segoe UI", "Segoe UI Black", "Segoe UI Emoji", "Segoe UI Historic", "Segoe UI Light", "Segoe UI Semibold", "Segoe UI Semilight", "Segoe UI Symbol", "Segoe UI Variable", "SimSun", "SimSun-ExtB", "Sitka", "Sitka Text", "Sylfaen", "Symbol", "Tahoma", "Times New Roman", "Trebuchet MS", "Twemoji Mozilla", "Verdana", "Webdings", "Wingdings", "Yu Gothic", "Yu Gothic Light", "Yu Gothic Medium", "Yu Gothic UI", "Yu Gothic UI Light", "Yu Gothic UI Semibold", "Yu Gothic UI Semilight", "\u5b8b\u4f53", 
"\u5fae\u8edf\u6b63\u9ed1\u9ad4", "\u5fae\u8edf\u6b63\u9ed1\u9ad4 Light", "\u5fae\u8f6f\u96c5\u9ed1", "\u5fae\u8f6f\u96c5\u9ed1 Light", "\u65b0\u5b8b\u4f53", "\u65b0\u7d30\u660e\u9ad4-ExtB", "\u6e38\u30b4\u30b7\u30c3\u30af", "\u6e38\u30b4\u30b7\u30c3\u30af Light", "\u6e38\u30b4\u30b7\u30c3\u30af Medium", "\u7d30\u660e\u9ad4-ExtB", "\u7d30\u660e\u9ad4_HKSCS-ExtB", "\ub9d1\uc740 \uace0\ub515", "\ub9d1\uc740 \uace0\ub515 Semilight", "\uff2d\uff33 \u30b4\u30b7\u30c3\u30af", "\uff2d\uff33 \uff30\u30b4\u30b7\u30c3\u30af" ], "mac": [ diff --git a/pythonlib/camoufox/ip.py b/pythonlib/camoufox/ip.py new file mode 100644 index 0000000..b018612 --- /dev/null +++ b/pythonlib/camoufox/ip.py @@ -0,0 +1,100 @@ +import re +from dataclasses import dataclass +from functools import lru_cache +from typing import Dict, Optional, Tuple + +import requests + +from .exceptions import InvalidIP, InvalidProxy + +""" +Helpers to find the user's public IP address for geolocation. +""" + + +@dataclass +class Proxy: + """ + Stores proxy information. + """ + + server: str + username: str + password: str + + @staticmethod + def parse_server(server: str) -> Tuple[str, str, Optional[str]]: + """ + Parses the proxy server string. + """ + proxy_match = re.match(r'^(?P\w+)://(?P.*?)(?:\:(?P\d+))?$', server) + if not proxy_match: + raise InvalidProxy(f"Invalid proxy server: {server}") + return proxy_match['schema'], proxy_match['url'], proxy_match['port'] + + def as_string(self) -> str: + schema, url, port = self.parse_server(self.server) + result = f"{schema}://" + if self.username: + result += f"{self.username}" + if self.password: + result += f":{self.password}" + + result += f"@{url}" + if port: + result += f":{port}" + return result + + @staticmethod + def as_requests_proxy(proxy_string: str) -> Dict[str, str]: + """ + Converts the proxy to a requests proxy dictionary. 
+ """ + return { + 'http': proxy_string, + 'https': proxy_string, + } + + +def valid_ipv4(ip: str) -> bool: + return bool(re.match(r'^(?:[0-9]{1,3}\.){3}[0-9]{1,3}$', ip)) + + +def valid_ipv6(ip: str) -> bool: + return bool(re.match(r'^(([0-9a-fA-F]{0,4}:){1,7}[0-9a-fA-F]{0,4})$', ip)) + + +def validate_ip(ip: str) -> None: + if not valid_ipv4(ip) and not valid_ipv6(ip): + raise InvalidIP(f"Invalid IP address: {ip}") + + +@lru_cache(maxsize=None) +def public_ip(proxy: Optional[str] = None) -> str: + """ + Sends a request to a public IP api + """ + URLS = [ + # Prefers IPv4 + "https://api.ipify.org", + "https://checkip.amazonaws.com", + "https://ipinfo.io/ip", + # IPv4 & IPv6 + "https://icanhazip.com", + "https://ifconfig.co/ip", + "https://ipecho.net/plain", + ] + for url in URLS: + try: + resp = requests.get( + url, + proxies=Proxy.as_requests_proxy(proxy) if proxy else None, + timeout=5, + ) + resp.raise_for_status() + ip = resp.text.strip() + validate_ip(ip) + return ip + except (requests.RequestException, InvalidIP): + pass + raise InvalidIP("Failed to get IP address") diff --git a/pythonlib/camoufox/locale.py b/pythonlib/camoufox/locale.py new file mode 100644 index 0000000..91e3ba8 --- /dev/null +++ b/pythonlib/camoufox/locale.py @@ -0,0 +1,274 @@ +import os +import xml.etree.ElementTree as ET # nosec +from dataclasses import dataclass +from pathlib import Path +from random import choice as randchoice +from typing import Any, Dict, List, Optional, Tuple, cast + +import numpy as np +from language_tags import tags + +from camoufox.pkgman import rprint, webdl + +from .exceptions import NotInstalledGeoIPExtra, UnknownIPLocation, UnknownTerritory +from .ip import validate_ip + +try: + import geoip2.database # type: ignore +except ImportError: + ALLOW_GEOIP = False +else: + ALLOW_GEOIP = True + +LOCAL_DATA = Path(os.path.abspath(__file__)).parent + + +""" +Data structures for locale and geolocation info +""" + + +@dataclass +class Locale: + """ + Stores locale, 
region, and script information. + """ + + language: str + region: str + script: Optional[str] = None + + @property + def as_string(self) -> str: + return f"{self.language}-{self.region}" + + def as_config(self) -> Dict[str, str]: + """ + Converts the locale to a config dictionary. + """ + data = { + 'locale:region': self.region, + 'locale:language': self.language, + } + if self.script: + data['locale:script'] = self.script + return data + + +@dataclass +class Geolocation: + """ + Stores geolocation information. + """ + + locale: Locale + longitude: float + latitude: float + timezone: str + accuracy: Optional[float] = None + + def as_config(self) -> Dict[str, Any]: + """ + Converts the geolocation to a config dictionary. + """ + data = { + 'geolocation:longitude': self.longitude, + 'geolocation:latitude': self.latitude, + 'timezone': self.timezone, + **self.locale.as_config(), + } + if self.accuracy: + data['geolocation:accuracy'] = self.accuracy + return data + + +""" +Helpers to validate and normalize locales +""" + + +def verify_locales(locales: List[str]) -> None: + """ + Verifies that all locales are valid. + """ + for loc in locales: + if tags.check(loc): + continue + raise ValueError( + f"Invalid locale: '{loc}'. All locales must be in the format of language[-script][-region]" + ) + + +def normalize_locale(locale: str) -> Locale: + """ + Normalizes and validates a locale code. + """ + locales = locale.split(',') + verify_locales(locales) + if len(locales) > 1: + locale = randchoice(locales) # nosec + + # Parse the locale + parser = tags.tag(locale) + if not parser.region: + raise ValueError(f"Invalid locale: {locale}. Region is required.") + + record = parser.language.data['record'] + + # Return a formatted locale object + return Locale( + language=record['Subtag'], + region=parser.region.data['record']['Subtag'], + script=record.get('Suppress-Script'), + ) + + +""" +Helpers to fetch geolocation, timezone, and locale data given an IP. 
+""" + +MMDB_FILE = LOCAL_DATA / 'GeoLite2-City.mmdb' +MMDB_URL = 'https://github.com/P3TERX/GeoLite.mmdb/releases/latest/download/GeoLite2-City.mmdb' + + +def geoip_allowed() -> None: + """ + Checks if the geoip2 module is available. + """ + if not ALLOW_GEOIP: + raise NotInstalledGeoIPExtra( + 'Please install the geoip extra to use this feature: pip install camoufox[geoip]' + ) + + +def download_mmdb() -> None: + """ + Downloads the MaxMind GeoIP2 database. + """ + geoip_allowed() + + with open(MMDB_FILE, 'wb') as f: + webdl( + MMDB_URL, + desc='Downloading GeoIP database', + buffer=f, + ) + + +def remove_mmdb() -> None: + """ + Removes the MaxMind GeoIP2 database. + """ + if not MMDB_FILE.exists(): + rprint("GeoIP database not found.") + return + + MMDB_FILE.unlink() + rprint("GeoIP database removed.") + + +def get_geolocation(ip: str) -> Geolocation: + """ + Gets the geolocation for an IP address. + """ + # Check if the database is downloaded + if not MMDB_FILE.exists(): + download_mmdb() + + # Validate the IP address + validate_ip(ip) + + with geoip2.database.Reader(str(MMDB_FILE)) as reader: + resp = reader.city(ip) + iso_code = cast(str, resp.registered_country.iso_code) + location = resp.location + + # Check if any required attributes are missing + if any(not getattr(location, attr) for attr in ('longitude', 'latitude', 'time_zone')): + raise UnknownIPLocation(f"Unknown IP location: {ip}") + + # Get a statistically correct locale based on the country code + locale_finder = GetLocaleFromTerritory(iso_code) + locale = locale_finder.get_locale() + + return Geolocation( + locale=locale, + longitude=cast(float, resp.location.longitude), + latitude=cast(float, resp.location.latitude), + timezone=cast(str, resp.location.time_zone), + ) + + +""" +Gets a random language based on the territory code. +""" + + +def get_unicode_info() -> ET.Element: + """ + Fetches supplemental data from the territoryInfo.xml file. 
+ Source: https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/supplementalData.xml + """ + with open(LOCAL_DATA / 'territoryInfo.xml', 'rb') as f: + data = ET.XML(f.read()) + assert data is not None, 'Failed to load territoryInfo.xml' + return data + + +class GetLocaleFromTerritory: + """ + Calculates a random language based on the territory code, + based on the probability that a person speaks the language in the territory. + """ + + def __init__(self, iso_code: str): + self.iso_code = iso_code.upper() + self.root = get_unicode_info() + self.languages, self.probabilities = self._load_territory_data() + + def _load_territory_data(self) -> Tuple[np.ndarray, np.ndarray]: + territory = self.root.find(f"territory[@type='{self.iso_code}']") + + if territory is None: + raise UnknownTerritory(f"Unknown territory: {self.iso_code}") + + lang_population = territory.findall('languagePopulation') + + if not lang_population: + raise ValueError(f"No language data found for territory: {self.iso_code}") + + # Use list comprehension for faster data extraction + languages = np.array([lang.get('type') for lang in lang_population]) + percentages = np.array( + [float(lang.get('populationPercent', '0')) for lang in lang_population] + ) + + # Normalize probabilities + total = np.sum(percentages) + probabilities = percentages / total + + return languages, probabilities + + def get_random_language(self) -> str: + """ + Get a random language based on the territory ISO code. + """ + return np.random.choice(self.languages, p=self.probabilities) + + def get_locale(self) -> Locale: + """ + Get a random locale based on the territory ISO code. + Returns as a Locale object. + """ + language = self.get_random_language() + return normalize_locale(f"{language}-{self.iso_code}") + + +if __name__ == "__main__": + # Extra tests... 
+ from timeit import timeit + + print('LanguageSelector:', timeit(lambda: GetLocaleFromTerritory('ES'), number=100)) + + ts = GetLocaleFromTerritory('ES') + print('get_random_language:', timeit(lambda: ts.get_random_language(), number=10000)) diff --git a/pythonlib/camoufox/pkgman.py b/pythonlib/camoufox/pkgman.py index d4f70bb..258ab17 100644 --- a/pythonlib/camoufox/pkgman.py +++ b/pythonlib/camoufox/pkgman.py @@ -5,7 +5,7 @@ import shlex import shutil import sys import tempfile -from io import BytesIO +from io import BufferedWriter, BytesIO from pathlib import Path from typing import List, Optional, Union from zipfile import ZipFile @@ -19,7 +19,7 @@ from typing_extensions import TypeAlias from .exceptions import UnsupportedArchitecture, UnsupportedOS -DownloadBuffer: TypeAlias = Union[BytesIO, tempfile._TemporaryFileWrapper] +DownloadBuffer: TypeAlias = Union[BytesIO, tempfile._TemporaryFileWrapper, BufferedWriter] # Map machine architecture to Camoufox binary name ARCH_MAP: dict[str, str] = { @@ -58,8 +58,8 @@ def rprint(*a, **k): class CamoufoxFetcher: def __init__(self) -> None: self.arch = self.get_platform_arch() - self._version: str | None = None - self._release: str | None = None + self._version: Optional[str] = None + self._release: Optional[str] = None self.pattern: re.Pattern = re.compile(rf'camoufox-(.+)-(.+)-{OS_NAME}\.{self.arch}\.zip') self.fetch_latest() diff --git a/pythonlib/camoufox/sync_api.py b/pythonlib/camoufox/sync_api.py index 28fd343..8174f79 100644 --- a/pythonlib/camoufox/sync_api.py +++ b/pythonlib/camoufox/sync_api.py @@ -33,72 +33,79 @@ def NewBrowser( playwright: Playwright, *, config: Optional[Dict[str, Any]] = None, - addons: Optional[List[str]] = None, - fingerprint: Optional[Fingerprint] = None, - exclude_addons: Optional[List[DefaultAddons]] = None, - screen: Optional[Screen] = None, os: Optional[ListOrString] = None, - user_agent: Optional[ListOrString] = None, - fonts: Optional[List[str]] = None, - args: Optional[List[str]] 
= None, - executable_path: Optional[str] = None, block_images: Optional[bool] = None, block_webrtc: Optional[bool] = None, + allow_webgl: Optional[bool] = None, + geoip: Optional[Union[str, bool]] = None, + locale: Optional[str] = None, + addons: Optional[List[str]] = None, + fonts: Optional[List[str]] = None, + exclude_addons: Optional[List[DefaultAddons]] = None, + fingerprint: Optional[Fingerprint] = None, + screen: Optional[Screen] = None, + executable_path: Optional[str] = None, firefox_user_prefs: Optional[Dict[str, Any]] = None, + proxy: Optional[Dict[str, str]] = None, + ff_version: Optional[int] = None, + args: Optional[List[str]] = None, env: Optional[Dict[str, Union[str, float, bool]]] = None, **launch_options: Dict[str, Any] ) -> Browser: """ Launches a new browser instance for Camoufox. + Accepts all Playwright Firefox launch options, along with the following: Parameters: - playwright (Playwright): - Playwright instance to use. config (Optional[Dict[str, Any]]): - Configuration to use. - addons (Optional[List[str]]): - Addons to use. - fingerprint (Optional[Fingerprint]): - BrowserForge fingerprint to use. - exclude_addons (Optional[List[DefaultAddons]]): - Default addons to exclude. Passed as a list of camoufox.DefaultAddons enums. - screen (Optional[browserforge.fingerprints.Screen]): - BrowserForge screen constraints to use. + Camoufox properties to use. (read https://github.com/daijro/camoufox/blob/main/README.md) os (Optional[ListOrString]): - Operating system to use for the fingerprint. Either a string or a list of strings. - user_agent (Optional[ListOrString]): - User agent to use for the fingerprint. Either a string or a list of strings. - fonts (Optional[List[str]]): - Fonts to load into Camoufox, in addition to the default fonts. - args (Optional[List[str]]): - Arguments to pass to the browser. + Operating system to use for the fingerprint generation. + Can be "windows", "macos", or "linux", or a list of these to choose from randomly. 
+ Default: ["windows", "macos", "linux"] block_images (Optional[bool]): Whether to block all images. block_webrtc (Optional[bool]): Whether to block WebRTC entirely. - firefox_user_prefs (Optional[Dict[str, Any]]): - Firefox user preferences to set. - env (Optional[Dict[str, Union[str, float, bool]]]): - Environment variables to set. + allow_webgl (Optional[bool]): + Whether to allow WebGL. To prevent leaks, only use this for special cases. + geoip (Optional[Union[str, bool]]): + Calculate longitude, latitude, timezone, country, & locale based on the IP address. + Pass the target IP address to use, or `True` to find the IP address automatically. + locale (Optional[str]): + Locale to use in Camoufox. + addons (Optional[List[str]]): + List of Firefox addons to use. + fonts (Optional[List[str]]): + Fonts to load into Camoufox (in addition to the default fonts for the target `os`). + Takes a list of font family names that are installed on the system. + exclude_addons (Optional[List[DefaultAddons]]): + Default addons to exclude. Passed as a list of camoufox.DefaultAddons enums. + fingerprint (Optional[Fingerprint]): + Use a custom BrowserForge fingerprint. Note: Not all values will be implemented. + If not provided, a random fingerprint will be generated based on the provided `os` & `ff_version`. + screen (Optional[Screen]): + NOT YET IMPLEMENTED: Constrains the screen dimensions of the generated fingerprint. + Takes a browserforge.fingerprints.Screen instance. executable_path (Optional[str]): Custom Camoufox browser executable path. + firefox_user_prefs (Optional[Dict[str, Any]]): + Firefox user preferences to set. + proxy (Optional[Dict[str, str]]): + Proxy to use for the browser. + Note: If geoip is True, a request will be sent through this proxy to find the target IP. + ff_version (Optional[int]): + Firefox version to use. Defaults to the current Camoufox version. + To prevent leaks, only use this for special cases. 
+ args (Optional[List[str]]): + Arguments to pass to the browser. + env (Optional[Dict[str, Union[str, float, bool]]]): + Environment variables to set. **launch_options (Dict[str, Any]): Additional Firefox launch options. """ - opt = get_launch_options( - config=config, - addons=addons, - fingerprint=fingerprint, - exclude_addons=exclude_addons, - screen=screen, - os=os, - user_agent=user_agent, - fonts=fonts, - args=args, - executable_path=executable_path, - env=env, - block_images=block_images, - block_webrtc=block_webrtc, - firefox_user_prefs=firefox_user_prefs, - ) - return playwright.firefox.launch(**opt, **launch_options) + data = locals() + data.pop('playwright') + + opt = get_launch_options(**data) + return playwright.firefox.launch(**opt) diff --git a/pythonlib/camoufox/territoryInfo.xml b/pythonlib/camoufox/territoryInfo.xml new file mode 100644 index 0000000..3526fca --- /dev/null +++ b/pythonlib/camoufox/territoryInfo.xmldiff --git a/pythonlib/camoufox/utils.py b/pythonlib/camoufox/utils.py index 1143c22..113d811 100644 --- a/pythonlib/camoufox/utils.py +++ b/pythonlib/camoufox/utils.py @@ -18,7 +18,9 @@ from .addons import ( ) from .exceptions import InvalidPropertyType, UnknownProperty from .fingerprints import from_browserforge, generate -from .pkgman import OS_NAME, get_path +from .ip import Proxy, public_ip, valid_ipv4, valid_ipv6 +from .locale import geoip_allowed, get_geolocation, normalize_locale +from .pkgman import OS_NAME, get_path, installed_verstr from .xpi_dl import add_default_addons LAUNCH_FILE = { @@ -151,6 +153,25 @@ def update_fonts(config: Dict[str, Any], target_os: str) -> None: config['fonts'] = fonts +def merge_into(target: Dict[str, Any], source: Dict[str, Any]) -> None: + """ + Merges new keys/values from the source dictionary into the target dictionary. + Given that the key does not exist in the target dictionary. 
+ """ + for key, value in source.items(): + if key not in target: + target[key] = value + + +def set_into(target: Dict[str, Any], key: str, value: Any) -> None: + """ + Sets a new key/value into the target dictionary. + Given that the key does not exist in the target dictionary. + """ + if key not in target: + target[key] = value + + def get_launch_options( *, config: Optional[Dict[str, Any]] = None, @@ -158,20 +179,25 @@ def get_launch_options( fingerprint: Optional[Fingerprint] = None, exclude_addons: Optional[List[DefaultAddons]] = None, screen: Optional[Screen] = None, + geoip: Optional[Union[str, bool]] = None, + locale: Optional[str] = None, os: Optional[ListOrString] = None, - user_agent: Optional[ListOrString] = None, fonts: Optional[List[str]] = None, args: Optional[List[str]] = None, executable_path: Optional[str] = None, env: Optional[Dict[str, Union[str, float, bool]]] = None, block_images: Optional[bool] = None, block_webrtc: Optional[bool] = None, + allow_webgl: Optional[bool] = None, + proxy: Optional[Dict[str, str]] = None, + ff_version: Optional[int] = None, firefox_user_prefs: Optional[Dict[str, Any]] = None, + launch_options: Optional[Dict[str, Any]] = None, ) -> Dict[str, Any]: """ Builds the launch options for the Camoufox browser. 
""" - # Validate the config + # Build the config if config is None: config = {} @@ -179,6 +205,8 @@ def get_launch_options( addons = [] if args is None: args = [] + if firefox_user_prefs is None: + firefox_user_prefs = {} # Add the default addons add_default_addons(addons, exclude_addons) @@ -187,42 +215,74 @@ def get_launch_options( if addons: confirm_paths(addons) + # Get the Firefox version + if ff_version: + ff_version_str = str(ff_version) + else: + ff_version_str = installed_verstr().split('.', 1)[0] + # Generate new fingerprint if fingerprint is None: - config = { - **generate( + merge_into( + config, + generate( + ff_version=ff_version_str, screen=screen, os=os, - user_agent=user_agent, ), - **config, - } + ) else: - config = { - **from_browserforge(fingerprint), - **config, - } + merge_into( + config, + from_browserforge(fingerprint, ff_version_str), + ) + target_os = get_target_os(config) # Set a random window.history.length - config['window.history.length'] = randrange(1, 6) - - if fonts: - config['fonts'] = fonts - - validate_config(config) + set_into(config, 'window.history.length', randrange(1, 6)) # nosec # Update fonts list - target_os = get_target_os(config) + if fonts: + config['fonts'] = fonts update_fonts(config, target_os) - # Set Firefox user preferences - if firefox_user_prefs is None: - firefox_user_prefs = {} + # Set geolocation + if geoip: + geoip_allowed() # Assert that geoip is allowed + if geoip is True: + # Find the user's IP address + if proxy: + geoip = public_ip(Proxy(**proxy).as_string()) + else: + geoip = public_ip() + + # Spoof WebRTC if not blocked + if not block_webrtc: + if valid_ipv4(geoip): + set_into(config, 'webrtc:ipv4', geoip) + firefox_user_prefs['network.dns.disableIPv6'] = True + elif valid_ipv6(geoip): + set_into(config, 'webrtc:ipv6', geoip) + + geolocation = get_geolocation(geoip) + config.update(geolocation.as_config()) + + # Set locale + if locale: + parsed_locale = normalize_locale(locale) + 
config.update(parsed_locale.as_config()) + + # Validate the config + validate_config(config) + + # Set Firefox user preferences if block_images: firefox_user_prefs['permissions.default.image'] = 2 if block_webrtc: firefox_user_prefs['media.peerconnection.enabled'] = False + if allow_webgl: + firefox_user_prefs['webgl.disabled'] = False # Launch threaded_try_load_addons(get_debug_port(args), addons) @@ -235,4 +295,6 @@ def get_launch_options( "args": args, "env": env_vars, "firefox_user_prefs": firefox_user_prefs, + "proxy": proxy, + **(launch_options if launch_options is not None else {}), } diff --git a/pythonlib/pyproject.toml b/pythonlib/pyproject.toml index 02849e6..7f0091b 100644 --- a/pythonlib/pyproject.toml +++ b/pythonlib/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "camoufox" -version = "0.1.3" +version = "0.2.0" description = "Wrapper around Playwright to help launch Camoufox" authors = ["daijro "] license = "MIT" @@ -38,6 +38,12 @@ tqdm = "*" numpy = "*" ua_parser = "*" typing_extensions = "*" +lxml = "*" +language-tags = "*" +geoip2 = {version = "*", optional = true} + +[tool.poetry.extras] +geoip = ["geoip2"] [tool.poetry.scripts] camoufox = "camoufox.__main__:cli"