pythonlib: Add locale, geolocation/locale from IP, & more 0.2.0

- Added `geoip` parameter to automatically find the target IP's longitude, latitude, timezone, country, language/region locale, & spoof the WebRTC IP address. #5
- Added parameter to set the locale #16
- Automatically calculate the locale based on the distribution of speakers in the target geoip region.
- Added allow_webgl option #17
- By default, use the current version of Camoufox instead of Browserforge's Firefox versions #14.
This commit is contained in:
daijro 2024-09-29 03:18:08 -05:00
parent 5263cb6305
commit f6396c1e81
15 changed files with 2770 additions and 164 deletions

2
.gitignore vendored
View file

@ -16,3 +16,5 @@ __pycache__/
wget-log
*.kate-swp
*.log
test.py
*.mmdb

View file

@ -13,10 +13,12 @@
First, install the `camoufox` package:
```bash
pip install -U camoufox
pip install -U camoufox[geoip]
```
Then, download the Camoufox browser:
The `geoip` extra is optional, but heavily recommended if you are using proxies. It downloads an additional dataset used to determine the user's longitude, latitude, timezone, country, & locale.
Next, download the Camoufox browser:
**Windows**
@ -85,7 +87,7 @@ Accepts all Playwright Firefox launch options, along with the following:
Parameters:
config (Optional[Dict[str, Any]]):
Camoufox properties to use. (read https://github.com/daijro/camoufox/blob/main/README.md)
Camoufox properties to use.
os (Optional[ListOrString]):
Operating system to use for the fingerprint generation.
Can be "windows", "macos", or "linux", or a list of these to choose from randomly.
@ -94,28 +96,40 @@ Parameters:
Whether to block all images.
block_webrtc (Optional[bool]):
Whether to block WebRTC entirely.
firefox_user_prefs (Optional[Dict[str, Any]]):
Firefox user preferences to set.
allow_webgl (Optional[bool]):
Whether to allow WebGL. To prevent leaks, only use this for special cases.
geoip (Optional[Union[str, bool]]):
Calculate longitude, latitude, timezone, country, & locale based on the IP address.
Pass the target IP address to use, or `True` to find the IP address automatically.
locale (Optional[str]):
Locale to use in Camoufox.
addons (Optional[List[str]]):
List of Firefox addons to use.
fingerprint (Optional[Fingerprint]):
BrowserForge fingerprint to use.
If not provided, a random fingerprint will be generated based on the provided os & user_agent.
exclude_addons (Optional[List[DefaultAddons]]):
Default addons to exclude. Passed as a list of camoufox.DefaultAddons enums.
user_agent (Optional[ListOrString]):
User agent to use for the fingerprint generation. Either a string or a list of strings.
Note: This must be a valid BrowserForge User-Agent string.
To use a different user agent, set the "navigator.userAgent" preference in `config`.
List of Firefox addons to use.
fonts (Optional[List[str]]):
Fonts to load into Camoufox (in addition to the default fonts for the target `os`).
Takes a list of font family names that are installed on the system.
exclude_addons (Optional[List[DefaultAddons]]):
Default addons to exclude. Passed as a list of camoufox.DefaultAddons enums.
fingerprint (Optional[Fingerprint]):
Use a custom BrowserForge fingerprint. Note: Not all values will be implemented.
If not provided, a random fingerprint will be generated based on the provided os & user_agent.
screen (Optional[Screen]):
NOT YET IMPLEMENTED: Constrains the screen dimensions of the generated fingerprint.
Takes a browserforge.fingerprints.Screen instance.
executable_path (Optional[str]):
Custom Camoufox browser executable path.
firefox_user_prefs (Optional[Dict[str, Any]]):
Firefox user preferences to set.
proxy (Optional[Dict[str, str]]):
Proxy to use for the browser.
Note: If geoip is True, a request will be sent through this proxy to find the target IP.
ff_version (Optional[int]):
Firefox version to use. Defaults to the current Camoufox version.
To prevent leaks, only use this for special cases.
args (Optional[List[str]]):
Arguments to pass to the browser.
env (Optional[Dict[str, Union[str, float, bool]]]):
Environment variables to set.
executable_path (Optional[str]):
Custom Camoufox browser executable path.
**launch_options (Dict[str, Any]):
Additional Firefox launch options.
```
@ -143,6 +157,39 @@ with Camoufox(
<hr width=50>
### GeoIP & Proxy Support
By passing `geoip=True`, or passing in a target IP address, Camoufox will automatically use the target IP's longitude, latitude, timezone, country, locale, & spoof the WebRTC IP address.
It will also calculate and spoof the browser's language based on the distribution of language speakers in the target region.
#### Installation
Install Camoufox with the `geoip` extra:
```bash
pip install -U camoufox[geoip]
```
#### Usage
Pass in the proxy dictionary as you would with Playwright's `proxy` parameter:
```python
with Camoufox(
geoip=True,
proxy={
'server': 'http://example.com:8080',
'username': 'username',
'password': 'password'
}
) as browser:
page = browser.new_page()
page.goto("https://www.browserscan.net")
```
<hr width=50>
### BrowserForge Integration
Camoufox is compatible with [BrowserForge](https://github.com/daijro/browserforge) fingerprints.

View file

@ -6,11 +6,13 @@ Adapted from https://github.com/daijro/hrequests/blob/main/hrequests/__main__.py
from importlib.metadata import PackageNotFoundError
from importlib.metadata import version as pkg_version
from os import environ
from typing import Optional
import click
from .pkgman import CamoufoxFetcher, installed_verstr, rprint
from .locale import download_mmdb, remove_mmdb
from .pkgman import INSTALL_DIR, CamoufoxFetcher, installed_verstr, rprint
class CamoufoxUpdate(CamoufoxFetcher):
@ -71,6 +73,8 @@ def fetch() -> None:
Fetch the latest version of Camoufox
"""
CamoufoxUpdate().update()
# Fetch the GeoIP database
download_mmdb()
@cli.command(name='remove')
@ -80,6 +84,8 @@ def remove() -> None:
"""
if not CamoufoxUpdate().cleanup():
rprint("Camoufox binaries not found!", fg="red")
# Remove the GeoIP database
remove_mmdb()
@cli.command(name='test')
@ -90,13 +96,21 @@ def test(url: Optional[str] = None) -> None:
"""
from .sync_api import Camoufox
with Camoufox(headless=False) as browser:
with Camoufox(headless=False, env=environ) as browser:
page = browser.new_page()
if url:
page.goto(url)
page.pause() # Open the Playwright inspector
@cli.command(name='path')
def path() -> None:
"""
Display the path to the Camoufox executable
"""
rprint(INSTALL_DIR, fg="green")
@cli.command(name='version')
def version() -> None:
"""

View file

@ -33,72 +33,79 @@ async def AsyncNewBrowser(
playwright: Playwright,
*,
config: Optional[Dict[str, Any]] = None,
addons: Optional[List[str]] = None,
fingerprint: Optional[Fingerprint] = None,
exclude_addons: Optional[List[DefaultAddons]] = None,
screen: Optional[Screen] = None,
os: Optional[ListOrString] = None,
user_agent: Optional[ListOrString] = None,
fonts: Optional[List[str]] = None,
args: Optional[List[str]] = None,
executable_path: Optional[str] = None,
block_images: Optional[bool] = None,
block_webrtc: Optional[bool] = None,
allow_webgl: Optional[bool] = None,
geoip: Optional[Union[str, bool]] = None,
locale: Optional[str] = None,
addons: Optional[List[str]] = None,
fonts: Optional[List[str]] = None,
exclude_addons: Optional[List[DefaultAddons]] = None,
fingerprint: Optional[Fingerprint] = None,
screen: Optional[Screen] = None,
executable_path: Optional[str] = None,
firefox_user_prefs: Optional[Dict[str, Any]] = None,
proxy: Optional[Dict[str, str]] = None,
ff_version: Optional[int] = None,
args: Optional[List[str]] = None,
env: Optional[Dict[str, Union[str, float, bool]]] = None,
**launch_options: Dict[str, Any]
) -> Browser:
"""
Launches a new browser instance for Camoufox.
Accepts all Playwright Firefox launch options, along with the following:
Parameters:
playwright (Playwright):
Playwright instance to use.
config (Optional[Dict[str, Any]]):
Configuration to use.
addons (Optional[List[str]]):
Addons to use.
fingerprint (Optional[Fingerprint]):
BrowserForge fingerprint to use.
exclude_addons (Optional[List[DefaultAddons]]):
Default addons to exclude. Passed as a list of camoufox.DefaultAddons enums.
screen (Optional[browserforge.fingerprints.Screen]):
BrowserForge screen constraints to use.
Camoufox properties to use. (read https://github.com/daijro/camoufox/blob/main/README.md)
os (Optional[ListOrString]):
Operating system to use for the fingerprint. Either a string or a list of strings.
user_agent (Optional[ListOrString]):
User agent to use for the fingerprint. Either a string or a list of strings.
fonts (Optional[List[str]]):
Fonts to load into Camoufox, in addition to the default fonts.
args (Optional[List[str]]):
Arguments to pass to the browser.
Operating system to use for the fingerprint generation.
Can be "windows", "macos", or "linux", or a list of these to choose from randomly.
Default: ["windows", "macos", "linux"]
block_images (Optional[bool]):
Whether to block all images.
block_webrtc (Optional[bool]):
Whether to block WebRTC entirely.
firefox_user_prefs (Optional[Dict[str, Any]]):
Firefox user preferences to set.
env (Optional[Dict[str, Union[str, float, bool]]]):
Environment variables to set.
allow_webgl (Optional[bool]):
Whether to allow WebGL. To prevent leaks, only use this for special cases.
geoip (Optional[Union[str, bool]]):
Calculate longitude, latitude, timezone, country, & locale based on the IP address.
Pass the target IP address to use, or `True` to find the IP address automatically.
locale (Optional[str]):
Locale to use in Camoufox.
addons (Optional[List[str]]):
List of Firefox addons to use.
fonts (Optional[List[str]]):
Fonts to load into Camoufox (in addition to the default fonts for the target `os`).
Takes a list of font family names that are installed on the system.
exclude_addons (Optional[List[DefaultAddons]]):
Default addons to exclude. Passed as a list of camoufox.DefaultAddons enums.
fingerprint (Optional[Fingerprint]):
Use a custom BrowserForge fingerprint. Note: Not all values will be implemented.
If not provided, a random fingerprint will be generated based on the provided os & user_agent.
screen (Optional[Screen]):
NOT YET IMPLEMENTED: Constrains the screen dimensions of the generated fingerprint.
Takes a browserforge.fingerprints.Screen instance.
executable_path (Optional[str]):
Custom Camoufox browser executable path.
firefox_user_prefs (Optional[Dict[str, Any]]):
Firefox user preferences to set.
proxy (Optional[Dict[str, str]]):
Proxy to use for the browser.
Note: If geoip is True, a request will be sent through this proxy to find the target IP.
ff_version (Optional[int]):
Firefox version to use. Defaults to the current Camoufox version.
To prevent leaks, only use this for special cases.
args (Optional[List[str]]):
Arguments to pass to the browser.
env (Optional[Dict[str, Union[str, float, bool]]]):
Environment variables to set.
**launch_options (Dict[str, Any]):
Additional Firefox launch options.
"""
opt = get_launch_options(
config=config,
addons=addons,
fingerprint=fingerprint,
exclude_addons=exclude_addons,
screen=screen,
os=os,
user_agent=user_agent,
fonts=fonts,
args=args,
executable_path=executable_path,
env=env,
block_images=block_images,
block_webrtc=block_webrtc,
firefox_user_prefs=firefox_user_prefs,
)
return await playwright.firefox.launch(**opt, **launch_options)
data = locals()
data.pop('playwright')
opt = get_launch_options(**data)
return await playwright.firefox.launch(**opt)

View file

@ -1,7 +1,8 @@
# Mappings of Browserforge fingerprints to Camoufox config properties.
navigator:
# Note: Browserforge tends to have outdated UAs
# Note: Browserforge tends to have outdated UAs.
# The version will be replaced in Camoufox.
userAgent: navigator.userAgent
# userAgentData not in Firefox
doNotTrack: navigator.doNotTrack
@ -10,8 +11,9 @@ navigator:
appVersion: navigator.appVersion
oscpu: navigator.oscpu
# webdriver is always True
language: navigator.language
languages: navigator.languages
# Locale is now implemented separately:
# language: navigator.language
# languages: navigator.languages
platform: navigator.platform
# deviceMemory not in Firefox
hardwareConcurrency: navigator.hardwareConcurrency
@ -26,26 +28,27 @@ navigator:
screen:
# hasHDR is not implemented in Camoufox
availHeight: screen.availHeight
availWidth: screen.availWidth
availTop: screen.availTop
availLeft: screen.availLeft
height: screen.height
width: screen.width
# Screen size values seem to be inconsistent, and will not be implemented for the time being.
# availHeight: screen.availHeight
# availWidth: screen.availWidth
# availTop: screen.availTop
# availLeft: screen.availLeft
# height: screen.height
# width: screen.width
colorDepth: screen.colorDepth
pixelDepth: screen.pixelDepth
# devicePixelRatio is not recommended. Any value other than 1.0 is suspicious.
pageXOffset: screen.pageXOffset
pageYOffset: screen.pageYOffset
# Disable viewport hijacking temporarily.
# outerHeight: window.outerHeight
# outerWidth: window.outerWidth
# innerHeight: window.innerHeight
# innerWidth: window.innerWidth
# outerHeight: window.outerHeight
# outerWidth: window.outerWidth
# innerHeight: window.innerHeight
# innerWidth: window.innerWidth
screenX: window.screenX
# These seem to not be generating properly in Browserforge:
# clientWidth: document.body.clientWidth
# clientHeight: document.body.clientHeight
# Tends to generate out of bounds (network inconsistencies):
# clientWidth: document.body.clientWidth
# clientHeight: document.body.clientHeight
videoCard:
renderer: webGl:renderer
@ -53,7 +56,7 @@ videoCard:
headers:
# headers.User-Agent is redundant with navigator.userAgent
Accept-Language: headers.Accept-Language
# headers.Accept-Language is redundant with locale:*
Accept-Encoding: headers.Accept-Encoding
battery:

View file

@ -52,3 +52,51 @@ class MissingDebugPort(ValueError):
"""
...
class LocaleError(Exception):
    """Base error for an invalid locale."""


class InvalidIP(Exception):
    """Error for an IP address that is not valid."""


class InvalidProxy(Exception):
    """Error for a proxy that is not valid."""


class UnknownIPLocation(LocaleError):
    """Error for an IP address whose location is not known."""


class UnknownTerritory(LocaleError):
    """Error for a territory that is not known."""


class NotInstalledGeoIPExtra(ImportError):
    """Error for a missing geoip2 module (the optional geoip extra)."""

View file

@ -1,5 +1,7 @@
import os.path
import re
from dataclasses import asdict
from typing import Optional
from browserforge.fingerprints import Fingerprint, FingerprintGenerator
from yaml import CLoader, load
@ -11,7 +13,9 @@ with open(os.path.join(os.path.dirname(__file__), 'browserforge.yml'), 'r') as f
FP_GENERATOR = FingerprintGenerator(browser='firefox', os=('linux', 'macos', 'windows'))
def _cast_to_properties(camoufox_data: dict, cast_enum: dict, bf_dict: dict) -> None:
def _cast_to_properties(
camoufox_data: dict, cast_enum: dict, bf_dict: dict, ff_version: Optional[str] = None
) -> None:
"""
Casts Browserforge fingerprints to Camoufox config properties.
"""
@ -25,26 +29,34 @@ def _cast_to_properties(camoufox_data: dict, cast_enum: dict, bf_dict: dict) ->
continue
# If the value is a dictionary, recursively recall
if isinstance(data, dict):
_cast_to_properties(camoufox_data, type_key, data)
_cast_to_properties(camoufox_data, type_key, data, ff_version)
continue
# Fix values that are out of bounds
if type_key.startswith("screen.") and isinstance(data, int) and data < 0:
data = 0
# Replace the Firefox versions with ff_version
if ff_version and isinstance(data, str):
data = re.sub(r'(?<!\d)(1[0-9]{2})(\.0)(?!\d)', rf'{ff_version}\2', data)
camoufox_data[type_key] = data
def from_browserforge(fingerprint: Fingerprint) -> dict:
def from_browserforge(fingerprint: Fingerprint, ff_version: Optional[str] = None) -> dict:
camoufox_data = {}
_cast_to_properties(camoufox_data, cast_enum=BROWSERFORGE_DATA, bf_dict=asdict(fingerprint))
_cast_to_properties(
camoufox_data,
cast_enum=BROWSERFORGE_DATA,
bf_dict=asdict(fingerprint),
ff_version=ff_version,
)
return camoufox_data
def generate(**config) -> dict:
def generate(ff_version: Optional[str] = None, **config) -> dict:
"""
Generates a Firefox fingerprint.
"""
data = FP_GENERATOR.generate(**config)
return from_browserforge(data)
return from_browserforge(data, ff_version=ff_version)
if __name__ == "__main__":

View file

@ -1,5 +1,5 @@
{
"win": [
"win": [
"Arial", "Arial Black", "Bahnschrift", "Calibri", "Calibri Light", "Cambria", "Cambria Math", "Candara", "Candara Light", "Comic Sans MS", "Consolas", "Constantia", "Corbel", "Corbel Light", "Courier New", "Ebrima", "Franklin Gothic Medium", "Gabriola", "Gadugi", "Georgia", "HoloLens MDL2 Assets", "Impact", "Ink Free", "Javanese Text", "Leelawadee UI", "Leelawadee UI Semilight", "Lucida Console", "Lucida Sans Unicode", "MS Gothic", "MS PGothic", "MS UI Gothic", "MV Boli", "Malgun Gothic", "Malgun Gothic Semilight", "Marlett", "Microsoft Himalaya", "Microsoft JhengHei", "Microsoft JhengHei Light", "Microsoft JhengHei UI", "Microsoft JhengHei UI Light", "Microsoft New Tai Lue", "Microsoft PhagsPa", "Microsoft Sans Serif", "Microsoft Tai Le", "Microsoft YaHei", "Microsoft YaHei Light", "Microsoft YaHei UI", "Microsoft YaHei UI Light", "Microsoft Yi Baiti", "MingLiU-ExtB", "MingLiU_HKSCS-ExtB", "Mongolian Baiti", "Myanmar Text", "NSimSun", "Nirmala UI", "Nirmala UI Semilight", "PMingLiU-ExtB", "Palatino Linotype", "Segoe Fluent Icons", "Segoe MDL2 Assets", "Segoe Print", "Segoe Script", "Segoe UI", "Segoe UI Black", "Segoe UI Emoji", "Segoe UI Historic", "Segoe UI Light", "Segoe UI Semibold", "Segoe UI Semilight", "Segoe UI Symbol", "Segoe UI Variable", "SimSun", "SimSun-ExtB", "Sitka", "Sitka Text", "Sylfaen", "Symbol", "Tahoma", "Times New Roman", "Trebuchet MS", "Twemoji Mozilla", "Verdana", "Webdings", "Wingdings", "Yu Gothic", "Yu Gothic Light", "Yu Gothic Medium", "Yu Gothic UI", "Yu Gothic UI Light", "Yu Gothic UI Semibold", "Yu Gothic UI Semilight", "\u5b8b\u4f53", "\u5fae\u8edf\u6b63\u9ed1\u9ad4", "\u5fae\u8edf\u6b63\u9ed1\u9ad4 Light", "\u5fae\u8f6f\u96c5\u9ed1", "\u5fae\u8f6f\u96c5\u9ed1 Light", "\u65b0\u5b8b\u4f53", "\u65b0\u7d30\u660e\u9ad4-ExtB", "\u6e38\u30b4\u30b7\u30c3\u30af", "\u6e38\u30b4\u30b7\u30c3\u30af Light", "\u6e38\u30b4\u30b7\u30c3\u30af Medium", "\u7d30\u660e\u9ad4-ExtB", "\u7d30\u660e\u9ad4_HKSCS-ExtB", "\ub9d1\uc740 \uace0\ub515", 
"\ub9d1\uc740 \uace0\ub515 Semilight", "\uff2d\uff33 \u30b4\u30b7\u30c3\u30af", "\uff2d\uff33 \uff30\u30b4\u30b7\u30c3\u30af"
],
"mac": [

100
pythonlib/camoufox/ip.py Normal file
View file

@ -0,0 +1,100 @@
import re
from dataclasses import dataclass
from functools import lru_cache
from typing import Dict, Optional, Tuple
import requests
from .exceptions import InvalidIP, InvalidProxy
"""
Helpers to find the user's public IP address for geolocation.
"""
@dataclass
class Proxy:
    """
    Stores proxy information.

    Attributes:
        server: Proxy URL in the form ``schema://host[:port]``.
        username: Optional username for proxy authentication.
        password: Optional password for proxy authentication.
    """

    server: str
    # Credentials default to None so that unauthenticated proxies
    # (a dictionary holding only `server`) can be represented.
    username: Optional[str] = None
    password: Optional[str] = None

    @staticmethod
    def parse_server(server: str) -> Tuple[str, str, Optional[str]]:
        """
        Parses the proxy server string.

        Returns:
            A ``(schema, url, port)`` tuple. ``port`` is None when the
            server string does not end in ``:<digits>``.

        Raises:
            InvalidProxy: If the string is not of the form ``schema://...``.
        """
        proxy_match = re.match(r'^(?P<schema>\w+)://(?P<url>.*?)(?:\:(?P<port>\d+))?$', server)
        if not proxy_match:
            raise InvalidProxy(f"Invalid proxy server: {server}")
        return proxy_match['schema'], proxy_match['url'], proxy_match['port']

    def as_string(self) -> str:
        """
        Builds a single proxy URL: ``schema://[username[:password]@]host[:port]``.

        The ``@`` separator is only emitted when credentials are present;
        otherwise the result would be a malformed ``schema://@host`` URL.
        """
        schema, url, port = self.parse_server(self.server)

        credentials = self.username or ""
        if self.password:
            credentials += f":{self.password}"

        result = f"{schema}://"
        if credentials:
            result += f"{credentials}@"
        result += url
        if port:
            result += f":{port}"
        return result

    @staticmethod
    def as_requests_proxy(proxy_string: str) -> Dict[str, str]:
        """
        Converts the proxy to a requests proxy dictionary.

        The same proxy URL is used for both the ``http`` and ``https`` keys.
        """
        return {
            'http': proxy_string,
            'https': proxy_string,
        }
def valid_ipv4(ip: str) -> bool:
    """
    Returns True if `ip` is a valid dotted-quad IPv4 address.

    Parsing is delegated to the standard library so that out-of-range
    octets (e.g. "999.1.1.1") and trailing whitespace are rejected,
    which a digits-and-dots pattern alone does not guarantee.
    """
    # Imported locally so this helper does not rely on a module-level import.
    from ipaddress import IPv4Address

    try:
        IPv4Address(ip)
    except ValueError:  # AddressValueError is a ValueError subclass
        return False
    return True
def valid_ipv6(ip: str) -> bool:
    """
    Returns True if `ip` is a valid IPv6 address.

    Parsing is delegated to the standard library: a loose
    "hex groups separated by colons" pattern accepts strings such as ":"
    or "1:" and rejects valid forms like "::ffff:1.2.3.4".
    """
    # Imported locally so this helper does not rely on a module-level import.
    from ipaddress import IPv6Address

    try:
        IPv6Address(ip)
    except ValueError:  # AddressValueError is a ValueError subclass
        return False
    return True
def validate_ip(ip: str) -> None:
    """
    Ensures `ip` passes the IPv4 or the IPv6 check.

    Raises:
        InvalidIP: If neither check accepts the value.
    """
    if valid_ipv4(ip) or valid_ipv6(ip):
        return
    raise InvalidIP(f"Invalid IP address: {ip}")
@lru_cache(maxsize=None)
def public_ip(proxy: Optional[str] = None) -> str:
    """
    Looks up the public IP address by querying public IP echo services.

    Each service is tried in order; the first response that passes
    `validate_ip` is returned. Results are memoized per `proxy` value
    by the `lru_cache` decorator.

    Parameters:
        proxy: Proxy URL to route the lookup through, if any.

    Raises:
        InvalidIP: If no service returned a valid IP address.
    """
    endpoints = (
        # Prefers IPv4
        "https://api.ipify.org",
        "https://checkip.amazonaws.com",
        "https://ipinfo.io/ip",
        # IPv4 & IPv6
        "https://icanhazip.com",
        "https://ifconfig.co/ip",
        "https://ipecho.net/plain",
    )
    # The proxy mapping is the same for every attempt.
    request_proxies = Proxy.as_requests_proxy(proxy) if proxy else None

    for endpoint in endpoints:
        try:
            response = requests.get(endpoint, proxies=request_proxies, timeout=5)
            response.raise_for_status()
            address = response.text.strip()
            validate_ip(address)
        except (requests.RequestException, InvalidIP):
            # This service failed or returned junk; fall through to the next one.
            continue
        return address

    raise InvalidIP("Failed to get IP address")

View file

@ -0,0 +1,274 @@
import os
import xml.etree.ElementTree as ET # nosec
from dataclasses import dataclass
from pathlib import Path
from random import choice as randchoice
from typing import Any, Dict, List, Optional, Tuple, cast
import numpy as np
from language_tags import tags
from camoufox.pkgman import rprint, webdl
from .exceptions import NotInstalledGeoIPExtra, UnknownIPLocation, UnknownTerritory
from .ip import validate_ip
try:
import geoip2.database # type: ignore
except ImportError:
ALLOW_GEOIP = False
else:
ALLOW_GEOIP = True
LOCAL_DATA = Path(os.path.abspath(__file__)).parent
"""
Data structures for locale and geolocation info
"""
@dataclass
class Locale:
    """
    Holds a language and region, plus an optional script.
    """

    language: str
    region: str
    script: Optional[str] = None

    @property
    def as_string(self) -> str:
        # Only language and region are rendered; the script is not included.
        return f"{self.language}-{self.region}"

    def as_config(self) -> Dict[str, str]:
        """
        Builds the `locale:*` config entries for this locale.

        `locale:script` is only present when a script is set.
        """
        config = {'locale:region': self.region, 'locale:language': self.language}
        if not self.script:
            return config
        config['locale:script'] = self.script
        return config
@dataclass
class Geolocation:
    """
    Holds coordinates, timezone, and locale for a location.
    """

    locale: Locale
    longitude: float
    latitude: float
    timezone: str
    accuracy: Optional[float] = None

    def as_config(self) -> Dict[str, Any]:
        """
        Builds the config entries for this geolocation.

        Combines the coordinate and timezone keys with the locale's own
        config entries. `geolocation:accuracy` is only added when
        `accuracy` is truthy.
        """
        config: Dict[str, Any] = {
            'geolocation:longitude': self.longitude,
            'geolocation:latitude': self.latitude,
            'timezone': self.timezone,
        }
        config.update(self.locale.as_config())
        if not self.accuracy:
            return config
        config['geolocation:accuracy'] = self.accuracy
        return config
"""
Helpers to validate and normalize locales
"""
def verify_locales(locales: List[str]) -> None:
    """
    Checks every locale with `tags.check`.

    Raises:
        ValueError: On the first locale that fails the check.
    """
    for candidate in locales:
        if not tags.check(candidate):
            raise ValueError(
                f"Invalid locale: '{candidate}'. All locales must be in the format of language[-script][-region]"
            )
def normalize_locale(locale: str) -> Locale:
    """
    Validates a locale code and converts it into a Locale object.

    `locale` may be a comma-separated list; every entry is verified and,
    when there is more than one, a single entry is picked at random.

    Raises:
        ValueError: If any entry is invalid, or the chosen one has no region.
    """
    candidates = locale.split(',')
    verify_locales(candidates)
    if len(candidates) > 1:
        locale = randchoice(candidates)  # nosec

    # Parse the chosen locale
    parsed = tags.tag(locale)
    region = parsed.region
    if not region:
        raise ValueError(f"Invalid locale: {locale}. Region is required.")

    language_record = parsed.language.data['record']
    region_subtag = region.data['record']['Subtag']

    # Assemble the formatted locale object
    return Locale(
        language=language_record['Subtag'],
        region=region_subtag,
        script=language_record.get('Suppress-Script'),
    )
"""
Helpers to fetch geolocation, timezone, and locale data given an IP.
"""
MMDB_FILE = LOCAL_DATA / 'GeoLite2-City.mmdb'
MMDB_URL = 'https://github.com/P3TERX/GeoLite.mmdb/releases/latest/download/GeoLite2-City.mmdb'
def geoip_allowed() -> None:
    """
    Guards features that need the geoip2 module.

    Raises:
        NotInstalledGeoIPExtra: If the geoip2 import failed at module load.
    """
    if ALLOW_GEOIP:
        return
    raise NotInstalledGeoIPExtra(
        'Please install the geoip extra to use this feature: pip install camoufox[geoip]'
    )
def download_mmdb() -> None:
    """
    Downloads the MaxMind GeoIP2 database.

    The download is written to a temporary sibling file and only moved to
    `MMDB_FILE` once it has completed. Writing straight to `MMDB_FILE`
    would leave a truncated database behind if the download failed, and
    `get_geolocation` only checks that the file exists.

    Raises:
        NotInstalledGeoIPExtra: If the geoip2 module is not installed.
    """
    geoip_allowed()

    partial = MMDB_FILE.with_suffix('.mmdb.part')
    try:
        with open(partial, 'wb') as f:
            webdl(
                MMDB_URL,
                desc='Downloading GeoIP database',
                buffer=f,
            )
        # Swap the finished download into place in a single step.
        partial.replace(MMDB_FILE)
    finally:
        # Only still present when the download or the move failed.
        if partial.exists():
            partial.unlink()
def remove_mmdb() -> None:
    """
    Deletes the downloaded GeoIP database file, if there is one,
    and reports the outcome.
    """
    if MMDB_FILE.exists():
        MMDB_FILE.unlink()
        rprint("GeoIP database removed.")
    else:
        rprint("GeoIP database not found.")
def get_geolocation(ip: str) -> Geolocation:
    """
    Gets the geolocation for an IP address.

    Parameters:
        ip: IPv4 or IPv6 address to look up.

    Returns:
        A Geolocation holding the coordinates, timezone, and a locale
        picked for the IP's country.

    Raises:
        InvalidIP: If `ip` is not a valid IP address.
        UnknownIPLocation: If the database has no coordinates, timezone,
            or country code for the IP.
    """
    # Check if the database is downloaded
    if not MMDB_FILE.exists():
        download_mmdb()

    # Validate the IP address
    validate_ip(ip)

    with geoip2.database.Reader(str(MMDB_FILE)) as reader:
        resp = reader.city(ip)

    location = resp.location
    longitude = location.longitude
    latitude = location.latitude
    timezone = location.time_zone

    # Compare coordinates against None explicitly: 0.0 is a valid
    # longitude/latitude and must not be treated as missing.
    if longitude is None or latitude is None or not timezone:
        raise UnknownIPLocation(f"Unknown IP location: {ip}")

    # NOTE(review): the registered country is preferred (as before); the
    # located country is only used when the registered one is absent.
    iso_code = resp.registered_country.iso_code or resp.country.iso_code
    if not iso_code:
        raise UnknownIPLocation(f"Unknown IP location: {ip}")

    # Get a statistically correct locale based on the country code
    locale_finder = GetLocaleFromTerritory(cast(str, iso_code))
    locale = locale_finder.get_locale()

    return Geolocation(
        locale=locale,
        longitude=cast(float, longitude),
        latitude=cast(float, latitude),
        timezone=cast(str, timezone),
    )
"""
Gets a random language based on the territory code.
"""
def get_unicode_info() -> ET.Element:
    """
    Parses the bundled territoryInfo.xml file and returns its root element.
    Source: https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/supplementalData.xml
    """
    raw_xml = (LOCAL_DATA / 'territoryInfo.xml').read_bytes()
    data = ET.XML(raw_xml)
    assert data is not None, 'Failed to load territoryInfo.xml'
    return data
class GetLocaleFromTerritory:
    """
    Calculates a random language based on the territory code,
    based on the probability that a person speaks the language in the territory.
    """

    def __init__(self, iso_code: str):
        """
        Parameters:
            iso_code: Territory code; it is upper-cased before the lookup.

        Raises:
            UnknownTerritory: If the territory is not in the data file.
            ValueError: If the territory has no usable language data.
        """
        self.iso_code = iso_code.upper()
        self.root = get_unicode_info()
        self.languages, self.probabilities = self._load_territory_data()

    def _load_territory_data(self) -> Tuple[np.ndarray, np.ndarray]:
        """
        Reads the territory's `languagePopulation` entries.

        Returns:
            A `(languages, probabilities)` pair of equal-length arrays, where
            the probabilities are the population percentages scaled to sum to 1.
        """
        territory = self.root.find(f"territory[@type='{self.iso_code}']")

        if territory is None:
            raise UnknownTerritory(f"Unknown territory: {self.iso_code}")

        lang_population = territory.findall('languagePopulation')

        if not lang_population:
            raise ValueError(f"No language data found for territory: {self.iso_code}")

        languages = np.array([lang.get('type') for lang in lang_population])
        percentages = np.array(
            [float(lang.get('populationPercent', '0')) for lang in lang_population]
        )

        # Normalize probabilities. A zero total would divide to NaN and make
        # the later weighted draw fail, so treat it as missing data.
        total = np.sum(percentages)
        if not total > 0:
            raise ValueError(f"No language data found for territory: {self.iso_code}")
        probabilities = percentages / total

        return languages, probabilities

    def get_random_language(self) -> str:
        """
        Get a random language based on the territory ISO code.

        The draw is weighted by `self.probabilities`.
        """
        # Convert the numpy scalar to a plain str to match the annotation.
        return str(np.random.choice(self.languages, p=self.probabilities))

    def get_locale(self) -> "Locale":
        """
        Get a random locale based on the territory ISO code.
        Returns as a Locale object.
        """
        # NOTE(review): CLDR language ids may use "_" before a script
        # (e.g. "zh_Hant"); language tags use "-", so normalize it here.
        language = self.get_random_language().replace('_', '-')
        return normalize_locale(f"{language}-{self.iso_code}")
if __name__ == "__main__":
    # Quick timing checks: constructing the selector, then drawing a language.
    from timeit import timeit

    construct_time = timeit(lambda: GetLocaleFromTerritory('ES'), number=100)
    print('LanguageSelector:', construct_time)

    ts = GetLocaleFromTerritory('ES')
    draw_time = timeit(lambda: ts.get_random_language(), number=10000)
    print('get_random_language:', draw_time)

View file

@ -5,7 +5,7 @@ import shlex
import shutil
import sys
import tempfile
from io import BytesIO
from io import BufferedWriter, BytesIO
from pathlib import Path
from typing import List, Optional, Union
from zipfile import ZipFile
@ -19,7 +19,7 @@ from typing_extensions import TypeAlias
from .exceptions import UnsupportedArchitecture, UnsupportedOS
DownloadBuffer: TypeAlias = Union[BytesIO, tempfile._TemporaryFileWrapper]
DownloadBuffer: TypeAlias = Union[BytesIO, tempfile._TemporaryFileWrapper, BufferedWriter]
# Map machine architecture to Camoufox binary name
ARCH_MAP: dict[str, str] = {
@ -58,8 +58,8 @@ def rprint(*a, **k):
class CamoufoxFetcher:
def __init__(self) -> None:
self.arch = self.get_platform_arch()
self._version: str | None = None
self._release: str | None = None
self._version: Optional[str] = None
self._release: Optional[str] = None
self.pattern: re.Pattern = re.compile(rf'camoufox-(.+)-(.+)-{OS_NAME}\.{self.arch}\.zip')
self.fetch_latest()

View file

@ -33,72 +33,79 @@ def NewBrowser(
playwright: Playwright,
*,
config: Optional[Dict[str, Any]] = None,
addons: Optional[List[str]] = None,
fingerprint: Optional[Fingerprint] = None,
exclude_addons: Optional[List[DefaultAddons]] = None,
screen: Optional[Screen] = None,
os: Optional[ListOrString] = None,
user_agent: Optional[ListOrString] = None,
fonts: Optional[List[str]] = None,
args: Optional[List[str]] = None,
executable_path: Optional[str] = None,
block_images: Optional[bool] = None,
block_webrtc: Optional[bool] = None,
allow_webgl: Optional[bool] = None,
geoip: Optional[Union[str, bool]] = None,
locale: Optional[str] = None,
addons: Optional[List[str]] = None,
fonts: Optional[List[str]] = None,
exclude_addons: Optional[List[DefaultAddons]] = None,
fingerprint: Optional[Fingerprint] = None,
screen: Optional[Screen] = None,
executable_path: Optional[str] = None,
firefox_user_prefs: Optional[Dict[str, Any]] = None,
proxy: Optional[Dict[str, str]] = None,
ff_version: Optional[int] = None,
args: Optional[List[str]] = None,
env: Optional[Dict[str, Union[str, float, bool]]] = None,
**launch_options: Dict[str, Any]
) -> Browser:
"""
Launches a new browser instance for Camoufox.
Accepts all Playwright Firefox launch options, along with the following:
Parameters:
playwright (Playwright):
Playwright instance to use.
config (Optional[Dict[str, Any]]):
Configuration to use.
addons (Optional[List[str]]):
Addons to use.
fingerprint (Optional[Fingerprint]):
BrowserForge fingerprint to use.
exclude_addons (Optional[List[DefaultAddons]]):
Default addons to exclude. Passed as a list of camoufox.DefaultAddons enums.
screen (Optional[browserforge.fingerprints.Screen]):
BrowserForge screen constraints to use.
Camoufox properties to use. (read https://github.com/daijro/camoufox/blob/main/README.md)
os (Optional[ListOrString]):
Operating system to use for the fingerprint. Either a string or a list of strings.
user_agent (Optional[ListOrString]):
User agent to use for the fingerprint. Either a string or a list of strings.
fonts (Optional[List[str]]):
Fonts to load into Camoufox, in addition to the default fonts.
args (Optional[List[str]]):
Arguments to pass to the browser.
Operating system to use for the fingerprint generation.
Can be "windows", "macos", or "linux", or a list of these to choose from randomly.
Default: ["windows", "macos", "linux"]
block_images (Optional[bool]):
Whether to block all images.
block_webrtc (Optional[bool]):
Whether to block WebRTC entirely.
firefox_user_prefs (Optional[Dict[str, Any]]):
Firefox user preferences to set.
env (Optional[Dict[str, Union[str, float, bool]]]):
Environment variables to set.
allow_webgl (Optional[bool]):
Whether to allow WebGL. To prevent leaks, only use this for special cases.
geoip (Optional[Union[str, bool]]):
Calculate longitude, latitude, timezone, country, & locale based on the IP address.
Pass the target IP address to use, or `True` to find the IP address automatically.
locale (Optional[str]):
Locale to use in Camoufox.
addons (Optional[List[str]]):
List of Firefox addons to use.
fonts (Optional[List[str]]):
Fonts to load into Camoufox (in addition to the default fonts for the target `os`).
Takes a list of font family names that are installed on the system.
exclude_addons (Optional[List[DefaultAddons]]):
Default addons to exclude. Passed as a list of camoufox.DefaultAddons enums.
fingerprint (Optional[Fingerprint]):
Use a custom BrowserForge fingerprint. Note: Not all values will be implemented.
If not provided, a random fingerprint will be generated based on the provided os & user_agent.
screen (Optional[Screen]):
NOT YET IMPLEMENTED: Constrains the screen dimensions of the generated fingerprint.
Takes a browserforge.fingerprints.Screen instance.
executable_path (Optional[str]):
Custom Camoufox browser executable path.
firefox_user_prefs (Optional[Dict[str, Any]]):
Firefox user preferences to set.
proxy (Optional[Dict[str, str]]):
Proxy to use for the browser.
Note: If geoip is True, a request will be sent through this proxy to find the target IP.
ff_version (Optional[int]):
Firefox version to use. Defaults to the current Camoufox version.
To prevent leaks, only use this for special cases.
args (Optional[List[str]]):
Arguments to pass to the browser.
env (Optional[Dict[str, Union[str, float, bool]]]):
Environment variables to set.
**launch_options (Dict[str, Any]):
Additional Firefox launch options.
"""
opt = get_launch_options(
config=config,
addons=addons,
fingerprint=fingerprint,
exclude_addons=exclude_addons,
screen=screen,
os=os,
user_agent=user_agent,
fonts=fonts,
args=args,
executable_path=executable_path,
env=env,
block_images=block_images,
block_webrtc=block_webrtc,
firefox_user_prefs=firefox_user_prefs,
)
return playwright.firefox.launch(**opt, **launch_options)
data = locals()
data.pop('playwright')
opt = get_launch_options(**data)
return playwright.firefox.launch(**opt)

File diff suppressed because it is too large Load diff

View file

@ -18,7 +18,9 @@ from .addons import (
)
from .exceptions import InvalidPropertyType, UnknownProperty
from .fingerprints import from_browserforge, generate
from .pkgman import OS_NAME, get_path
from .ip import Proxy, public_ip, valid_ipv4, valid_ipv6
from .locale import geoip_allowed, get_geolocation, normalize_locale
from .pkgman import OS_NAME, get_path, installed_verstr
from .xpi_dl import add_default_addons
LAUNCH_FILE = {
@ -151,6 +153,25 @@ def update_fonts(config: Dict[str, Any], target_os: str) -> None:
config['fonts'] = fonts
def merge_into(target: Dict[str, Any], source: Dict[str, Any]) -> None:
    """
    Copies entries from the source dictionary into the target dictionary, in place.
    Only keys that are missing from the target are added; a key that already
    exists in the target keeps its current value (even if that value is None).
    """
    for key, value in source.items():
        # setdefault writes only when the key is absent, so existing values always win
        target.setdefault(key, value)
def set_into(target: Dict[str, Any], key: str, value: Any) -> None:
    """
    Sets a single key/value pair in the target dictionary, in place.
    The value is only written if the key is missing from the target; a key that
    already exists keeps its current value (even if that value is None).
    """
    # setdefault writes only when the key is absent, so explicit values are preserved
    target.setdefault(key, value)
def get_launch_options(
*,
config: Optional[Dict[str, Any]] = None,
@ -158,20 +179,25 @@ def get_launch_options(
fingerprint: Optional[Fingerprint] = None,
exclude_addons: Optional[List[DefaultAddons]] = None,
screen: Optional[Screen] = None,
geoip: Optional[Union[str, bool]] = None,
locale: Optional[str] = None,
os: Optional[ListOrString] = None,
user_agent: Optional[ListOrString] = None,
fonts: Optional[List[str]] = None,
args: Optional[List[str]] = None,
executable_path: Optional[str] = None,
env: Optional[Dict[str, Union[str, float, bool]]] = None,
block_images: Optional[bool] = None,
block_webrtc: Optional[bool] = None,
allow_webgl: Optional[bool] = None,
proxy: Optional[Dict[str, str]] = None,
ff_version: Optional[int] = None,
firefox_user_prefs: Optional[Dict[str, Any]] = None,
launch_options: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
"""
Builds the launch options for the Camoufox browser.
"""
# Validate the config
# Build the config
if config is None:
config = {}
@ -179,6 +205,8 @@ def get_launch_options(
addons = []
if args is None:
args = []
if firefox_user_prefs is None:
firefox_user_prefs = {}
# Add the default addons
add_default_addons(addons, exclude_addons)
@ -187,42 +215,74 @@ def get_launch_options(
if addons:
confirm_paths(addons)
# Get the Firefox version
if ff_version:
ff_version_str = str(ff_version)
else:
ff_version_str = installed_verstr().split('.', 1)[0]
# Generate new fingerprint
if fingerprint is None:
config = {
**generate(
merge_into(
config,
generate(
ff_version=ff_version_str,
screen=screen,
os=os,
user_agent=user_agent,
),
**config,
}
)
else:
config = {
**from_browserforge(fingerprint),
**config,
}
merge_into(
config,
from_browserforge(fingerprint, ff_version_str),
)
target_os = get_target_os(config)
# Set a random window.history.length
config['window.history.length'] = randrange(1, 6)
if fonts:
config['fonts'] = fonts
validate_config(config)
set_into(config, 'window.history.length', randrange(1, 6)) # nosec
# Update fonts list
target_os = get_target_os(config)
if fonts:
config['fonts'] = fonts
update_fonts(config, target_os)
# Set Firefox user preferences
if firefox_user_prefs is None:
firefox_user_prefs = {}
# Set geolocation
if geoip:
geoip_allowed() # Assert that geoip is allowed
if geoip is True:
# Find the user's IP address
if proxy:
geoip = public_ip(Proxy(**proxy).as_string())
else:
geoip = public_ip()
# Spoof WebRTC if not blocked
if not block_webrtc:
if valid_ipv4(geoip):
set_into(config, 'webrtc:ipv4', geoip)
firefox_user_prefs['network.dns.disableIPv6'] = True
elif valid_ipv6(geoip):
set_into(config, 'webrtc:ipv6', geoip)
geolocation = get_geolocation(geoip)
config.update(geolocation.as_config())
# Set locale
if locale:
parsed_locale = normalize_locale(locale)
config.update(parsed_locale.as_config())
# Validate the config
validate_config(config)
# Set Firefox user preferences
if block_images:
firefox_user_prefs['permissions.default.image'] = 2
if block_webrtc:
firefox_user_prefs['media.peerconnection.enabled'] = False
if allow_webgl:
firefox_user_prefs['webgl.disabled'] = False
# Launch
threaded_try_load_addons(get_debug_port(args), addons)
@ -235,4 +295,6 @@ def get_launch_options(
"args": args,
"env": env_vars,
"firefox_user_prefs": firefox_user_prefs,
"proxy": proxy,
**(launch_options if launch_options is not None else {}),
}

View file

@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
[tool.poetry]
name = "camoufox"
version = "0.1.3"
version = "0.2.0"
description = "Wrapper around Playwright to help launch Camoufox"
authors = ["daijro <daijro.dev@gmail.com>"]
license = "MIT"
@ -38,6 +38,12 @@ tqdm = "*"
numpy = "*"
ua_parser = "*"
typing_extensions = "*"
lxml = "*"
language-tags = "*"
geoip2 = {version = "*", optional = true}
[tool.poetry.extras]
geoip = ["geoip2"]
[tool.poetry.scripts]
camoufox = "camoufox.__main__:cli"