From bd59304481e4d07200b02e7dc81cad93aaaf3d6c Mon Sep 17 00:00:00 2001 From: daijro Date: Tue, 8 Oct 2024 05:10:12 -0500 Subject: [PATCH] pythonlib: Add Xvfb integration #26 0.2.9 - Automatically uses virtual displays on Linux - Update documentation on README --- pythonlib/README.md | 52 ++++++++++++- pythonlib/camoufox/exceptions.py | 25 +++++++ pythonlib/camoufox/pkgman.py | 2 +- pythonlib/camoufox/utils.py | 54 +++++++++++--- pythonlib/camoufox/virtdisplay.py | 120 ++++++++++++++++++++++++++++++ pythonlib/camoufox/warnings.yml | 14 +++- pythonlib/pyproject.toml | 2 +- 7 files changed, 251 insertions(+), 18 deletions(-) create mode 100644 pythonlib/camoufox/virtdisplay.py diff --git a/pythonlib/README.md b/pythonlib/README.md index adb4906..ae892f6 100644 --- a/pythonlib/README.md +++ b/pythonlib/README.md @@ -8,6 +8,16 @@ --- +## What is this? + +This Python library wraps around Playwright's API to help automatically generate & inject unique device characteristics (OS, CPU info, navigator, fonts, headers, screen dimensions, viewport size, WebGL, addons, etc.) into Camoufox. + +It uses [BrowserForge](https://github.com/daijro/browserforge) under the hood to generate fingerprints that mimic the statistical distribution of device characteristics in real-world traffic. + +In addition, it will also calculate your target geolocation, timezone, and locale to avoid proxy protection ([see demo](https://i.imgur.com/UhSHfaV.png)). + +--- + ## Installation First, install the `camoufox` package: @@ -150,6 +160,8 @@ Parameters: +Camoufox will warn you if your passed configuration might cause leaks. + --- ### Config @@ -169,6 +181,8 @@ with Camoufox( page.goto("https://www.browserscan.net/webrtc") ``` +This can be used to enable features that have not yet been implemented into the Python library. (You shouldn't use this for injecting device fingerprints. Device data is automatically populated.) +
### GeoIP & Proxy Support @@ -177,6 +191,8 @@ By passing `geoip=True`, or passing in a target IP address, Camoufox will automa It will also calculate and spoof the browser's language based on the distribution of language speakers in the target region. +[See demo](https://i.imgur.com/UhSHfaV.png). + #### Installation Install Camoufox with the `geoip` extra: @@ -206,8 +222,9 @@ with Camoufox( ### Remote Server (experimental) -> [!WARNING] -> This feature is experimental and not meant for production use. It uses a hacky workaround to gain access to undocumented Playwright methods. +**Warning! This feature is experimental. It uses a hacky workaround to gain access to undocumented Playwright methods.** + +Camoufox can be run as a remote websocket server. It can be accessed from other devices, and languages other than Python supporting the Playwright API. #### Launching @@ -251,6 +268,35 @@ with sync_playwright() as p:
+### Virtual Display + +In headless mode, all browsers are prone to being detected by anti-bot services due to the drastic differences in the browser's architecture. It is generally **NOT** recommended to use Camoufox in headless mode on a non-Linux OS. + +If you are running Linux, and would like to run Camoufox headlessly in a virtual display, install `xvfb`: + +#### Debian-based distros + +```bash +sudo apt-get install xvfb +``` + +#### Arch-based distros + +```bash +sudo pacman -S xorg-server-xvfb +``` + +#### Confirm `Xvfb` is installed: + +```bash +$ which Xvfb +/usr/bin/Xvfb +``` + +Now, passing `headless=True` will spawn a new lightweight virtual display in the background for Camoufox to run in. + +
+ ### BrowserForge Integration Camoufox is compatible with [BrowserForge](https://github.com/daijro/browserforge) fingerprints. @@ -271,7 +317,7 @@ with Camoufox( **Notes:** -- If Camoufox is being ran in headful mode, the max screen size will be generated based on your monitor's dimensions (+15%). +- If Camoufox is being run in headful mode, the max screen size will be generated based on your monitor's dimensions unless otherwise specified. - To prevent UA-spoofing leaks, Camoufox only generates fingerprints with the same browser version as the current Camoufox version by default. diff --git a/pythonlib/camoufox/exceptions.py b/pythonlib/camoufox/exceptions.py index bcbb6d5..c3d02b3 100644 --- a/pythonlib/camoufox/exceptions.py +++ b/pythonlib/camoufox/exceptions.py @@ -116,3 +116,28 @@ class InvalidOS(ValueError): """ ... + + +class VirtualDisplayError(Exception): + """ + Raised when there is an error with the virtual display. + """ + + ... + + +class CannotFindXvfb(VirtualDisplayError): + """ + Raised when Xvfb cannot be found. + """ + + ... + pass + + +class CannotExecuteXvfb(VirtualDisplayError): + """ + Raised when Xvfb cannot be executed. + """ + + ... diff --git a/pythonlib/camoufox/pkgman.py b/pythonlib/camoufox/pkgman.py index d7579d7..e4152c6 100644 --- a/pythonlib/camoufox/pkgman.py +++ b/pythonlib/camoufox/pkgman.py @@ -344,5 +344,5 @@ def load_yaml(file: str) -> dict: """ Loads a local YAML file and returns it as a dictionary. 
""" - with open(Path(__file__).parent / file, 'r') as f: + with open(LOCAL_DATA / file, 'r') as f: return load(f, Loader=CLoader) diff --git a/pythonlib/camoufox/utils.py b/pythonlib/camoufox/utils.py index 3534d66..b720268 100644 --- a/pythonlib/camoufox/utils.py +++ b/pythonlib/camoufox/utils.py @@ -3,6 +3,7 @@ import sys from os import environ from pprint import pprint from random import randrange +from shutil import which from typing import Any, Dict, List, Literal, Optional, Tuple, Union, cast import numpy as np @@ -31,6 +32,9 @@ from .pkgman import OS_NAME, get_path, installed_verstr from .warnings import LeakWarning from .xpi_dl import add_default_addons +if OS_NAME == 'lin': + from .virtdisplay import VIRTUAL_DISPLAY + LAUNCH_FILE = { 'win': 'camoufox.exe', 'lin': 'camoufox-bin', @@ -162,8 +166,7 @@ def get_screen_cons(headless: Optional[bool] = None) -> Optional[Screen]: # Use the dimensions from the monitor with greatest screen real estate monitor = max(monitors, key=lambda m: m.width * m.height) - # Add 15% buffer - return Screen(max_width=int(monitor.width * 1.15), max_height=int(monitor.height * 1.15)) + return Screen(max_width=monitor.width, max_height=monitor.height) def update_fonts(config: Dict[str, Any], target_os: str) -> None: @@ -224,6 +227,35 @@ def _clean_locals(data: Dict[str, Any]) -> Dict[str, Any]: return data +def handle_headless( + headless: Optional[bool], + env: Dict[str, Union[str, float, bool]], + debug: Optional[bool], + i_know_what_im_doing: Optional[bool], +) -> bool: + """ + Handles the headless mode. 
+ """ + # If headless is not being used, return False + if not headless: + return False + + # Warn the user if headless is being used on a non-Linux OS + # https://github.com/daijro/camoufox/issues/26 + if OS_NAME != 'lin': + LeakWarning.warn('headless-non-linux', i_know_what_im_doing) + return True + + # If Xvfb is avaliable, use it instead of headless to prevent leaks + if which('Xvfb'): + env['DISPLAY'] = VIRTUAL_DISPLAY.new_or_reuse(debug=debug) + return False + + # If Linux is being used and Xvfb is not avaliable, warn the user + LeakWarning.warn('headless-linux', i_know_what_im_doing) + return True + + def merge_into(target: Dict[str, Any], source: Dict[str, Any]) -> None: """ Merges new keys/values from the source dictionary into the target dictionary. @@ -324,13 +356,11 @@ def get_launch_options( firefox_user_prefs = {} if i_know_what_im_doing is None: i_know_what_im_doing = False + if env is None: + env = cast(Dict[str, Union[str, float, bool]], environ) - # Warn the user if headless is being used - # https://github.com/daijro/camoufox/issues/26 - if headless: - LeakWarning.warn('headless', i_know_what_im_doing) - elif headless is None: - headless = False + # Handle headless mode cases + headless = handle_headless(headless, env, debug, i_know_what_im_doing) # Warn the user for manual config settings if not i_know_what_im_doing: @@ -350,13 +380,14 @@ def get_launch_options( # Get the Firefox version if ff_version: ff_version_str = str(ff_version) + LeakWarning.warn('ff_version', i_know_what_im_doing) else: ff_version_str = installed_verstr().split('.', 1)[0] # Generate a fingerprint if fingerprint is None: fingerprint = generate_fingerprint( - screen=screen or get_screen_cons(headless), + screen=screen or get_screen_cons(headless or 'DISPLAY' in env), os=os, ) else: @@ -402,7 +433,8 @@ def get_launch_options( geolocation = get_geolocation(geoip) config.update(geolocation.as_config()) - # Raise a warning when a proxy is being used without spoofing 
geolocation + # Raise a warning when a proxy is being used without spoofing geolocation. + # This is a very bad idea; the warning cannot be ignored with i_know_what_im_doing. elif ( proxy and 'localhost' not in proxy.get('server', '') @@ -444,7 +476,7 @@ def get_launch_options( # Prepare environment variables to pass to Camoufox env_vars = { **get_env_vars(config, target_os), - **(cast(Dict[str, Union[str, float, bool]], environ) if env is None else env), + **env, } return { "executable_path": executable_path or get_path(LAUNCH_FILE[OS_NAME]), diff --git a/pythonlib/camoufox/virtdisplay.py b/pythonlib/camoufox/virtdisplay.py new file mode 100644 index 0000000..1e09ffd --- /dev/null +++ b/pythonlib/camoufox/virtdisplay.py @@ -0,0 +1,120 @@ +import os +import subprocess # nosec +from glob import glob +from shutil import which +from typing import List, Optional + +from camoufox.exceptions import CannotExecuteXvfb, CannotFindXvfb + + +class VirtualDisplay: + """ + A minimal virtual display implementation for Linux. + """ + + def __init__(self) -> None: + """ + Constructor for the VirtualDisplay class (singleton object). 
+ """ + self.proc: Optional[subprocess.Popen] = None + self._display: Optional[int] = None + + xvfb_args = ( + # fmt: off + "-screen", "0", "1x1x8", + "-ac", + "-nolisten", "tcp", + "-extension", "RENDER", + "-extension", "GLX", + "-extension", "COMPOSITE", + "-extension", "XVideo", + "-extension", "XVideo-MotionCompensation", + "-extension", "XINERAMA", + "-shmem", + "-fp", "built-ins", + "-nocursor", + "-br", + # fmt: on + ) + + @property + def xvfb_path(self) -> str: + """ + Get the path to the xvfb executable + """ + path = which("Xvfb") + if not path: + raise CannotFindXvfb("Please install Xvfb to use headless mode.") + if not os.access(path, os.X_OK): + raise CannotExecuteXvfb(f"I do not have permission to execute Xvfb: {path}") + return path + + @property + def xvfb_cmd(self) -> List[str]: + """ + Get the xvfb command + """ + return [self.xvfb_path, f':{self.display}', *self.xvfb_args] + + def execute_xvfb_singleton(self, debug: Optional[bool] = False): + """ + Spawn a detatched process + """ + if debug: + print('Starting virtual display:', ' '.join(self.xvfb_cmd)) + self.proc = subprocess.Popen( # nosec + self.xvfb_cmd, + stdout=None if debug else subprocess.DEVNULL, + stderr=None if debug else subprocess.DEVNULL, + ) + + def new_or_reuse(self, debug: Optional[bool] = False) -> str: + """ + Get the display number + """ + if self.proc is None: + self.execute_xvfb_singleton(debug) + elif debug: + print(f'Using virtual display: {self.display}') + return f':{self.display}' + + def __del__(self): + """ + Terminate the xvfb process + """ + if self.proc: + self.proc.terminate() + + @staticmethod + def _get_lock_files() -> List[str]: + """ + Get list of lock files in /tmp + """ + tmpd = os.environ.get('TMPDIR', '/tmp') # nosec + try: + lock_files = glob(os.path.join(tmpd, ".X*-lock")) + except FileNotFoundError: + return [] + return [p for p in lock_files if os.path.isfile(p)] + + @staticmethod + def _free_display() -> int: + """ + Search for free display + """ + 
ls = list( + map(lambda x: int(x.split("X")[1].split("-")[0]), VirtualDisplay._get_lock_files()) + ) + return max(99, max(ls) + 3) if ls else 99 + + @property + def display(self) -> int: + """ + Get the display number + """ + if self._display is None: + self._display = self._free_display() + return self._display + + +VIRTUAL_DISPLAY = VirtualDisplay() diff --git a/pythonlib/camoufox/warnings.yml b/pythonlib/camoufox/warnings.yml index 9d97183..0aefae7 100644 --- a/pythonlib/camoufox/warnings.yml +++ b/pythonlib/camoufox/warnings.yml @@ -1,7 +1,12 @@ -headless: >- - Headless mode in Camoufox is not recommended at this time. +headless-non-linux: >- + Headless mode is only recommended on Linux at this time. Some WAFs are able to detect headless browsers. The issue is currently being investigated. +headless-linux: >- + Headless mode is only recommended on Linux with Xvfb installed. + Please see the install guide here: + https://github.com/daijro/camoufox/tree/main/pythonlib#virtual-display + navigator: >- Manually setting navigator properties is not recommended. Device information is automatically generated within Camoufox @@ -30,3 +35,8 @@ proxy_without_geoip: >- allow_webgl: >- Enabling WebGL can lead to Canvas fingerprinting and detection. Camoufox will automatically spoof your vendor and renderer, but it cannot spoof your WebGL fingerprint. + +ff_version: >- + Spoofing the Firefox version will likely lead to detection. + If rotating the Firefox version is absolutely necessary, it would be more advisable to + rotate between older versions of Camoufox instead. 
\ No newline at end of file diff --git a/pythonlib/pyproject.toml b/pythonlib/pyproject.toml index a861b1a..1b38ba3 100644 --- a/pythonlib/pyproject.toml +++ b/pythonlib/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "camoufox" -version = "0.2.8" +version = "0.2.9" description = "Wrapper around Playwright to help launch Camoufox" authors = ["daijro "] license = "MIT"