diff --git a/.gitignore b/.gitignore index 53aa603..ad5a06c 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,7 @@ launch.exe # Internal testing /extra-docs pythonlib/test* +jsonvv/test* /.vscode /bundle/fonts/extra pythonlib/*.png diff --git a/jsonvv/README.md b/jsonvv/README.md new file mode 100644 index 0000000..d4874f2 --- /dev/null +++ b/jsonvv/README.md @@ -0,0 +1,706 @@ +# JSONvv + +JSON value validator + +## Overview + +This is a simple JSON schema validator library. It was created for Camoufox to validate passed user configurations. Because I found it useful for other projects, I decided to extract it into a separate library. + +JSONvv's syntax parser is written in pure Python. It does not rely on any dependencies. + +### Example + + + + + + + + + + +
Configuration | Validator
+ +```python +config = { + "username": "johndoe", + "email": "johndoe@example.com", + "age": 30, + "chat": "Hello world!", + "preferences": { + "notifications": True, + "theme": "dark" + }, + "allowed_commands": [ + "/help", "/time", "/weather" + ], + "location": [40.7128, -74.0060], + "hobbies": [ + { + "name": "Traveling", + "cities": ["Paris", "London"] + }, + { + "name": "reading", + "hours": { + "Sunday": 2, + "Monday": 3, + } + } + ] +} +``` + + + +```python +validator = { + "username": "str", # Basic username + "email": "str[/\S+@\S+\.\S+/]", # Validate emails + "age": "int[>=18]", # Age must be 18 or older + "chat": "str | nil", # Optional chat message + "preferences": { + "notifications": "bool", + "theme": "str[light, dark] | nil", # Optional theme + }, + # Commands must start with "/", but not contain "sudo" + "allowed_commands": "array[str[/^//] - str[/sudo/]]", + # Validate coordinate ranges + "location": "tuple[double[-90 - 90], double[-180 - 180]]", + # Handle an array of hobby types + "hobbies": "array[@traveling | @other, >=1]", + "@traveling": { + # Require 1 or more cities/countries iff name is "Traveling" + "*name,type": "str[Traveling]", + "*cities,countries": "array[str[A-Za-z*], >=1]", + }, + "@other": { + "name,type": "str - str[Traveling]", # Non-traveling types + # If hour(s) is specified, require days have >0 hours + "/hours?/": { + "*/.*day/": "int[>0]" + } + } +} +``` + +
+ +
+ +Then, validate the configuration like this: + +```python +from jsonvv import JsonValidator, JvvRuntimeException + +val = JsonValidator(validator) +try: + val.validate(config) +except JvvRuntimeException as exc: + print("Failed:", exc) +else: + print('Config is valid!') +``` + +--- + +## Table of Contents + +- [Key Syntax](#key-syntax) + - [Regex patterns](#regex-patterns) + - [Lists of possible values](#lists-of-possible-values) + - [Required fields (`*`)](#required-fields-) +- [Supported Types](#supported-types) + - [String (`str`)](#string-str) + - [Integer (`int`)](#integer-int) + - [Double (`double`)](#double-double) + - [Boolean (`bool`)](#boolean-bool) + - [Array (`array`)](#array-array) + - [Tuple (`tuple`)](#tuple-tuple) + - [Nested Dictionaries](#nested-dictionaries) + - [Nil (`nil`)](#nil-nil) + - [Any (`any`)](#any-any) + - [Type References (`@`)](#type-references-) +- [Advanced Features](#advanced-features) + - [Subtracting Domains (`-`)](#subtracting-domains--) + - [Union Types (`|`)](#union-types-) + - [Conditional Ranges and Values](#conditional-ranges-and-values) +- [Error Handling](#error-handling) + +--- + +## Key Syntax + +Dictionary keys can be specified in several possible ways: + +- `"key": "type"` +- `"key1,key2,key3": "type"` +- `"/key\d+/": "type"` +- `"*required_key": "type"` + +### Regex patterns + +To use regex in a key, wrap it in `/ ... /`. + +**Syntax:** + +```python +"/key\d+/": "type" +``` + +### Lists of possible values + +To specify a list of keys, use a comma-separated string. + +**Syntax:** + +```python +"key1,key2,key3": "type" +"/k[ey]{2}1/,key2": "type" +``` + +To escape a comma, use `_`. + +### Required fields (`*`) + +Fields marked with `*` are required. The validation will fail without them. + +**Syntax:** + +```python +"*key1": "type" +"*/key\d+/": "type" +``` + +--- + +## Supported Types + +### String (`str`) + +Represents a string value. 
Optionally, you can specify a regex pattern that the string must match. + +**Syntax:** + +- Basic string: `"str"` +- With regex pattern: `"str[regex_pattern]"` +- The escape character for regex is `\`, and for commas is `_`. + +**Arguments:** + +- `regex_pattern`: A regular expression that the string must match. If not specified, any string is accepted. + +**Examples:** + +1. Basic string: + + ```python + "username": "str" + ``` + + Accepts any string value for the key `username`. + +2. String with regex pattern: + + ```python + "fullname": "str[/[A-Z][a-z]+ [A-Z][a-z]+/]" + ``` + + Accepts a string that matches the pattern of a first and last name starting with uppercase letters. + +### Integer (`int`) + +Represents an integer value. You can specify conditions like exact values, ranges, and inequalities. + +**Syntax:** + +- Basic integer: `"int"` +- With conditions: `"int[conditions]"` + +**Arguments:** + +- `conditions`: A comma-separated list of conditions. + +**Condition Operators:** + +- `==`: Equal to a specific value. +- `>=`: Greater than or equal to a value. +- `<=`: Less than or equal to a value. +- `>`: Greater than a value. +- `<`: Less than a value. +- `range`: A range between two values (inclusive). + +**Examples:** + +1. Basic integer: + + ```python + "age": "int" + ``` + + Accepts any integer value for the key `age`. + +2. Integer with conditions: + + ```python + "userage": "int[>=0, <=120]" + ``` + + Accepts integer values between 0 and 120 inclusive. + +3. Specific values and ranges + + ```python + "rating": "int[1-5]" + "rating": "int[1,2,3,4-5]" + ``` + + Accepts integer values 1, 2, 3, 4, or 5. + +4. Ranges with negative numbers: + + ```python + "rating": "int[-100 - -90]" + ``` + + Accepts integer values from -100 to -90. + +### Double (`double`) + +Represents a floating-point number. Supports the same conditions as integers. 
+ +**Syntax:** + +- Basic double: `"double"` +- With conditions: `"double[conditions]"` + +**Arguments:** + +- `conditions`: A comma-separated list of conditions. + +**Examples:** + +1. Basic double: + + ```python + "price": "double" + ``` + + Accepts any floating-point number for the key `price`. + +2. Double with conditions: + + ```python + "percentage": "double[>=0.0,<=100.0]" + ``` + + Accepts double values between 0.0 and 100.0 inclusive. + +### Boolean (`bool`) + +Represents a boolean value (`True` or `False`). + +**Syntax:** + +```python +"isActive": "bool" +``` + +Accepts a boolean value for the key `isActive`. + +### Array (`array`) + +Represents a list of elements of a specified type. You can specify conditions on the length of the array. + +**Syntax:** + +- Basic array: `"array[element_type]"` +- With length conditions: `"array[element_type,length_conditions]"` + +**Arguments:** + +- `element_type`: The type of the elements in the array. +- `length_conditions`: Conditions on the array length (same as integer conditions). + +**Examples:** + +1. Basic array: + + ```python + "tags": "array[str]" + ``` + + Accepts a list of strings for the key `tags`. + +2. Array with length conditions: + + ```python + "scores": "array[int[>=0,<=100],>=1,<=5]" + ``` + + Accepts a list of 1 to 5 integers between 0 and 100 inclusive. + +3. Fixed-length array: + + ```python + "coordinates": "array[double, 2]" + ``` + + Accepts a list of exactly 2 double values. + +4. More complex restraints: + ```python + "coordinates": "array[array[int[>0]] - tuple[1, 1]], 2]" + ``` + +### Tuple (`tuple`) + +Represents a fixed-size sequence of elements of specified types. + +**Syntax:** + +```python +"tuple[element_type1, element_type2]" +``` + +**Arguments:** + +- `element_typeN`: The type of the Nth element in the tuple. + +**Examples:** + +1. Basic tuple: + + ```python + "point": "tuple[int, int]" + ``` + + Accepts a tuple or list of two integers. + +2. 
Tuple with mixed types: + + ```python + "userInfo": "tuple[str, int, bool]" + ``` + + Accepts a tuple of a string, an integer, and a boolean. + +### Nested Dictionaries + +Represents a nested dictionary structure. Dictionaries are defined using Python's dictionary syntax `{}` in the type definitions. + +**Syntax:** + +```python +"settings": { + "volume": "int[>=0,<=100]", + "brightness": "int[>=0,<=100]", + "mode": "str" +} +``` + +**Usage:** + +- Define the expected keys and their types within the dictionary. +- You can use all the supported types for the values. + +**Examples:** + +1. Nested dictionary: + + ```python + "user": { + "name": "str", + "age": "int[>=0]", + "preferences": { + "theme": "str", + "notifications": "bool" + } + } + ``` + + Defines a nested dictionary structure for the key `user`. + +### Nil (`nil`) + +Represents a `None` value. + +**Syntax:** + +```python +"optionalValue": "int | nil" +``` + +**Usage:** + +- Use `nil` to allow a value to be `None`. +- Often used with union types to specify optional values. + +### Any (`any`) + +Represents any value. + +**Syntax:** + +```python +"metadata": "any" +``` + +**Usage:** + +- Use `any` when any value is acceptable. +- Useful for keys where the value is not constrained. + +### Type References (`@`) + +Allows you to define reusable types and reference them. + +**Syntax:** + +- Define a named type: + + ```python + "@typeName": "type_definition" + ``` + +- Reference a named type: + + ```python + "key": "@typeName" + ``` + +**Examples:** + +1. Defining and using a named type: + + ```python + "@positiveInt": "int[>0]" + "userId": "@positiveInt" + ``` + + Defines a reusable type `@positiveInt` and uses it for the key `userId`. + +--- + +## Advanced Features + +### Subtracting Domains (`-`) + +Allows you to specify that a value should not match a certain type or condition. + +**Syntax:** + +```python +"typeA - typeB" +``` + +**Usage:** + +- The value must match `typeA` but not `typeB`. 
+ +**Examples:** + +1. Excluding certain strings: + + ```python + "message": "str - str[/.*error.*/]" + ``` + + Accepts any string that does not match the regex pattern `.*error.*`. + +2. Excluding a range of numbers: + + ```python + "score": "int[0-100] - int[>=90]" + ``` + + Accepts integers between 0 and 100, excluding values greater than or equal to 90. + +3. Excluding multiple types: + + ```python + "score": "int[>0,<100] - int[>90] - int[<10]" + # Union, then subtraction: + "score": "int[>0,<100] - int[>90] | int[<10]" + "score": "int[>0,<100] - (int[>90] | int[<10])" # same thing + # Use parentheses to run subtraction first + "score": "int[>0,<50] | (int[<100] - int[<10])" + "score": "(int[<100] - int[<10]) | int[>0,<50]" + ``` + + **Note**: Union is handled before subtraction. + +4. Allowing all but a specific value: + + ```python + "specialNumber": "any - int[0]" + ``` + +### Union Types (`|`) + +Allows you to specify that a value can be one of multiple types. + +**Syntax:** + +```python +"typeA | typeB | typeC" +``` + +**Usage:** + +- The value must match at least one of the specified types. + +**Examples:** + +1. Multiple possible types: + + ```python + "data": "int | str | bool" + ``` + + Accepts an integer, string, or boolean value for the key `data`. + +2. Combining with arrays: + + ```python + "mixedList": "array[int | str]" + ``` + + Accepts a list of integers or strings. + +### Conditional Ranges and Values + +Specifies conditions that values must satisfy, including ranges and specific values. + +**Syntax:** + +- Greater than: `">value"` +- Less than: `"<value"` +- Greater than or equal to: `">=value"` +- Less than or equal to: `"<=value"` +- Range: `"start-end"` +- Specific values: `"value1,value2,value3"` + +**Examples:** + +1. Integer conditions: + + ```python + "level": "int[>=1,<=10]" + ``` + + Accepts integers from 1 to 10 inclusive. + +2. Double with range: + + ```python + "latitude": "double[-90.0 - 90.0]" + ``` + + Accepts doubles between -90.0 and 90.0 inclusive. + +3. 
Specific values: + + ```python + "status": "int[1,2,3]" + ``` + + Accepts integers that are either 1, 2, or 3. + +--- + +## Error Handling + +```mermaid +graph TD + Exception --> JvvException + JvvException --> JvvRuntimeException + JvvException --> JvvSyntaxError + + JvvRuntimeException --> UnknownProperty["UnknownProperty
Raised when a key in config
isn't defined in property types
"] + JvvRuntimeException --> InvalidPropertyType["InvalidPropertyType
Raised when a value doesn't
match its type definition
"] + InvalidPropertyType --> MissingRequiredKey["MissingRequiredKey
Raised when a required key
is missing from config
"] + + JvvSyntaxError --> PropertySyntaxError["PropertySyntaxError
Raised when property type
definitions have syntax errors
"] + + classDef base fill:#eee,stroke:#333,stroke-width:2px; + classDef jvv fill:#d4e6f1,stroke:#2874a6,stroke-width:2px; + classDef runtime fill:#d5f5e3,stroke:#196f3d,stroke-width:2px; + classDef syntax fill:#fdebd0,stroke:#b9770e,stroke-width:2px; + classDef error fill:#fadbd8,stroke:#943126,stroke-width:2px; + + class Exception base; + class JvvException jvv; + class JvvRuntimeException,JvvSyntaxError runtime; + class PropertySyntaxError syntax; + class UnknownProperty,InvalidPropertyType,MissingRequiredKey error; +``` + +--- + +### Types + +- **str**: Basic string type. + + - Arguments: + - `regex_pattern` (optional): A regex pattern the string must match. + - Example: `"str[^[A-Za-z]+$]"` + +- **int**: Integer type with conditions. + + - Arguments: + - `conditions`: Inequalities (`>=`, `<=`, `>`, `<`), specific values (`value1,value2`), ranges (`start-end`). + - Example: `"int[>=0,<=100]"` + +- **double**: Double (floating-point) type with conditions. + + - Arguments: + - Same as `int`. + - Example: `"double[>0.0]"` + +- **bool**: Boolean type. + + - Arguments: None. + - Example: `"bool"` + +- **array**: Array (list) of elements of a specified type. + + - Arguments: + - `element_type`: Type of elements in the array. + - `length_conditions` (optional): Conditions on the array length. + - Example: `"array[int[>=0],>=1,<=10]"` + +- **tuple**: Fixed-size sequence of elements of specified types. + + - Arguments: + - List of element types. + - Example: `"tuple[str, int, bool]"` + +- **nil**: Represents a `None` value. + + - Arguments: None. + - Example: `"nil"` + +- **any**: Accepts any value. + + - Arguments: None. + - Example: `"any"` + +- **Type References**: Reusable type definitions. + - Arguments: + - `@typeName`: Reference to a named type. + - Example: + - Define: `"@positiveInt": "int[>0]"` + - Use: `"userId": "@positiveInt"` + +### Type Combinations + +- **Union Types** (`|`): Value must match one of multiple types. 
# --- jsonvv/exceptions.py -------------------------------------------------
# Exception classes for jsonvv. Everything derives from JvvException so
# callers can catch the whole family with a single clause.


class JvvException(Exception):
    """Root of the jsonvv exception hierarchy."""


class JvvRuntimeException(JvvException):
    """Base for errors raised while validating data against a schema."""


class JvvSyntaxError(JvvException):
    """Base for errors in the schema (property type definitions) itself."""


class UnknownProperty(JvvRuntimeException, ValueError):
    """A key in the validated data is not described by the schema."""


class InvalidPropertyType(JvvRuntimeException, TypeError):
    """A value does not satisfy the type definition given for it."""


class MissingRequiredKey(InvalidPropertyType):
    """A key marked as required (``*``) is absent from the validated data."""


class PropertySyntaxError(JvvSyntaxError):
    """A property type definition could not be parsed."""


# --- jsonvv/__main__.py ---------------------------------------------------


def load_json(file_path: Path) -> Dict[str, Any]:
    """
    Load and parse a JSON file.

    The file is always decoded as UTF-8 (the encoding JSON interchange uses)
    instead of whatever the platform locale happens to be.

    Args:
        file_path: Location of the JSON document.

    Returns:
        The decoded JSON document.

    Raises:
        ValueError: If the file is missing, unreadable, or not valid JSON.
            The underlying exception is attached as ``__cause__``.
    """
    try:
        with open(file_path, encoding='utf-8') as f:
            return json.load(f)
    except json.JSONDecodeError as e:
        raise ValueError(f"Invalid JSON in {file_path}: {e}") from e
    except FileNotFoundError as e:
        raise ValueError(f"File not found: {file_path}") from e
    except OSError as e:
        # Permission problems, directories, etc. are reported through the
        # same channel so the CLI prints a message instead of a traceback.
        raise ValueError(f"Cannot read {file_path}: {e}") from e


def main():
    """JSON Value Validator - Validate JSON data against a schema."""
    parser = argparse.ArgumentParser(
        description="JSON Value Validator - Validate JSON data against a schema."
    )
    parser.add_argument(
        'properties_file', type=Path, help='JSON file containing the property type definitions'
    )
    parser.add_argument(
        '-i', '--input', type=Path, help='JSON file containing the data to validate'
    )
    parser.add_argument(
        '--check', action='store_true', help='Check if the properties file is valid'
    )

    args = parser.parse_args()

    try:
        # Load property types; building the validator is what checks them.
        property_types = load_json(args.properties_file)
        validator = JsonValidator(property_types)

        if args.check:
            print("✓ Property types are valid")
            return

        if not args.input:
            # parser.error() exits with status 2 (SystemExit is not caught below).
            parser.error("Either --input or --check must be specified")

        # Load and validate data
        data = load_json(args.input)
        validator.validate(data)
        print("✓ Data is valid")

    except (InvalidPropertyType, UnknownProperty) as e:
        print(f"Validation Error: {e}", file=sys.stderr)
        sys.exit(1)
    except JvvSyntaxError as e:
        print(f"Syntax Error: {e}", file=sys.stderr)
        sys.exit(1)
    except ValueError as e:
        # Raised by load_json for unreadable or malformed files.
        print(f"File Error: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
class Parser:
    """Recursive-descent parser for jsonvv type strings.

    Grammar, lowest precedence first::

        subtraction := union ('-' union)*
        union       := term ('|' term)*
        term        := '(' subtraction ')' | basic
        basic       := 'array' [...] | 'tuple' [...] | name ['[' conditions ']']

    Union binds tighter than subtraction, so ``a - b | c`` parses as
    ``a - (b | c)``. Syntax problems are reported as ``RuntimeError``.
    """

    def __init__(self, type_str: str):
        self.type_str = type_str
        self.pos = 0  # index of the next unread character
        self.length = len(type_str)

    def parse(self) -> 'Type':
        """Parse the whole type string and return the resulting type object.

        Raises:
            RuntimeError: If the string is malformed or has trailing input.
        """
        result = self.parse_subtraction()  # lowest-precedence rule is the entry point
        self.skip_whitespace()
        if self.pos < self.length:
            raise RuntimeError(f"Unexpected character at position {self.pos}")
        return result

    def parse_union(self) -> 'Type':
        """Handle ``type1 | type2 | type3``."""
        types = [self.parse_term()]

        while self.pos < self.length:
            self.skip_whitespace()
            if not self.match('|'):
                break
            types.append(self.parse_term())

        # A single alternative needs no UnionType wrapper.
        return types[0] if len(types) == 1 else UnionType(types)

    def parse_subtraction(self) -> 'Type':
        """Handle ``type1 - type2`` (left-associative)."""
        left = self.parse_union()

        while self.pos < self.length:
            self.skip_whitespace()
            if not self.match('-'):
                break
            right = self.parse_union()
            left = SubtractionType(left, right)

        return left

    def parse_term(self) -> 'Type':
        """Handle a basic type or a parenthesized expression."""
        self.skip_whitespace()

        if self.match('('):
            type_obj = self.parse_subtraction()
            if not self.match(')'):
                raise RuntimeError("Unclosed parenthesis")
            return type_obj

        return self.parse_basic_type()

    def _consume_conditions(self):
        """Return the raw text of an optional ``[...]`` suffix, or ``None``.

        The text is captured verbatim (nested brackets included) up to the
        bracket that closes the opening one; the typed classes interpret it.
        """
        self.skip_whitespace()
        if not self.match('['):
            return None

        start = self.pos
        depth = 1  # the '[' just consumed
        while self.pos < self.length:
            char = self.type_str[self.pos]
            if char == '[':
                depth += 1
            elif char == ']':
                depth -= 1
                if depth == 0:
                    break
            self.pos += 1

        if depth > 0:
            raise RuntimeError("Unclosed '['")
        conditions = self.type_str[start : self.pos]

        if not self.match(']'):
            raise RuntimeError("Expected ']'")
        return conditions

    def parse_basic_type(self) -> 'Type':
        """Handle a named type with optional bracketed conditions."""
        name = self.parse_identifier()

        # array/tuple carry nested type expressions, so they have own parsers.
        if name == 'array':
            return self.parse_array_type()
        if name == 'tuple':
            return self.parse_tuple_type()

        conditions = self._consume_conditions()

        if name == 'str':
            return StringType(conditions)
        if name == 'int':
            return IntType(conditions)
        if name == 'double':
            return DoubleType(conditions)
        # bool, any and nil take no arguments; a bracket suffix is ignored.
        if name == 'bool':
            return BoolType()
        if name == 'any':
            return AnyType()
        if name == 'nil':
            return NilType()
        if name.startswith('@'):
            return ReferenceType(name[1:])
        # Anything else is left to BaseType to accept or reject.
        return BaseType(name, conditions)

    def peek(self, char: str) -> bool:
        """Look ahead for a character without consuming it (whitespace is skipped)."""
        self.skip_whitespace()
        return self.pos < self.length and self.type_str[self.pos] == char

    def parse_array_type(self) -> 'Type':
        """Handle ``array[type, length?]``; a bare ``array`` is an array of any."""
        if not self.match('['):
            return ArrayType(AnyType(), None)

        # The element type may itself be a full expression.
        element_type = self.parse_subtraction()

        length_conditions = None
        self.skip_whitespace()

        # Everything after the first comma, up to ']', is the length condition text.
        if self.match(','):
            self.skip_whitespace()
            start = self.pos
            while self.pos < self.length and self.type_str[self.pos] != ']':
                self.pos += 1
            if self.pos >= self.length:
                raise RuntimeError("Unclosed array type")
            length_conditions = self.type_str[start : self.pos].strip()

        if not self.match(']'):
            raise RuntimeError("Expected ']' in array type")

        return ArrayType(element_type, length_conditions)

    def parse_tuple_type(self) -> 'Type':
        """Handle ``tuple[type1, type2, ...]``."""
        if not self.match('['):
            raise RuntimeError("Expected '[' after 'tuple'")

        types = []
        while True:
            self.skip_whitespace()
            if self.match(']'):
                break

            # Each slot may be a full expression.
            types.append(self.parse_subtraction())

            self.skip_whitespace()
            if not self.match(','):
                if self.match(']'):
                    break
                raise RuntimeError("Expected ',' or ']' in tuple type")

        return TupleType(types)

    def parse_identifier(self) -> str:
        """Parse an identifier: alphanumerics, ``_`` and ``@`` (type references)."""
        self.skip_whitespace()
        start = self.pos

        while self.pos < self.length and (
            self.type_str[self.pos].isalnum() or self.type_str[self.pos] in '_@'
        ):
            self.pos += 1

        if start == self.pos:
            raise RuntimeError(f'Expected identifier at position {self.pos}')

        return self.type_str[start : self.pos]

    def skip_whitespace(self) -> None:
        """Advance past any whitespace at the current position."""
        while self.pos < self.length and self.type_str[self.pos].isspace():
            self.pos += 1

    def match(self, char: str) -> bool:
        """Consume ``char`` if it is next (after whitespace); report whether it was."""
        self.skip_whitespace()
        if self.pos < self.length and self.type_str[self.pos] == char:
            self.pos += 1
            return True
        return False

    def peek_word(self, word: str) -> bool:
        """Look ahead for ``word`` as a whole token without consuming it."""
        self.skip_whitespace()
        end = self.pos + len(word)
        return (
            end <= self.length
            and self.type_str[self.pos : end] == word
            # The word must not continue into a longer alphanumeric token.
            and (end == self.length or not self.type_str[end].isalnum())
        )
# Python's import system is a pain,
# so I'm moving DictType and ReferenceType here.
# (Both need parse_type_def/Parser, which live in this module.)


@dataclass
class DictType(Type):
    """Validates a dictionary against a mapping of key patterns to type definitions.

    Keys of ``type_dict`` are key patterns understood by ``string_validator``;
    a leading ``*`` marks the pattern as required. Values are type definitions
    accepted by ``parse_type_def``.
    """

    type_dict: Dict[str, Any]
    type_registry: Dict[str, Any]

    def validate(self, value: Any, path: List[str], type_registry: Dict[str, Type]) -> None:
        """Check every key of ``value`` against the patterns and validate its value.

        Raises:
            InvalidPropertyType: If ``value`` is not a dict, a key matches no
                pattern, a nested value is invalid, or nothing matched at all.
            MissingRequiredKey: If a required pattern matched no key. It is a
                subclass of InvalidPropertyType, so existing handlers still apply.
        """
        # Imported here because the module header only imports InvalidPropertyType.
        from .exceptions import MissingRequiredKey

        if not isinstance(value, dict):
            raise InvalidPropertyType(f"Expected dict at {'.'.join(path)}, got {type(value)}")

        any_pattern_matched = False
        # Required patterns (stored without the '*') -> whether some key matched.
        required_patterns = {
            pattern[1:]: False for pattern in self.type_dict if pattern.startswith('*')
        }

        for key, val in value.items():
            pattern_matched = False
            for pattern, type_def in self.type_dict.items():
                is_required = pattern.startswith('*')
                # The '*' is a marker, not part of the key pattern.
                match_pattern = pattern[1:] if is_required else pattern

                if not string_validator(key, match_pattern):
                    continue

                pattern_matched = True
                any_pattern_matched = True
                if is_required:
                    required_patterns[match_pattern] = True

                # A key is validated against every pattern it matches.
                expected_type = parse_type_def(type_def, type_registry)
                expected_type.validate(val, path + [key], type_registry)

            if not pattern_matched:
                raise InvalidPropertyType(
                    f"Key {key} at {'.'.join(path)} does not match any allowed patterns"
                )

        missing_required = [pattern for pattern, found in required_patterns.items() if not found]
        if missing_required:
            raise MissingRequiredKey(
                f"Missing required properties matching patterns: {', '.join(missing_required)} at {'.'.join(path)}"
            )

        if not any_pattern_matched:
            raise InvalidPropertyType(f"No properties at {'.'.join(path)} matched any patterns")


@dataclass
class ReferenceType(Type):
    """A ``@name`` reference resolved through the type registry at validation time."""

    name: str

    def validate(self, value: Any, path: List[str], type_registry: Dict[str, Type]) -> None:
        """Validate ``value`` against the registered type called ``self.name``.

        Raises:
            RuntimeError: If the registry has no entry for the reference.
        """
        if self.name not in type_registry:
            raise RuntimeError(f"Unknown type reference: @{self.name}")

        ref_type = type_registry[self.name]

        if isinstance(ref_type, dict):
            # Registry entries may be raw schema dicts; wrap them on the fly.
            DictType(ref_type, type_registry).validate(value, path, type_registry)
        else:
            ref_type.validate(value, path, type_registry)

    def __str__(self) -> str:
        return f"@{self.name}"


def parse_type_def(type_def: Any, type_registry: Dict[str, Type]) -> Type:
    """Turn a schema entry into a Type.

    Strings are parsed as type expressions; dicts become a DictType.

    Raises:
        InvalidPropertyType: If ``type_def`` is neither a string nor a dict.
    """
    if isinstance(type_def, str):
        return Parser(type_def).parse()
    if isinstance(type_def, dict):
        return DictType(type_def, type_registry)
    raise InvalidPropertyType(f"Invalid type definition: {type_def}")
# --- jsonvv/strings.py ----------------------------------------------------


class StringValidator:
    """Matches a string against a comma-separated list of alternatives.

    Each alternative is either a regex wrapped in ``/.../`` or a literal.
    ``_`` escapes a following ``,`` or ``/`` so it is taken literally.
    """

    def __init__(self, pattern: str):
        self.pattern = pattern
        self.patterns = self._split_patterns(pattern)

    def _split_patterns(self, p: str) -> List[str]:
        """Split ``p`` on unescaped commas that are outside ``/regex/`` sections."""
        patterns = []
        current = []
        in_regex = False

        for i, char in enumerate(p):
            escaped = i > 0 and p[i - 1] == '_'
            if char == '/' and not escaped:
                # An unescaped slash opens or closes a regex section.
                in_regex = not in_regex
                current.append(char)
            elif char == ',' and not in_regex:
                if escaped:
                    # Keep the comma; the '_' before it is removed when cleaning.
                    current.append(',')
                else:
                    patterns.append(''.join(current))
                    current = []
            else:
                current.append(char)

        if current:
            patterns.append(''.join(current))

        return [part.strip() for part in patterns if part.strip()]

    def _is_regex_pattern(self, p: str) -> bool:
        """Report whether ``p`` is wrapped in unescaped slashes."""
        return p.startswith('/') and p.endswith('/') and not p.endswith('_/')

    def _clean_literal_pattern(self, p: str) -> str:
        """Remove the ``_`` escape in front of ``,`` and ``/``.

        Only those two characters are escapable. Other underscores are part
        of the literal, so ``required_key`` matches the key ``required_key``
        (stripping every single underscore made such keys unmatchable).
        """
        return re.sub(r'_([,/])', r'\1', p)

    def validate(self, value: str) -> bool:
        """Return True if ``value`` matches at least one alternative.

        Regexes are applied with ``re.match`` (anchored at the start only).
        """
        for p in self.patterns:
            if self._is_regex_pattern(p):
                if re.match(p[1:-1], value):
                    return True
            elif value == self._clean_literal_pattern(p):
                return True
        return False


def string_validator(value: str, pattern: str) -> bool:
    """Convenience wrapper: does ``value`` match ``pattern``?"""
    return StringValidator(pattern).validate(value)


# --- jsonvv/types.py ------------------------------------------------------

TYPE_NAMES = {'array', 'tuple', 'str', 'int', 'double', 'bool', 'any', 'nil'}


class Type(ABC):
    """Interface every jsonvv type implements."""

    @abstractmethod
    def validate(self, value: Any, path: List[str], type_registry: Dict[str, 'Type']) -> None:
        """Raise if ``value`` (located at ``path``) does not satisfy this type."""


@dataclass
class BaseType(Type):
    """Fallback for a type name the parser has no dedicated class for."""

    name: str
    conditions: Optional[str] = None

    def __post_init__(self):
        # Raise error early
        if not self.name.startswith('@') and self.name not in TYPE_NAMES:
            raise InvalidPropertyType(f'Unknown base type {self.name}')

    def validate(self, value: Any, path: List[str], type_registry: Dict[str, Type]) -> None:
        if self.name in type_registry:
            type_registry[self.name].validate(value, path, type_registry)
        else:
            raise RuntimeError(f'Unknown base type {self.name}')


@dataclass
class NilType(Type):
    """Represents a nil/null type"""

    def validate(self, value: Any, path: List[str], type_registry: Dict[str, Type]) -> None:
        if value is not None:
            raise InvalidPropertyType(
                f"Invalid value at {'.'.join(path)}: expected nil, got {value}"
            )

    def __str__(self) -> str:
        return "nil"


@dataclass
class StringType(Type):
    """A string, optionally restricted by a ``string_validator`` pattern."""

    pattern: Optional[str] = None

    def validate(self, value: Any, path: List[str], type_registry: Dict[str, Type]) -> None:
        if not isinstance(value, str):
            raise InvalidPropertyType(
                f"Invalid value at {'.'.join(path)}: expected string, got {type(value).__name__}"
            )

        if self.pattern and not string_validator(value, self.pattern):
            raise InvalidPropertyType(
                f"Invalid value at {'.'.join(path)}: {value} does not match pattern '{self.pattern}'"
            )

    def __str__(self) -> str:
        return f"str[{self.pattern}]" if self.pattern else "str"


@dataclass
class NumericalType(Type):
    """Shared implementation of ``int`` and ``double``.

    ``conditions`` is a comma-separated list of inequalities (``>=``, ``<=``,
    ``>``, ``<``), single values and inclusive ranges (``start-end``).
    """

    conditions: Optional[str] = None
    numeric_type: type = float  # Default to float
    type_name: str = "number"  # For error messages

    def validate(self, value: Any, path: List[str], type_registry: Dict[str, Type]) -> None:
        allowed_types = (int, float) if self.numeric_type is float else (int,)
        # bool is a subclass of int, but a JSON boolean is not a number.
        if isinstance(value, bool) or not isinstance(value, allowed_types):
            raise InvalidPropertyType(
                f"Invalid value at {'.'.join(path)}: expected {self.type_name}, got {type(value).__name__}"
            )
        if self.conditions and not self._check_conditions(self.numeric_type(value)):
            raise InvalidPropertyType(
                f"Invalid value at {'.'.join(path)}: {value} does not match conditions '{self.conditions}'"
            )

    def _check_conditions(self, value: Union[int, float]) -> bool:
        """Return True if ``value`` satisfies the condition list.

        A value is accepted when it equals a listed value, falls in a listed
        range, or satisfies *all* listed inequalities. Inequalities are
        combined with AND so ``>=0,<=120`` means "between 0 and 120".
        Conditions that cannot be parsed are ignored.
        """
        if not self.conditions:
            return True

        # None until the first well-formed inequality has been evaluated.
        inequalities_hold: Optional[bool] = None

        for condition in (c.strip() for c in self.conditions.split(',')):
            try:
                # Two-character operators are tested before their prefixes.
                if '>=' in condition:
                    holds = value >= self.numeric_type(condition.replace('>=', ''))
                elif '<=' in condition:
                    holds = value <= self.numeric_type(condition.replace('<=', ''))
                elif '>' in condition:
                    holds = value > self.numeric_type(condition.replace('>', ''))
                elif '<' in condition:
                    holds = value < self.numeric_type(condition.replace('<', ''))
                else:
                    # Handle ranges (e.g., "1.5-5.5")
                    if '-' in condition[1:]:
                        # Split on the first '-' after index 0 so a leading
                        # minus sign on the start value is kept.
                        range_s, range_e = condition[1:].split('-', 1)
                        range_s = self.numeric_type(condition[0] + range_s)
                        range_e = self.numeric_type(range_e)
                        matched = range_s <= value <= range_e
                    # Handle single values
                    else:
                        matched = value == self.numeric_type(condition)
                    if matched:
                        return True
                    continue
            except ValueError:
                continue

            inequalities_hold = holds if inequalities_hold is None else inequalities_hold and holds

        return bool(inequalities_hold)

    def __str__(self) -> str:
        return f"{self.type_name}[{self.conditions}]" if self.conditions else self.type_name


@dataclass
class IntType(NumericalType):
    """Integer type; floats and booleans are rejected."""

    def __init__(self, conditions: Optional[str] = None):
        super().__init__(conditions=conditions, numeric_type=int, type_name="int")


@dataclass
class DoubleType(NumericalType):
    """Floating-point type; integers are accepted and compared as floats."""

    def __init__(self, conditions: Optional[str] = None):
        super().__init__(conditions=conditions, numeric_type=float, type_name="double")


@dataclass
class AnyType(Type):
    """Accepts every value."""

    def validate(self, value: Any, path: List[str], type_registry: Dict[str, Type]) -> None:
        # Any type accepts all values
        pass

    def __str__(self) -> str:
        return "any"


@dataclass
class BoolType(Type):
    """Accepts ``True`` and ``False`` only."""

    def validate(self, value: Any, path: List[str], type_registry: Dict[str, Type]) -> None:
        if not isinstance(value, bool):
            raise InvalidPropertyType(
                f"Invalid value at {'.'.join(path)}: expected bool, got {type(value).__name__}"
            )


@dataclass
class ArrayType(Type):
    """A list whose items share ``element_type``, with optional length conditions."""

    element_type: Type
    length_conditions: Optional[str] = None

    def validate(self, value: Any, path: List[str], type_registry: Dict[str, Type]) -> None:
        if not isinstance(value, list):
            raise InvalidPropertyType(
                f"Invalid value at {'.'.join(path)}: expected array, got {type(value).__name__}"
            )

        if self.length_conditions:
            array_len = len(value)
            # _check_conditions reports failure through its return value, not
            # by raising, so the result must be inspected explicitly.
            if not IntType(self.length_conditions)._check_conditions(array_len):
                raise InvalidPropertyType(
                    f"Invalid array length at {'.'.join(path)}: got length {array_len}"
                )

        for i, item in enumerate(value):
            self.element_type.validate(item, path + [str(i)], type_registry)
@dataclass
class UnionType(Type):
    """Validator that accepts a value matching at least one member type."""

    types: List[Type]

    def validate(self, value: Any, path: List[str], type_registry: Dict[str, Type]) -> None:
        """Try each member type in order and succeed on the first match.

        Raises:
            InvalidPropertyType: if no member type accepts ``value``.
        """
        for candidate in self.types:
            try:
                candidate.validate(value, path, type_registry)
            except InvalidPropertyType:
                continue  # Not this alternative; try the next one
            return  # If any type validates successfully, we're done

        # If we get here, none of the types validated
        raise InvalidPropertyType(
            f"Invalid value at {'.'.join(path)}: {value} does not match any of the allowed types"
        )

    def __str__(self) -> str:
        return f"({' | '.join(str(t) for t in self.types)})"


@dataclass
class SubtractionType(Type):
    """Validator for ``base_type - subtracted_type``.

    A value is valid when it matches ``base_type`` and does NOT match
    ``subtracted_type``.
    """

    base_type: Type
    subtracted_type: Type

    def validate(self, value: Any, path: List[str], type_registry: Dict[str, Type]) -> None:
        """Validate ``value`` against the base type minus the excluded type.

        Raises:
            InvalidPropertyType: propagated unchanged from ``base_type`` when
                the base does not match, or raised here when the value also
                matches ``subtracted_type``.
        """
        # A base-type mismatch propagates with the base type's own message.
        self.base_type.validate(value, path, type_registry)

        try:
            self.subtracted_type.validate(value, path, type_registry)
        except InvalidPropertyType:
            # Matches base but not the excluded type: valid.
            return

        path_str = '.'.join(path)
        raise InvalidPropertyType(f"Invalid value at {path_str}: {value} matches excluded type")

    def __str__(self) -> str:
        return f"({self.base_type} - {self.subtracted_type})"
def validate_config(
    config_map: Dict[str, Any],
    property_types: Dict[str, Any],
    type_registry: Dict[str, Type],
    parsed_types: Dict[str, Type],
    parent_registry: Dict[str, Type] = None,
    path: str = "",
) -> None:
    """Validates a configuration map against property types.

    Each key of ``config_map`` is resolved against ``property_types`` in this
    order: exact key, required key (``'*' + key``), then the first pattern key
    accepted by ``string_validator``. Keys starting with ``@`` or ``*`` are
    skipped in the pattern search.

    Args:
        config_map: The configuration values to validate.
        property_types: Schema for this nesting level (type strings or
            nested dictionaries).
        type_registry: Registry used when no parent registry is given.
        parsed_types: Pre-parsed type definitions keyed by dotted schema path.
        parent_registry: Registry inherited from the enclosing scope, if any.
        path: Dotted path of this nesting level ("" at the top level).

    Raises:
        UnknownProperty: if a key matches nothing in ``property_types``.
        MissingRequiredKey: if a ``*``-prefixed key is absent from
            ``config_map``.
    """
    # Create a new registry for this scope, inheriting from parent if it exists
    local_registry = dict(parent_registry or type_registry)

    # Track required properties: name without the '*' prefix -> seen?
    required_props = {key[1:]: False for key in property_types if key.startswith('*')}

    for key, value in config_map.items():
        type_def = None
        current_path = f"{path}.{key}" if path else key

        if key in property_types:
            type_def = property_types[key]

            # If the value is a dict and type_def is also a dict, recurse with new scope
            if isinstance(value, dict) and isinstance(type_def, dict):
                validate_config(
                    value, type_def, type_registry, parsed_types, local_registry, current_path
                )
                continue

        elif '*' + key in property_types:
            type_def = property_types['*' + key]
            required_props[key] = True
            # NOTE(review): JsonValidator.parse_types stores pre-parsed types
            # under the raw schema key (including '*'), so the lookup below
            # misses for required keys and the definition is re-parsed.
        else:
            # Fall back to pattern keys; reference and required keys are skipped
            for pattern, pattern_type in property_types.items():
                if pattern.startswith(('@', '*')):
                    continue
                if string_validator(key, pattern):
                    type_def = pattern_type
                    # Pre-parsed types are keyed by the schema key, i.e. the pattern
                    current_path = f"{path}.{pattern}" if path else pattern
                    break

        if type_def is None:
            raise UnknownProperty(f"Unknown property: {key}")

        # Use pre-parsed type if available, otherwise parse it
        expected_type = parsed_types.get(current_path)
        if expected_type is None:
            expected_type = parse_type_def(type_def, local_registry)
        expected_type.validate(value, [key], local_registry)

    # Check for missing required properties
    missing_required = [key for key, found in required_props.items() if not found]
    if missing_required:
        raise MissingRequiredKey(f"Missing required properties: {', '.join(missing_required)}")