From 3a86e52843535020e220c999f1129d781434c0d9 Mon Sep 17 00:00:00 2001 From: "Victor [C] Tsang" Date: Thu, 7 May 2026 16:10:17 +0000 Subject: [PATCH 1/3] Added query tests for $jsonSchema Signed-off-by: Victor [C] Tsang --- .../query/misc/jsonSchema/__init__.py | 0 .../misc/jsonSchema/test_jsonSchema_errors.py | 304 ++++++++++ .../test_jsonSchema_field_validation.py | 560 ++++++++++++++++++ .../test_jsonSchema_keyword_combinations.py | 457 ++++++++++++++ .../test_jsonSchema_keyword_validation.py | 282 +++++++++ .../test_jsonSchema_object_validation.py | 214 +++++++ .../test_jsonSchema_type_filtering.py | 435 ++++++++++++++ documentdb_tests/framework/assertions.py | 13 + .../framework/bson_type_validator.py | 80 +++ documentdb_tests/framework/error_codes.py | 1 + documentdb_tests/framework/test_constants.py | 52 +- 11 files changed, 2397 insertions(+), 1 deletion(-) create mode 100644 documentdb_tests/compatibility/tests/core/operator/query/misc/jsonSchema/__init__.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/query/misc/jsonSchema/test_jsonSchema_errors.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/query/misc/jsonSchema/test_jsonSchema_field_validation.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/query/misc/jsonSchema/test_jsonSchema_keyword_combinations.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/query/misc/jsonSchema/test_jsonSchema_keyword_validation.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/query/misc/jsonSchema/test_jsonSchema_object_validation.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/query/misc/jsonSchema/test_jsonSchema_type_filtering.py create mode 100644 documentdb_tests/framework/bson_type_validator.py diff --git a/documentdb_tests/compatibility/tests/core/operator/query/misc/jsonSchema/__init__.py b/documentdb_tests/compatibility/tests/core/operator/query/misc/jsonSchema/__init__.py 
new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/query/misc/jsonSchema/test_jsonSchema_errors.py b/documentdb_tests/compatibility/tests/core/operator/query/misc/jsonSchema/test_jsonSchema_errors.py new file mode 100644 index 00000000..3a14c8fe --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/query/misc/jsonSchema/test_jsonSchema_errors.py @@ -0,0 +1,304 @@ +""" +Tests for $jsonSchema error handling. + +Verifies that $jsonSchema rejects invalid argument types, unsupported keywords, +invalid schema constructs (negative values, empty arrays, duplicates, invalid regex, +invalid bsonType/type values), and invalid schemas in other commands. +Wrong-type errors are covered by test_jsonSchema_keyword_validation.py. +""" + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.query.utils.query_test_case import ( + QueryTestCase, +) +from documentdb_tests.framework.assertions import assertFailureCode +from documentdb_tests.framework.error_codes import ( + BAD_VALUE_ERROR, + FAILED_TO_PARSE_ERROR, + INVALID_REGEX_PATTERN_ERROR, + TYPE_MISMATCH_ERROR, +) +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +INVALID_ARGUMENT_TESTS: list[QueryTestCase] = [ + QueryTestCase( + id="arg_string", + filter={"$jsonSchema": "invalid"}, + error_code=TYPE_MISMATCH_ERROR, + msg="String argument should error", + ), + QueryTestCase( + id="arg_int", + filter={"$jsonSchema": 1}, + error_code=TYPE_MISMATCH_ERROR, + msg="Int argument should error", + ), + QueryTestCase( + id="arg_array", + filter={"$jsonSchema": [1, 2]}, + error_code=TYPE_MISMATCH_ERROR, + msg="Array argument should error", + ), + QueryTestCase( + id="arg_null", + filter={"$jsonSchema": None}, + error_code=TYPE_MISMATCH_ERROR, + msg="Null argument should error", + ), + QueryTestCase( + id="arg_bool", + filter={"$jsonSchema": True}, + 
error_code=TYPE_MISMATCH_ERROR, + msg="Bool argument should error", + ), +] + +UNSUPPORTED_KEYWORD_TESTS: list[QueryTestCase] = [ + QueryTestCase( + id="unsupported_ref", + filter={"$jsonSchema": {"properties": {"a": {"$ref": "#/defs/x"}}}}, + error_code=FAILED_TO_PARSE_ERROR, + msg="$ref keyword should error", + ), + QueryTestCase( + id="unsupported_schema", + filter={"$jsonSchema": {"$schema": "http://json-schema.org/draft-04/schema#"}}, + error_code=FAILED_TO_PARSE_ERROR, + msg="$schema keyword should error", + ), + QueryTestCase( + id="unsupported_default", + filter={"$jsonSchema": {"properties": {"a": {"default": 0}}}}, + error_code=FAILED_TO_PARSE_ERROR, + msg="default keyword should error", + ), + QueryTestCase( + id="unsupported_definitions", + filter={"$jsonSchema": {"definitions": {"x": {"bsonType": "int"}}}}, + error_code=FAILED_TO_PARSE_ERROR, + msg="definitions keyword should error", + ), + QueryTestCase( + id="unsupported_format", + filter={"$jsonSchema": {"properties": {"a": {"format": "email"}}}}, + error_code=FAILED_TO_PARSE_ERROR, + msg="format keyword should error", + ), + QueryTestCase( + id="unsupported_id", + filter={"$jsonSchema": {"id": "myschema"}}, + error_code=FAILED_TO_PARSE_ERROR, + msg="id keyword should error", + ), + QueryTestCase( + id="unsupported_unknown", + filter={"$jsonSchema": {"unknownKeyword": True}}, + error_code=FAILED_TO_PARSE_ERROR, + msg="Unknown keyword should error", + ), +] + +INVALID_SCHEMA_CONSTRUCT_TESTS: list[QueryTestCase] = [ + QueryTestCase( + id="construct_required_non_string_elements", + filter={"$jsonSchema": {"required": [1]}}, + error_code=TYPE_MISMATCH_ERROR, + msg="required with non-string elements should error", + ), + QueryTestCase( + id="construct_required_duplicate_values", + filter={"$jsonSchema": {"required": ["a", "a"]}}, + error_code=FAILED_TO_PARSE_ERROR, + msg="required with duplicate values should error", + ), + QueryTestCase( + id="construct_maxLength_negative", + filter={"$jsonSchema": 
{"properties": {"x": {"maxLength": -1}}}}, + error_code=FAILED_TO_PARSE_ERROR, + msg="maxLength with negative value should error", + ), + QueryTestCase( + id="construct_minLength_negative", + filter={"$jsonSchema": {"properties": {"x": {"minLength": -1}}}}, + error_code=FAILED_TO_PARSE_ERROR, + msg="minLength with negative value should error", + ), + QueryTestCase( + id="construct_pattern_invalid_regex", + filter={"$jsonSchema": {"properties": {"x": {"pattern": "[invalid"}}}}, + error_code=INVALID_REGEX_PATTERN_ERROR, + msg="pattern with invalid regex should error", + ), + QueryTestCase( + id="construct_allOf_non_object_element", + filter={"$jsonSchema": {"allOf": [1]}}, + error_code=TYPE_MISMATCH_ERROR, + msg="allOf with non-object element should error", + ), + QueryTestCase( + id="construct_anyOf_non_object_element", + filter={"$jsonSchema": {"anyOf": ["str"]}}, + error_code=TYPE_MISMATCH_ERROR, + msg="anyOf with non-object element should error", + ), + QueryTestCase( + id="construct_oneOf_non_object_element", + filter={"$jsonSchema": {"oneOf": [None]}}, + error_code=TYPE_MISMATCH_ERROR, + msg="oneOf with non-object element should error", + ), + QueryTestCase( + id="construct_minItems_negative", + filter={"$jsonSchema": {"properties": {"x": {"minItems": -1}}}}, + error_code=FAILED_TO_PARSE_ERROR, + msg="minItems with negative value should error", + ), + QueryTestCase( + id="construct_maxItems_negative", + filter={"$jsonSchema": {"properties": {"x": {"maxItems": -1}}}}, + error_code=FAILED_TO_PARSE_ERROR, + msg="maxItems with negative value should error", + ), + QueryTestCase( + id="construct_multipleOf_zero", + filter={"$jsonSchema": {"properties": {"x": {"multipleOf": 0}}}}, + error_code=FAILED_TO_PARSE_ERROR, + msg="multipleOf with zero should error", + ), + QueryTestCase( + id="construct_multipleOf_negative", + filter={"$jsonSchema": {"properties": {"x": {"multipleOf": -3}}}}, + error_code=FAILED_TO_PARSE_ERROR, + msg="multipleOf with negative should error", + 
), + QueryTestCase( + id="construct_invalid_bsontype", + filter={"$jsonSchema": {"properties": {"x": {"bsonType": "invalid"}}}}, + error_code=BAD_VALUE_ERROR, + msg="Invalid bsonType string should error", + ), + QueryTestCase( + id="construct_invalid_bsontype_in_array", + filter={"$jsonSchema": {"properties": {"x": {"bsonType": ["invalid"]}}}}, + error_code=BAD_VALUE_ERROR, + msg="Invalid bsonType in array form should error", + ), + QueryTestCase( + id="construct_bsontype_invalid_alias_integer", + filter={"$jsonSchema": {"properties": {"x": {"bsonType": "integer"}}}}, + error_code=FAILED_TO_PARSE_ERROR, + msg="bsonType 'integer' is not a valid alias", + ), + QueryTestCase( + id="construct_invalid_type_string", + filter={"$jsonSchema": {"properties": {"x": {"type": "invalid"}}}}, + error_code=BAD_VALUE_ERROR, + msg="Invalid type string should error", + ), + QueryTestCase( + id="construct_invalid_type_in_array", + filter={"$jsonSchema": {"properties": {"x": {"type": ["invalid"]}}}}, + error_code=BAD_VALUE_ERROR, + msg="Invalid type in array form should error", + ), + QueryTestCase( + id="construct_dependencies_non_string_in_array", + filter={"$jsonSchema": {"dependencies": {"a": [1]}}}, + error_code=TYPE_MISMATCH_ERROR, + msg="dependencies array with non-string element should error", + ), + QueryTestCase( + id="construct_dependencies_empty_array", + filter={"$jsonSchema": {"dependencies": {"a": []}}}, + error_code=FAILED_TO_PARSE_ERROR, + msg="dependencies with empty array should error", + ), + QueryTestCase( + id="construct_patternProperties_invalid_regex_key", + filter={"$jsonSchema": {"patternProperties": {"[invalid": {}}}}, + error_code=BAD_VALUE_ERROR, + msg="patternProperties with invalid regex key should error", + ), + QueryTestCase( + id="construct_items_non_object_in_array", + filter={"$jsonSchema": {"properties": {"x": {"items": [1]}}}}, + error_code=TYPE_MISMATCH_ERROR, + msg="items tuple form with non-object element should error", + ), + QueryTestCase( + 
id="construct_minProperties_negative", + filter={"$jsonSchema": {"minProperties": -1}}, + error_code=FAILED_TO_PARSE_ERROR, + msg="minProperties with negative should error", + ), + QueryTestCase( + id="construct_maxProperties_negative", + filter={"$jsonSchema": {"maxProperties": -1}}, + error_code=FAILED_TO_PARSE_ERROR, + msg="maxProperties with negative should error", + ), +] + +TYPE_INVALID_BSON_ONLY = [ + "double", + "binData", + "objectId", + "date", + "regex", + "javascript", + "int", + "timestamp", + "long", + "decimal", +] + +INVALID_SCHEMA_CONSTRUCT_TESTS += [ + QueryTestCase( + id=f"construct_type_bson_only_{t}", + filter={"$jsonSchema": {"properties": {"x": {"type": t}}}}, + error_code=BAD_VALUE_ERROR, + msg=f"type keyword should reject BSON-only alias '{t}'", + ) + for t in TYPE_INVALID_BSON_ONLY +] + +ALL_ERROR_TESTS = ( + INVALID_ARGUMENT_TESTS + UNSUPPORTED_KEYWORD_TESTS + INVALID_SCHEMA_CONSTRUCT_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(ALL_ERROR_TESTS)) +def test_jsonSchema_errors(collection, test): + """Test $jsonSchema rejects invalid inputs with correct error codes.""" + result = execute_command(collection, {"find": collection.name, "filter": test.filter}) + assertFailureCode(result, test.error_code) + + +INVALID_SCHEMA_COMMAND_TESTS: list[QueryTestCase] = [ + QueryTestCase( + id="command_invalid_in_count", + filter={"$jsonSchema": {"invalid_keyword": True}}, + error_code=FAILED_TO_PARSE_ERROR, + msg="Invalid $jsonSchema should fail in count command", + ), + QueryTestCase( + id="command_invalid_in_distinct", + filter={"$jsonSchema": {"invalid_keyword": True}}, + error_code=FAILED_TO_PARSE_ERROR, + msg="Invalid $jsonSchema should fail in distinct command", + ), +] + + +@pytest.mark.parametrize("test", pytest_params(INVALID_SCHEMA_COMMAND_TESTS)) +def test_jsonSchema_invalid_schema_in_commands(collection, test): + """Test invalid $jsonSchema fails in count and distinct commands.""" + if "count" in test.id: + result = 
execute_command(collection, {"count": collection.name, "query": test.filter}) + else: + result = execute_command( + collection, {"distinct": collection.name, "key": "x", "query": test.filter} + ) + assertFailureCode(result, test.error_code) diff --git a/documentdb_tests/compatibility/tests/core/operator/query/misc/jsonSchema/test_jsonSchema_field_validation.py b/documentdb_tests/compatibility/tests/core/operator/query/misc/jsonSchema/test_jsonSchema_field_validation.py new file mode 100644 index 00000000..c403c545 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/query/misc/jsonSchema/test_jsonSchema_field_validation.py @@ -0,0 +1,560 @@ +""" +Tests for $jsonSchema type-specific validation keywords. + +Validates string (minLength, maxLength, pattern), numeric (minimum, maximum, +exclusiveMinimum, exclusiveMaximum, multipleOf), and array (minItems, maxItems, +uniqueItems, items, additionalItems) constraint keywords against document field values. +""" + +import pytest +from bson import Decimal128, Int64 + +from documentdb_tests.compatibility.tests.core.operator.query.utils.query_test_case import ( + QueryTestCase, +) +from documentdb_tests.framework.assertions import assertSuccess +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.test_constants import ( + DECIMAL128_INFINITY, + DECIMAL128_NAN, + DECIMAL128_NEGATIVE_INFINITY, + DECIMAL128_NEGATIVE_ZERO, + DECIMAL128_TRAILING_ZERO, + DECIMAL128_ZERO, + DOUBLE_NEGATIVE_ZERO, + DOUBLE_ZERO, + FLOAT_INFINITY, + FLOAT_NAN, + FLOAT_NEGATIVE_INFINITY, + INT64_MAX, +) + +STRING_VALIDATION_TESTS: list[QueryTestCase] = [ + QueryTestCase( + id="string_minLength_matches", + filter={"$jsonSchema": {"properties": {"x": {"minLength": 3}}}}, + doc=[{"_id": 1, "x": "abc"}, {"_id": 2, "x": "ab"}, {"_id": 3, "x": "abcd"}], + expected=[{"_id": 1, "x": "abc"}, {"_id": 3, "x": "abcd"}], + msg="minLength 
should match strings with sufficient length", + ), + QueryTestCase( + id="string_maxLength_matches", + filter={"$jsonSchema": {"properties": {"x": {"maxLength": 5}}}}, + doc=[{"_id": 1, "x": "abcde"}, {"_id": 2, "x": "abcdef"}, {"_id": 3, "x": "ab"}], + expected=[{"_id": 1, "x": "abcde"}, {"_id": 3, "x": "ab"}], + msg="maxLength should match strings within length limit", + ), + QueryTestCase( + id="string_minLength_zero_matches_empty", + filter={"$jsonSchema": {"properties": {"x": {"minLength": 0}}}}, + doc=[{"_id": 1, "x": ""}, {"_id": 2, "x": "a"}], + expected=[{"_id": 1, "x": ""}, {"_id": 2, "x": "a"}], + msg="minLength 0 should match empty string", + ), + QueryTestCase( + id="string_minLength_maxLength_zero_only_empty", + filter={"$jsonSchema": {"properties": {"x": {"minLength": 0, "maxLength": 0}}}}, + doc=[{"_id": 1, "x": ""}, {"_id": 2, "x": "a"}], + expected=[{"_id": 1, "x": ""}], + msg="minLength 0 and maxLength 0 should match only empty string", + ), + QueryTestCase( + id="string_minLength_non_string_passes", + filter={"$jsonSchema": {"properties": {"x": {"minLength": 1}}}}, + doc=[{"_id": 1, "x": 42}, {"_id": 2, "x": "abc"}], + expected=[{"_id": 1, "x": 42}, {"_id": 2, "x": "abc"}], + msg="minLength on non-string field should pass", + ), + QueryTestCase( + id="string_pattern_prefix", + filter={"$jsonSchema": {"properties": {"x": {"pattern": "^abc"}}}}, + doc=[{"_id": 1, "x": "abcdef"}, {"_id": 2, "x": "xyzabc"}], + expected=[{"_id": 1, "x": "abcdef"}], + msg="Pattern with ^ prefix should match strings starting with pattern", + ), + QueryTestCase( + id="string_pattern_digits", + filter={"$jsonSchema": {"properties": {"x": {"pattern": "[0-9]+"}}}}, + doc=[{"_id": 1, "x": "test123"}, {"_id": 2, "x": "test"}], + expected=[{"_id": 1, "x": "test123"}], + msg="Pattern with digit regex should match strings containing digits", + ), + QueryTestCase( + id="string_pattern_non_string_passes", + filter={"$jsonSchema": {"properties": {"x": {"pattern": "abc"}}}}, + 
doc=[{"_id": 1, "x": {"a": 1}}, {"_id": 2, "x": "abc"}], + expected=[{"_id": 1, "x": {"a": 1}}, {"_id": 2, "x": "abc"}], + msg="Pattern on non-string field should pass", + ), + QueryTestCase( + id="string_pattern_empty_matches_all", + filter={"$jsonSchema": {"properties": {"x": {"pattern": ""}}}}, + doc=[{"_id": 1, "x": "abc"}, {"_id": 2, "x": ""}, {"_id": 3, "x": 42}], + expected=[{"_id": 1, "x": "abc"}, {"_id": 2, "x": ""}, {"_id": 3, "x": 42}], + msg="Empty pattern should match all strings", + ), + QueryTestCase( + id="string_pattern_exact_empty", + filter={"$jsonSchema": {"properties": {"x": {"pattern": "^$"}}}}, + doc=[{"_id": 1, "x": ""}, {"_id": 2, "x": "a"}, {"_id": 3, "x": 42}], + expected=[{"_id": 1, "x": ""}, {"_id": 3, "x": 42}], + msg="Pattern ^$ should match only empty string", + ), + QueryTestCase( + id="string_pattern_dot_matches_any", + filter={"$jsonSchema": {"properties": {"x": {"pattern": "a.b"}}}}, + doc=[{"_id": 1, "x": "axb"}, {"_id": 2, "x": "ab"}, {"_id": 3, "x": "a.b"}], + expected=[{"_id": 1, "x": "axb"}, {"_id": 3, "x": "a.b"}], + msg="Pattern with dot should match any character", + ), + QueryTestCase( + id="string_combined_constraints", + filter={ + "$jsonSchema": { + "properties": {"x": {"minLength": 3, "maxLength": 5, "pattern": "^[a-c]+"}} + } + }, + doc=[ + {"_id": 1, "x": "abc"}, + {"_id": 2, "x": "ab"}, + {"_id": 3, "x": "abcdefghij"}, + {"_id": 4, "x": "xyz"}, + ], + expected=[{"_id": 1, "x": "abc"}], + msg="Combined minLength, maxLength, and pattern should all apply", + ), + QueryTestCase( + id="string_missing_field_passes", + filter={"$jsonSchema": {"properties": {"x": {"minLength": 1}}}}, + doc=[{"_id": 1, "x": "abc"}, {"_id": 2}], + expected=[{"_id": 1, "x": "abc"}, {"_id": 2}], + msg="String keywords should pass when field is missing", + ), + QueryTestCase( + id="string_minLength_large_rejects_all", + filter={"$jsonSchema": {"properties": {"x": {"minLength": 999999}}}}, + doc=[{"_id": 1, "x": "hello"}, {"_id": 2, "x": ""}], 
+ expected=[], + msg="minLength with huge value should reject all strings", + ), + QueryTestCase( + id="string_maxLength_large_accepts_all", + filter={"$jsonSchema": {"properties": {"x": {"maxLength": 999999}}}}, + doc=[{"_id": 1, "x": "hello"}, {"_id": 2, "x": ""}], + expected=[{"_id": 1, "x": "hello"}, {"_id": 2, "x": ""}], + msg="maxLength with huge value should accept all strings", + ), + QueryTestCase( + id="string_minLength_unicode", + filter={"$jsonSchema": {"properties": {"x": {"minLength": 2}}}}, + doc=[{"_id": 1, "x": "\u65e5\u672c"}, {"_id": 2, "x": "\u65e5"}], + expected=[{"_id": 1, "x": "\u65e5\u672c"}], + msg="minLength should count code points not bytes", + ), + QueryTestCase( + id="string_maxLength_unicode", + filter={"$jsonSchema": {"properties": {"x": {"maxLength": 2}}}}, + doc=[{"_id": 1, "x": "\u65e5\u672c"}, {"_id": 2, "x": "\u65e5\u672c\u8a9e"}], + expected=[{"_id": 1, "x": "\u65e5\u672c"}], + msg="maxLength should count code points not bytes", + ), +] + + +@pytest.mark.parametrize("test", pytest_params(STRING_VALIDATION_TESTS)) +def test_jsonSchema_string_validation(collection, test): + """Test $jsonSchema string validation keywords.""" + collection.insert_many(test.doc) + result = execute_command(collection, {"find": collection.name, "filter": test.filter}) + assertSuccess(result, test.expected) + + +NUMERIC_VALIDATION_TESTS: list[QueryTestCase] = [ + QueryTestCase( + id="numeric_minimum_inclusive", + filter={"$jsonSchema": {"properties": {"x": {"minimum": 5}}}}, + doc=[{"_id": 1, "x": 5}, {"_id": 2, "x": 4}, {"_id": 3, "x": 6}], + expected=[{"_id": 1, "x": 5}, {"_id": 3, "x": 6}], + msg="Minimum should be inclusive", + ), + QueryTestCase( + id="numeric_maximum_inclusive", + filter={"$jsonSchema": {"properties": {"x": {"maximum": 10}}}}, + doc=[{"_id": 1, "x": 10}, {"_id": 2, "x": 11}, {"_id": 3, "x": 9}], + expected=[{"_id": 1, "x": 10}, {"_id": 3, "x": 9}], + msg="Maximum should be inclusive", + ), + QueryTestCase( + 
id="numeric_minimum_with_long", + filter={"$jsonSchema": {"properties": {"x": {"minimum": 5}}}}, + doc=[{"_id": 1, "x": Int64(100)}, {"_id": 2, "x": Int64(4)}], + expected=[{"_id": 1, "x": Int64(100)}], + msg="Minimum should work with Int64 values", + ), + QueryTestCase( + id="numeric_minimum_with_double", + filter={"$jsonSchema": {"properties": {"x": {"minimum": 5}}}}, + doc=[{"_id": 1, "x": 5.5}, {"_id": 2, "x": 4.9}], + expected=[{"_id": 1, "x": 5.5}], + msg="Minimum should work with double values", + ), + QueryTestCase( + id="numeric_minimum_with_decimal128", + filter={"$jsonSchema": {"properties": {"x": {"minimum": 5}}}}, + doc=[{"_id": 1, "x": Decimal128("10.5")}, {"_id": 2, "x": Decimal128("3.2")}], + expected=[{"_id": 1, "x": Decimal128("10.5")}], + msg="Minimum should work with Decimal128 values", + ), + QueryTestCase( + id="numeric_cross_type_minimum_decimal_with_int", + filter={"$jsonSchema": {"properties": {"x": {"minimum": DECIMAL128_TRAILING_ZERO}}}}, + doc=[{"_id": 1, "x": 5}, {"_id": 2, "x": 0}], + expected=[{"_id": 1, "x": 5}], + msg="Minimum as Decimal128 should match int values", + ), + QueryTestCase( + id="numeric_exclusive_minimum", + filter={"$jsonSchema": {"properties": {"x": {"minimum": 5, "exclusiveMinimum": True}}}}, + doc=[{"_id": 1, "x": 5}, {"_id": 2, "x": 6}, {"_id": 3, "x": 4}], + expected=[{"_id": 2, "x": 6}], + msg="exclusiveMinimum true should exclude boundary value", + ), + QueryTestCase( + id="numeric_exclusive_maximum", + filter={"$jsonSchema": {"properties": {"x": {"maximum": 10, "exclusiveMaximum": True}}}}, + doc=[{"_id": 1, "x": 10}, {"_id": 2, "x": 9}, {"_id": 3, "x": 11}], + expected=[{"_id": 2, "x": 9}], + msg="exclusiveMaximum true should exclude boundary value", + ), + QueryTestCase( + id="numeric_exclusive_minimum_false", + filter={"$jsonSchema": {"properties": {"x": {"minimum": 5, "exclusiveMinimum": False}}}}, + doc=[{"_id": 1, "x": 5}, {"_id": 2, "x": 4}], + expected=[{"_id": 1, "x": 5}], + msg="exclusiveMinimum 
false should be same as default inclusive", + ), + QueryTestCase( + id="numeric_exclusive_maximum_false", + filter={"$jsonSchema": {"properties": {"x": {"maximum": 10, "exclusiveMaximum": False}}}}, + doc=[{"_id": 1, "x": 10}, {"_id": 2, "x": 11}], + expected=[{"_id": 1, "x": 10}], + msg="exclusiveMaximum false should be same as default inclusive", + ), + QueryTestCase( + id="numeric_multipleOf_matches", + filter={"$jsonSchema": {"properties": {"x": {"multipleOf": 3}}}}, + doc=[{"_id": 1, "x": 9}, {"_id": 2, "x": 10}, {"_id": 3, "x": 12}], + expected=[{"_id": 1, "x": 9}, {"_id": 3, "x": 12}], + msg="multipleOf should match multiples and reject non-multiples", + ), + QueryTestCase( + id="numeric_minimum_non_numeric_passes", + filter={"$jsonSchema": {"properties": {"x": {"minimum": 5}}}}, + doc=[{"_id": 1, "x": "hello"}, {"_id": 2, "x": 10}], + expected=[{"_id": 1, "x": "hello"}, {"_id": 2, "x": 10}], + msg="Minimum on non-numeric field should pass", + ), + QueryTestCase( + id="numeric_maximum_non_numeric_passes", + filter={"$jsonSchema": {"properties": {"x": {"maximum": 10}}}}, + doc=[{"_id": 1, "x": "hello"}, {"_id": 2, "x": 3}], + expected=[{"_id": 1, "x": "hello"}, {"_id": 2, "x": 3}], + msg="Maximum on non-numeric field should pass", + ), + QueryTestCase( + id="numeric_multipleOf_non_numeric_passes", + filter={"$jsonSchema": {"properties": {"x": {"multipleOf": 3}}}}, + doc=[{"_id": 1, "x": [1, 2]}, {"_id": 2, "x": 6}], + expected=[{"_id": 1, "x": [1, 2]}, {"_id": 2, "x": 6}], + msg="multipleOf on non-numeric field should pass", + ), + QueryTestCase( + id="numeric_minimum_maximum_range", + filter={"$jsonSchema": {"properties": {"x": {"minimum": 0, "maximum": 100}}}}, + doc=[ + {"_id": 1, "x": -1}, + {"_id": 2, "x": 0}, + {"_id": 3, "x": 50}, + {"_id": 4, "x": 100}, + {"_id": 5, "x": 101}, + ], + expected=[{"_id": 2, "x": 0}, {"_id": 3, "x": 50}, {"_id": 4, "x": 100}], + msg="Minimum and maximum together should validate range", + ), + QueryTestCase( + 
id="numeric_missing_field_passes", + filter={"$jsonSchema": {"properties": {"x": {"minimum": 5}}}}, + doc=[{"_id": 1, "x": 10}, {"_id": 2}], + expected=[{"_id": 1, "x": 10}, {"_id": 2}], + msg="Numeric keywords should pass when field is missing", + ), + QueryTestCase( + id="numeric_minimum_negative", + filter={"$jsonSchema": {"properties": {"x": {"minimum": -1}}}}, + doc=[{"_id": 1, "x": 0}, {"_id": 2, "x": -2}, {"_id": 3, "x": -1}], + expected=[{"_id": 1, "x": 0}, {"_id": 3, "x": -1}], + msg="Minimum with negative value should work", + ), + QueryTestCase( + id="numeric_maximum_negative", + filter={"$jsonSchema": {"properties": {"x": {"maximum": -1}}}}, + doc=[{"_id": 1, "x": 0}, {"_id": 2, "x": -2}, {"_id": 3, "x": -1}], + expected=[{"_id": 2, "x": -2}, {"_id": 3, "x": -1}], + msg="Maximum with negative value should work", + ), + QueryTestCase( + id="numeric_minimum_large_rejects_all", + filter={"$jsonSchema": {"properties": {"x": {"minimum": INT64_MAX}}}}, + doc=[{"_id": 1, "x": 999999}, {"_id": 2, "x": 0}], + expected=[], + msg="Minimum with INT64_MAX should reject normal values", + ), + QueryTestCase( + id="numeric_maximum_large_accepts_below", + filter={"$jsonSchema": {"properties": {"x": {"maximum": INT64_MAX}}}}, + doc=[{"_id": 1, "x": 999999}, {"_id": 2, "x": 0}], + expected=[{"_id": 1, "x": 999999}, {"_id": 2, "x": 0}], + msg="Maximum with INT64_MAX should accept normal values", + ), + QueryTestCase( + id="numeric_multipleOf_decimal", + filter={"$jsonSchema": {"properties": {"x": {"multipleOf": 0.1}}}}, + doc=[{"_id": 1, "x": 0.3}, {"_id": 2, "x": 0.5}], + expected=[{"_id": 1, "x": 0.3}, {"_id": 2, "x": 0.5}], + msg="multipleOf with decimal value should work", + ), + QueryTestCase( + id="numeric_minimum_nan_field", + filter={"$jsonSchema": {"properties": {"x": {"minimum": 0}}}}, + doc=[{"_id": 1, "x": FLOAT_NAN}, {"_id": 2, "x": DECIMAL128_NAN}, {"_id": 3, "x": 1}], + expected=[{"_id": 3, "x": 1}], + msg="NaN fields (float and decimal) should not satisfy 
minimum", + ), + QueryTestCase( + id="numeric_maximum_nan_field", + filter={"$jsonSchema": {"properties": {"x": {"maximum": 100}}}}, + doc=[{"_id": 1, "x": FLOAT_NAN}, {"_id": 2, "x": DECIMAL128_NAN}, {"_id": 3, "x": 1}], + expected=[{"_id": 3, "x": 1}], + msg="NaN fields (float and decimal) should not satisfy maximum", + ), + QueryTestCase( + id="numeric_minimum_infinity_field", + filter={"$jsonSchema": {"properties": {"x": {"minimum": 0}}}}, + doc=[ + {"_id": 1, "x": FLOAT_INFINITY}, + {"_id": 2, "x": FLOAT_NEGATIVE_INFINITY}, + {"_id": 3, "x": DECIMAL128_INFINITY}, + {"_id": 4, "x": DECIMAL128_NEGATIVE_INFINITY}, + ], + expected=[{"_id": 1, "x": FLOAT_INFINITY}, {"_id": 3, "x": DECIMAL128_INFINITY}], + msg="Infinity should satisfy minimum, -Infinity should not (float and decimal)", + ), + QueryTestCase( + id="numeric_maximum_infinity_field", + filter={"$jsonSchema": {"properties": {"x": {"maximum": 0}}}}, + doc=[ + {"_id": 1, "x": FLOAT_NEGATIVE_INFINITY}, + {"_id": 2, "x": FLOAT_INFINITY}, + {"_id": 3, "x": DECIMAL128_NEGATIVE_INFINITY}, + {"_id": 4, "x": DECIMAL128_INFINITY}, + ], + expected=[ + {"_id": 1, "x": FLOAT_NEGATIVE_INFINITY}, + {"_id": 3, "x": DECIMAL128_NEGATIVE_INFINITY}, + ], + msg="-Infinity should satisfy maximum 0, Infinity should not (float and decimal)", + ), + QueryTestCase( + id="numeric_minimum_negative_zero", + filter={"$jsonSchema": {"properties": {"x": {"minimum": 0}}}}, + doc=[ + {"_id": 1, "x": DOUBLE_NEGATIVE_ZERO}, + {"_id": 2, "x": DOUBLE_ZERO}, + {"_id": 3, "x": DECIMAL128_NEGATIVE_ZERO}, + {"_id": 4, "x": DECIMAL128_ZERO}, + ], + expected=[ + {"_id": 1, "x": DOUBLE_NEGATIVE_ZERO}, + {"_id": 2, "x": DOUBLE_ZERO}, + {"_id": 3, "x": DECIMAL128_NEGATIVE_ZERO}, + {"_id": 4, "x": DECIMAL128_ZERO}, + ], + msg="-0.0 and 0.0 should both satisfy minimum 0 (float and decimal)", + ), +] + + +@pytest.mark.parametrize("test", pytest_params(NUMERIC_VALIDATION_TESTS)) +def test_jsonSchema_numeric_validation(collection, test): + """Test 
$jsonSchema numeric validation keywords.""" + collection.insert_many(test.doc) + result = execute_command(collection, {"find": collection.name, "filter": test.filter}) + assertSuccess(result, test.expected) + + +ARRAY_VALIDATION_TESTS: list[QueryTestCase] = [ + QueryTestCase( + id="array_minItems_matches", + filter={"$jsonSchema": {"properties": {"x": {"minItems": 2}}}}, + doc=[{"_id": 1, "x": [1, 2, 3]}, {"_id": 2, "x": [1]}], + expected=[{"_id": 1, "x": [1, 2, 3]}], + msg="minItems should match arrays with sufficient elements", + ), + QueryTestCase( + id="array_maxItems_matches", + filter={"$jsonSchema": {"properties": {"x": {"maxItems": 3}}}}, + doc=[{"_id": 1, "x": [1, 2, 3]}, {"_id": 2, "x": [1, 2, 3, 4]}], + expected=[{"_id": 1, "x": [1, 2, 3]}], + msg="maxItems should match arrays within limit", + ), + QueryTestCase( + id="array_minItems_zero_matches_empty", + filter={"$jsonSchema": {"properties": {"x": {"minItems": 0}}}}, + doc=[{"_id": 1, "x": []}, {"_id": 2, "x": [1]}], + expected=[{"_id": 1, "x": []}, {"_id": 2, "x": [1]}], + msg="minItems 0 should match empty array", + ), + QueryTestCase( + id="array_minItems_maxItems_zero_only_empty", + filter={"$jsonSchema": {"properties": {"x": {"minItems": 0, "maxItems": 0}}}}, + doc=[{"_id": 1, "x": []}, {"_id": 2, "x": [1]}], + expected=[{"_id": 1, "x": []}], + msg="minItems 0 and maxItems 0 should match only empty array", + ), + QueryTestCase( + id="array_minItems_non_array_passes", + filter={"$jsonSchema": {"properties": {"x": {"minItems": 1}}}}, + doc=[{"_id": 1, "x": "hello"}, {"_id": 2, "x": [1, 2]}], + expected=[{"_id": 1, "x": "hello"}, {"_id": 2, "x": [1, 2]}], + msg="minItems on non-array field should pass", + ), + QueryTestCase( + id="array_minItems_missing_field_passes", + filter={"$jsonSchema": {"properties": {"x": {"minItems": 1}}}}, + doc=[{"_id": 1, "x": [1, 2]}, {"_id": 2}], + expected=[{"_id": 1, "x": [1, 2]}, {"_id": 2}], + msg="minItems should pass when field is missing", + ), + QueryTestCase( + 
id="array_uniqueItems_true_matches", + filter={"$jsonSchema": {"properties": {"x": {"uniqueItems": True}}}}, + doc=[{"_id": 1, "x": [1, 2, 3]}, {"_id": 2, "x": [1, 1, 2]}], + expected=[{"_id": 1, "x": [1, 2, 3]}], + msg="uniqueItems true should match arrays with all distinct elements", + ), + QueryTestCase( + id="array_uniqueItems_false_allows_duplicates", + filter={"$jsonSchema": {"properties": {"x": {"uniqueItems": False}}}}, + doc=[{"_id": 1, "x": [1, 1, 2]}, {"_id": 2, "x": [1, 2]}], + expected=[{"_id": 1, "x": [1, 1, 2]}, {"_id": 2, "x": [1, 2]}], + msg="uniqueItems false should allow arrays with duplicates", + ), + QueryTestCase( + id="array_uniqueItems_null_duplicates", + filter={"$jsonSchema": {"properties": {"x": {"uniqueItems": True}}}}, + doc=[{"_id": 1, "x": [None, None]}, {"_id": 2, "x": [None, 1]}], + expected=[{"_id": 2, "x": [None, 1]}], + msg="uniqueItems true should reject [null, null]", + ), + QueryTestCase( + id="array_uniqueItems_zero_false_distinct", + filter={"$jsonSchema": {"properties": {"x": {"uniqueItems": True}}}}, + doc=[{"_id": 1, "x": [0, False]}, {"_id": 2, "x": [0, 0]}], + expected=[{"_id": 1, "x": [0, False]}], + msg="uniqueItems true with [0, false] — distinct BSON types are unique", + ), + QueryTestCase( + id="array_uniqueItems_cross_type_numeric", + filter={"$jsonSchema": {"properties": {"x": {"uniqueItems": True}}}}, + doc=[{"_id": 1, "x": [1, Int64(1)]}, {"_id": 2, "x": [1, Int64(2)]}], + expected=[{"_id": 2, "x": [1, Int64(2)]}], + msg="uniqueItems should treat NumberInt(1) and NumberLong(1) as equivalent", + ), + QueryTestCase( + id="array_uniqueItems_non_array_passes", + filter={"$jsonSchema": {"properties": {"x": {"uniqueItems": True}}}}, + doc=[{"_id": 1, "x": 42}, {"_id": 2, "x": [1, 1]}], + expected=[{"_id": 1, "x": 42}], + msg="uniqueItems on non-array field should pass", + ), + QueryTestCase( + id="array_items_schema_matches", + filter={"$jsonSchema": {"properties": {"x": {"items": {"bsonType": "int"}}}}}, + 
doc=[{"_id": 1, "x": [1, 2, 3]}, {"_id": 2, "x": [1, "two", 3]}], + expected=[{"_id": 1, "x": [1, 2, 3]}], + msg="items with schema should validate all array elements", + ), + QueryTestCase( + id="array_items_empty_array_passes", + filter={"$jsonSchema": {"properties": {"x": {"items": {"bsonType": "string"}}}}}, + doc=[{"_id": 1, "x": []}, {"_id": 2, "x": [1]}], + expected=[{"_id": 1, "x": []}], + msg="items on empty array should pass (vacuously true)", + ), + QueryTestCase( + id="array_items_tuple_validation", + filter={ + "$jsonSchema": { + "properties": {"x": {"items": [{"bsonType": "string"}, {"bsonType": "int"}]}} + } + }, + doc=[{"_id": 1, "x": ["hello", 42]}, {"_id": 2, "x": [42, "hello"]}], + expected=[{"_id": 1, "x": ["hello", 42]}], + msg="items with array of schemas should do tuple validation", + ), + QueryTestCase( + id="array_items_tuple_shorter_array_passes", + filter={ + "$jsonSchema": { + "properties": {"x": {"items": [{"bsonType": "string"}, {"bsonType": "int"}]}} + } + }, + doc=[{"_id": 1, "x": ["hello"]}, {"_id": 2, "x": [42]}], + expected=[{"_id": 1, "x": ["hello"]}], + msg="Tuple items with shorter array should pass", + ), + QueryTestCase( + id="array_additionalItems_false_rejects_extra", + filter={ + "$jsonSchema": { + "properties": {"x": {"items": [{"bsonType": "string"}], "additionalItems": False}} + } + }, + doc=[{"_id": 1, "x": ["hello"]}, {"_id": 2, "x": ["hello", 42]}], + expected=[{"_id": 1, "x": ["hello"]}], + msg="additionalItems false should reject extra elements", + ), + QueryTestCase( + id="array_additionalItems_true_allows_extra", + filter={ + "$jsonSchema": { + "properties": {"x": {"items": [{"bsonType": "string"}], "additionalItems": True}} + } + }, + doc=[{"_id": 1, "x": ["hello", 42]}, {"_id": 2, "x": ["hello"]}], + expected=[{"_id": 1, "x": ["hello", 42]}, {"_id": 2, "x": ["hello"]}], + msg="additionalItems true should allow extra elements", + ), + QueryTestCase( + id="array_additionalItems_schema_validates_extra", + filter={ 
+ "$jsonSchema": { + "properties": { + "x": {"items": [{"bsonType": "string"}], "additionalItems": {"bsonType": "int"}} + } + } + }, + doc=[{"_id": 1, "x": ["hello", 42]}, {"_id": 2, "x": ["hello", "extra"]}], + expected=[{"_id": 1, "x": ["hello", 42]}], + msg="additionalItems as schema should validate extra elements", + ), +] + + +@pytest.mark.parametrize("test", pytest_params(ARRAY_VALIDATION_TESTS)) +def test_jsonSchema_array_validation(collection, test): + """Test $jsonSchema array validation keywords.""" + collection.insert_many(test.doc) + result = execute_command(collection, {"find": collection.name, "filter": test.filter}) + assertSuccess(result, test.expected) diff --git a/documentdb_tests/compatibility/tests/core/operator/query/misc/jsonSchema/test_jsonSchema_keyword_combinations.py b/documentdb_tests/compatibility/tests/core/operator/query/misc/jsonSchema/test_jsonSchema_keyword_combinations.py new file mode 100644 index 00000000..40e85a6c --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/query/misc/jsonSchema/test_jsonSchema_keyword_combinations.py @@ -0,0 +1,457 @@ +""" +Tests for $jsonSchema keyword combinations and logical composition. + +Validates combined keyword usage, enum matching, type-inapplicable keyword behavior, +collation, allOf, anyOf, oneOf, not, nested logical composition, and $jsonSchema +combined with query-level logical operators ($and, $or, $nor). 
+""" + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.query.utils.query_test_case import ( + QueryTestCase, +) +from documentdb_tests.framework.assertions import assertSuccess +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +KEYWORD_COMBINATION_TESTS: list[QueryTestCase] = [ + QueryTestCase( + id="bsontype_with_minimum", + filter={"$jsonSchema": {"properties": {"x": {"bsonType": "int", "minimum": 5}}}}, + doc=[{"_id": 1, "x": 10}, {"_id": 2, "x": 3}, {"_id": 3, "x": "hello"}], + expected=[{"_id": 1, "x": 10}], + msg="bsonType int combined with minimum should filter both", + ), + QueryTestCase( + id="bsontype_string_with_length", + filter={ + "$jsonSchema": { + "properties": {"x": {"bsonType": "string", "minLength": 1, "maxLength": 100}} + } + }, + doc=[{"_id": 1, "x": "abc"}, {"_id": 2, "x": ""}, {"_id": 3, "x": 42}], + expected=[{"_id": 1, "x": "abc"}], + msg="bsonType string combined with minLength and maxLength should filter both", + ), + QueryTestCase( + id="bsontype_array_with_items", + filter={ + "$jsonSchema": { + "properties": { + "x": {"bsonType": "array", "minItems": 1, "items": {"bsonType": "int"}} + } + } + }, + doc=[{"_id": 1, "x": [1, 2]}, {"_id": 2, "x": []}, {"_id": 3, "x": "not_array"}], + expected=[{"_id": 1, "x": [1, 2]}], + msg="bsonType array combined with minItems and items should filter all", + ), + QueryTestCase( + id="required_properties_additionalProperties", + filter={ + "$jsonSchema": { + "required": ["a", "b"], + "properties": {"_id": {}, "a": {"bsonType": "string"}, "b": {"bsonType": "int"}}, + "additionalProperties": False, + } + }, + doc=[ + {"_id": 1, "a": "hello", "b": 42}, + {"_id": 2, "a": "hello"}, + {"_id": 3, "a": "hello", "b": 42, "c": "extra"}, + ], + expected=[{"_id": 1, "a": "hello", "b": 42}], + msg="required + properties + additionalProperties should all apply", + ), + QueryTestCase( + id="not_null_pattern", + 
filter={"$jsonSchema": {"properties": {"x": {"not": {"bsonType": "null"}}}}}, + doc=[{"_id": 1, "x": "hello"}, {"_id": 2, "x": None}, {"_id": 3, "x": 42}], + expected=[{"_id": 1, "x": "hello"}, {"_id": 3, "x": 42}], + msg="not bsonType null should reject null fields", + ), + QueryTestCase( + id="enum_matches", + filter={"$jsonSchema": {"properties": {"x": {"enum": [1, 2, 3]}}}}, + doc=[{"_id": 1, "x": 1}, {"_id": 2, "x": 4}, {"_id": 3, "x": 2}], + expected=[{"_id": 1, "x": 1}, {"_id": 3, "x": 2}], + msg="enum should match values in the enum list", + ), + QueryTestCase( + id="enum_mixed_types", + filter={"$jsonSchema": {"properties": {"x": {"enum": ["a", None, 1]}}}}, + doc=[ + {"_id": 1, "x": "a"}, + {"_id": 2, "x": None}, + {"_id": 3, "x": 1}, + {"_id": 4, "x": True}, + ], + expected=[{"_id": 1, "x": "a"}, {"_id": 2, "x": None}, {"_id": 3, "x": 1}], + msg="enum with mixed types should match string, null, and int", + ), + QueryTestCase( + id="enum_single_value", + filter={"$jsonSchema": {"properties": {"x": {"enum": [1]}}}}, + doc=[{"_id": 1, "x": 1}, {"_id": 2, "x": 2}], + expected=[{"_id": 1, "x": 1}], + msg="enum with single value should match only that value", + ), + QueryTestCase( + id="non_applicable_minimum_on_string", + filter={"$jsonSchema": {"properties": {"x": {"minimum": 5}}}}, + doc=[{"_id": 1, "x": "hello"}, {"_id": 2, "x": 10}], + expected=[{"_id": 1, "x": "hello"}, {"_id": 2, "x": 10}], + msg="minimum on string field should pass (does not apply)", + ), + QueryTestCase( + id="non_applicable_minLength_on_int", + filter={"$jsonSchema": {"properties": {"x": {"minLength": 1}}}}, + doc=[{"_id": 1, "x": 42}, {"_id": 2, "x": "abc"}], + expected=[{"_id": 1, "x": 42}, {"_id": 2, "x": "abc"}], + msg="minLength on int field should pass (does not apply)", + ), + QueryTestCase( + id="non_applicable_minItems_on_string", + filter={"$jsonSchema": {"properties": {"x": {"minItems": 1}}}}, + doc=[{"_id": 1, "x": "hello"}, {"_id": 2, "x": [1, 2]}], + expected=[{"_id": 
1, "x": "hello"}, {"_id": 2, "x": [1, 2]}], + msg="minItems on string field should pass (does not apply)", + ), + QueryTestCase( + id="non_applicable_minProperties_on_array", + filter={"$jsonSchema": {"properties": {"x": {"minProperties": 1}}}}, + doc=[{"_id": 1, "x": [1, 2]}, {"_id": 2, "x": {"a": 1, "b": 2}}], + expected=[{"_id": 1, "x": [1, 2]}, {"_id": 2, "x": {"a": 1, "b": 2}}], + msg="minProperties on array field should pass (does not apply)", + ), + QueryTestCase( + id="non_applicable_required_on_non_object", + filter={"$jsonSchema": {"properties": {"x": {"required": ["a"]}}}}, + doc=[{"_id": 1, "x": "hello"}, {"_id": 2, "x": {"a": 1}}], + expected=[{"_id": 1, "x": "hello"}, {"_id": 2, "x": {"a": 1}}], + msg="required on non-object field should pass (does not apply)", + ), + QueryTestCase( + id="enum_with_bsontype", + filter={"$jsonSchema": {"properties": {"x": {"bsonType": "int", "enum": [1, "a"]}}}}, + doc=[{"_id": 1, "x": 1}, {"_id": 2, "x": "a"}, {"_id": 3, "x": 2}], + expected=[{"_id": 1, "x": 1}], + msg="bsonType + enum should both apply — only int 1 passes both", + ), + QueryTestCase( + id="not_required", + filter={"$jsonSchema": {"properties": {"x": {"not": {"required": ["a"]}}}}}, + doc=[{"_id": 1, "x": {"a": 1}}, {"_id": 2, "x": {"b": 1}}, {"_id": 3, "x": "str"}], + expected=[{"_id": 2, "x": {"b": 1}}], + msg="not required should match objects where nested field is absent", + ), + QueryTestCase( + id="dependencies_with_required", + filter={"$jsonSchema": {"required": ["a"], "dependencies": {"a": ["b"]}}}, + doc=[{"_id": 1, "a": 1, "b": 2}, {"_id": 2, "a": 1}, {"_id": 3, "b": 2}], + expected=[{"_id": 1, "a": 1, "b": 2}], + msg="required + dependencies — a must exist and b must follow", + ), + QueryTestCase( + id="patternProperties_with_additionalProperties_false", + filter={ + "$jsonSchema": { + "properties": {"_id": {}}, + "patternProperties": {"^s_": {"bsonType": "string"}}, + "additionalProperties": False, + } + }, + doc=[ + {"_id": 1, "s_name": 
"hello"}, + {"_id": 2, "s_name": "hello", "other": 1}, + {"_id": 3, "s_name": 42}, + ], + expected=[{"_id": 1, "s_name": "hello"}], + msg="patternProperties + additionalProperties:false — only matched fields allowed", + ), + QueryTestCase( + id="items_minItems_maxItems", + filter={ + "$jsonSchema": { + "properties": {"x": {"items": {"minimum": 0}, "minItems": 1, "maxItems": 3}} + } + }, + doc=[ + {"_id": 1, "x": [1, 2]}, + {"_id": 2, "x": []}, + {"_id": 3, "x": [1, 2, 3, 4]}, + {"_id": 4, "x": [-1, 2]}, + ], + expected=[{"_id": 1, "x": [1, 2]}], + msg="items + minItems + maxItems — all array constraints together", + ), + QueryTestCase( + id="required_with_oneOf_in_properties", + filter={ + "$jsonSchema": { + "required": ["x"], + "properties": {"x": {"oneOf": [{"bsonType": "string"}, {"bsonType": "int"}]}}, + } + }, + doc=[{"_id": 1, "x": "hello"}, {"_id": 2, "x": [1]}, {"_id": 3}], + expected=[{"_id": 1, "x": "hello"}], + msg="required + oneOf — field must exist and satisfy exactly one", + ), + QueryTestCase( + id="not_enum", + filter={"$jsonSchema": {"properties": {"x": {"not": {"enum": [1, 2]}}}}}, + doc=[{"_id": 1, "x": 1}, {"_id": 2, "x": 3}, {"_id": 3, "x": "a"}], + expected=[{"_id": 2, "x": 3}, {"_id": 3, "x": "a"}], + msg="not enum should match anything NOT in the list", + ), + QueryTestCase( + id="multiple_jsonschema_in_and", + filter={ + "$and": [ + {"$jsonSchema": {"properties": {"x": {"bsonType": "int"}}}}, + {"$jsonSchema": {"properties": {"x": {"minimum": 5}}}}, + ] + }, + doc=[{"_id": 1, "x": 10}, {"_id": 2, "x": 3}, {"_id": 3, "x": "a"}], + expected=[{"_id": 1, "x": 10}], + msg="Multiple $jsonSchema in $and — both must pass", + ), +] + + +@pytest.mark.parametrize("test", pytest_params(KEYWORD_COMBINATION_TESTS)) +def test_jsonSchema_keyword_combinations(collection, test): + """Test $jsonSchema keyword combinations and non-applicable type behavior.""" + collection.insert_many(test.doc) + result = execute_command(collection, {"find": collection.name, 
"filter": test.filter}) + assertSuccess(result, test.expected) + + +LOGICAL_COMPOSITION_TESTS: list[QueryTestCase] = [ + QueryTestCase( + id="allOf_both_match", + filter={"$jsonSchema": {"allOf": [{"required": ["a"]}, {"required": ["b"]}]}}, + doc=[{"_id": 1, "a": "hello", "b": 1}, {"_id": 2, "a": "hello"}, {"_id": 3, "b": 1}], + expected=[{"_id": 1, "a": "hello", "b": 1}], + msg="allOf should match documents satisfying all sub-schemas", + ), + QueryTestCase( + id="allOf_conflicting_no_match", + filter={ + "$jsonSchema": { + "allOf": [ + {"properties": {"x": {"bsonType": "string"}}}, + {"properties": {"x": {"bsonType": "int"}}}, + ] + } + }, + doc=[{"_id": 1, "x": "hello"}, {"_id": 2, "x": 42}], + expected=[], + msg="allOf with conflicting schemas should match nothing", + ), + QueryTestCase( + id="allOf_single_element", + filter={"$jsonSchema": {"allOf": [{"required": ["a"]}]}}, + doc=[{"_id": 1, "a": 1}, {"_id": 2}], + expected=[{"_id": 1, "a": 1}], + msg="allOf with single sub-schema should behave like that schema alone", + ), + QueryTestCase( + id="anyOf_matches_either", + filter={ + "$jsonSchema": { + "anyOf": [ + {"properties": {"x": {"bsonType": "string"}}}, + {"properties": {"x": {"bsonType": "int"}}}, + ] + } + }, + doc=[{"_id": 1, "x": "hello"}, {"_id": 2, "x": 42}, {"_id": 3, "x": [1]}], + expected=[{"_id": 1, "x": "hello"}, {"_id": 2, "x": 42}], + msg="anyOf should match documents satisfying at least one sub-schema", + ), + QueryTestCase( + id="anyOf_none_match", + filter={ + "$jsonSchema": { + "anyOf": [ + {"properties": {"x": {"bsonType": "string"}}}, + {"properties": {"x": {"bsonType": "int"}}}, + ] + } + }, + doc=[{"_id": 1, "x": [1]}, {"_id": 2, "x": True}], + expected=[], + msg="anyOf should reject documents satisfying no sub-schemas", + ), + QueryTestCase( + id="oneOf_exactly_one", + filter={ + "$jsonSchema": {"properties": {"x": {"oneOf": [{"multipleOf": 3}, {"multipleOf": 5}]}}} + }, + doc=[{"_id": 1, "x": 3}, {"_id": 2, "x": 5}, {"_id": 3, "x": 
15}, {"_id": 4, "x": 7}], + expected=[{"_id": 1, "x": 3}, {"_id": 2, "x": 5}], + msg="oneOf should match documents satisfying exactly one sub-schema", + ), + QueryTestCase( + id="oneOf_both_match_rejected", + filter={ + "$jsonSchema": {"properties": {"x": {"oneOf": [{"multipleOf": 3}, {"multipleOf": 5}]}}} + }, + doc=[{"_id": 1, "x": 15}, {"_id": 2, "x": 3}], + expected=[{"_id": 2, "x": 3}], + msg="oneOf should reject documents satisfying more than one sub-schema", + ), + QueryTestCase( + id="oneOf_none_match_rejected", + filter={ + "$jsonSchema": {"properties": {"x": {"oneOf": [{"multipleOf": 3}, {"multipleOf": 5}]}}} + }, + doc=[{"_id": 1, "x": 7}], + expected=[], + msg="oneOf should reject documents satisfying zero sub-schemas", + ), + QueryTestCase( + id="not_matches_non_matching", + filter={"$jsonSchema": {"properties": {"x": {"not": {"bsonType": "string"}}}}}, + doc=[{"_id": 1, "x": "hello"}, {"_id": 2, "x": 42}], + expected=[{"_id": 2, "x": 42}], + msg="not should match documents that do not satisfy the sub-schema", + ), + QueryTestCase( + id="not_empty_matches_none", + filter={"$jsonSchema": {"not": {}}}, + doc=[{"_id": 1, "x": 1}, {"_id": 2, "x": "a"}], + expected=[], + msg="not with empty schema should reject all documents", + ), + QueryTestCase( + id="double_negation", + filter={"$jsonSchema": {"properties": {"x": {"not": {"not": {"bsonType": "string"}}}}}}, + doc=[{"_id": 1, "x": "hello"}, {"_id": 2, "x": 42}], + expected=[{"_id": 1, "x": "hello"}], + msg="not-not should be equivalent to the original schema", + ), + QueryTestCase( + id="nested_allOf_within_anyOf", + filter={ + "$jsonSchema": { + "properties": { + "x": { + "anyOf": [ + {"allOf": [{"minimum": 5}, {"maximum": 8}]}, + {"allOf": [{"minimum": 9}, {"maximum": 11}]}, + ] + } + } + } + }, + doc=[{"_id": 1, "x": 6}, {"_id": 2, "x": 10}, {"_id": 3, "x": 7}], + expected=[{"_id": 1, "x": 6}, {"_id": 2, "x": 10}, {"_id": 3, "x": 7}], + msg="Nested allOf within anyOf should work correctly", + ), + 
    # --- $jsonSchema composed with query-level logical operators ($and/$or/$nor) ---
    # These cases verify the operator behaves as an ordinary match predicate when
    # combined with other top-level query clauses.
    QueryTestCase(
        id="find_with_and",
        filter={
            "$and": [
                {"$jsonSchema": {"properties": {"x": {"bsonType": "int"}}}},
                {"x": {"$gt": 5}},
            ]
        },
        doc=[{"_id": 1, "x": 10}, {"_id": 2, "x": 3}, {"_id": 3, "x": "a"}],
        expected=[{"_id": 1, "x": 10}],
        msg="$jsonSchema combined with $and should work",
    ),
    QueryTestCase(
        id="find_with_or",
        filter={
            "$or": [
                {"$jsonSchema": {"properties": {"x": {"bsonType": "string"}}}},
                {"$jsonSchema": {"properties": {"x": {"bsonType": "int"}}}},
            ]
        },
        doc=[{"_id": 1, "x": "hello"}, {"_id": 2, "x": 42}, {"_id": 3, "x": [1]}],
        expected=[{"_id": 1, "x": "hello"}, {"_id": 2, "x": 42}],
        msg="$jsonSchema combined with $or should work",
    ),
    QueryTestCase(
        id="find_with_nor",
        filter={"$nor": [{"$jsonSchema": {"properties": {"x": {"bsonType": "string"}}}}]},
        doc=[{"_id": 1, "x": "hello"}, {"_id": 2, "x": 42}],
        expected=[{"_id": 2, "x": 42}],
        msg="$jsonSchema combined with $nor should work",
    ),
    QueryTestCase(
        id="find_implicit_and",
        # Top-level sibling clauses are implicitly AND-ed with $jsonSchema.
        filter={"$jsonSchema": {"properties": {"x": {"bsonType": "int"}}}, "x": {"$gt": 5}},
        doc=[{"_id": 1, "x": 10}, {"_id": 2, "x": 3}, {"_id": 3, "x": "a"}],
        expected=[{"_id": 1, "x": 10}],
        msg="$jsonSchema with sibling field predicate (implicit $and) should work",
    ),
]


@pytest.mark.parametrize("test", pytest_params(LOGICAL_COMPOSITION_TESTS))
def test_jsonSchema_logical_composition(collection, test):
    """Test $jsonSchema logical composition keywords.

    For each case: seed the collection with ``test.doc``, run a ``find`` with
    ``test.filter``, and assert the result set equals ``test.expected``.
    """
    collection.insert_many(test.doc)
    result = execute_command(collection, {"find": collection.name, "filter": test.filter})
    assertSuccess(result, test.expected)


def test_jsonSchema_enum_ignores_collation(database_client):
    """Test $jsonSchema enum does not respect collection-default case-insensitive collation."""
    coll_name = "test_collation_enum"
    # Drop first so a leftover collection from a prior run cannot carry a
    # different default collation.
    database_client.drop_collection(coll_name)
    database_client.command(
        {
            "create": coll_name,
            # strength 2 => case-insensitive comparison for ordinary queries.
            "collation": {"locale": "en", "strength": 2},
} + ) + try: + coll = database_client[coll_name] + coll.insert_many([{"_id": 1, "x": "ABC"}, {"_id": 2, "x": "abc"}, {"_id": 3, "x": "def"}]) + result = execute_command( + coll, + { + "find": coll_name, + "filter": {"$jsonSchema": {"properties": {"x": {"enum": ["abc"]}}}}, + }, + ) + # $jsonSchema enum does NOT respect collation — only exact match + assertSuccess(result, [{"_id": 2, "x": "abc"}]) + finally: + database_client.drop_collection(coll_name) + + +def test_jsonSchema_uniqueItems_ignores_collation(database_client): + """Test $jsonSchema uniqueItems does not respect case-insensitive collation.""" + coll_name = "test_collation_uniqueItems" + database_client.drop_collection(coll_name) + database_client.command( + { + "create": coll_name, + "collation": {"locale": "en", "strength": 2}, + } + ) + try: + coll = database_client[coll_name] + coll.insert_many( + [ + {"_id": 1, "x": ["abc", "ABC"]}, + {"_id": 2, "x": ["abc", "def"]}, + ] + ) + result = execute_command( + coll, + { + "find": coll_name, + "filter": {"$jsonSchema": {"properties": {"x": {"uniqueItems": True}}}}, + }, + ) + # uniqueItems uses binary comparison, ignores collation — "abc" and "ABC" are distinct + assertSuccess(result, [{"_id": 1, "x": ["abc", "ABC"]}, {"_id": 2, "x": ["abc", "def"]}]) + finally: + database_client.drop_collection(coll_name) diff --git a/documentdb_tests/compatibility/tests/core/operator/query/misc/jsonSchema/test_jsonSchema_keyword_validation.py b/documentdb_tests/compatibility/tests/core/operator/query/misc/jsonSchema/test_jsonSchema_keyword_validation.py new file mode 100644 index 00000000..f287cc8b --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/query/misc/jsonSchema/test_jsonSchema_keyword_validation.py @@ -0,0 +1,282 @@ +""" +Tests for $jsonSchema keyword value validation. + +Verifies that each $jsonSchema keyword rejects invalid BSON types for its value +with expected error codes and accepts valid BSON types without error. 
Each keyword +is tested at both root level and nested inside properties. +""" + +import pytest + +from documentdb_tests.framework.assertions import assertFailureCode, assertNotError +from documentdb_tests.framework.bson_type_validator import ( + BsonType, + BsonTypeTestCase, + generate_bson_acceptance_test_cases, + generate_bson_rejection_test_cases, +) +from documentdb_tests.framework.error_codes import BAD_VALUE_ERROR, FAILED_TO_PARSE_ERROR +from documentdb_tests.framework.executor import execute_command + +JSONSCHEMA_PARAMS = [ + BsonTypeTestCase( + id="required", + msg="required should reject non-array types", + keyword="required", + valid_types=[BsonType.ARRAY], + error_code_overrides={BsonType.EMPTY_ARRAY: FAILED_TO_PARSE_ERROR}, + valid_inputs={BsonType.ARRAY: ["x"]}, + ), + BsonTypeTestCase( + id="properties", + msg="properties should reject non-object types", + keyword="properties", + valid_types=[BsonType.OBJECT, BsonType.EMPTY_OBJECT], + valid_inputs={BsonType.OBJECT: {"x": {}}, BsonType.EMPTY_OBJECT: {}}, + ), + BsonTypeTestCase( + id="additionalProperties", + msg="additionalProperties should reject non-bool/object types", + keyword="additionalProperties", + valid_types=[BsonType.BOOL, BsonType.OBJECT, BsonType.EMPTY_OBJECT], + valid_inputs={BsonType.OBJECT: {"bsonType": "string"}}, + ), + BsonTypeTestCase( + id="bsonType", + msg="bsonType should reject non-string/array types", + keyword="bsonType", + valid_types=[BsonType.STRING, BsonType.ARRAY], + error_code_overrides={BsonType.EMPTY_ARRAY: FAILED_TO_PARSE_ERROR}, + valid_inputs={BsonType.STRING: "object", BsonType.ARRAY: ["string", "int"]}, + ), + BsonTypeTestCase( + id="type", + msg="type should reject non-string/array types", + keyword="type", + valid_types=[BsonType.STRING, BsonType.ARRAY], + error_code_overrides={BsonType.EMPTY_ARRAY: FAILED_TO_PARSE_ERROR}, + valid_inputs={BsonType.STRING: "object", BsonType.ARRAY: ["string", "number"]}, + ), + BsonTypeTestCase( + id="enum", + msg="enum should 
reject non-array types", + keyword="enum", + valid_types=[BsonType.ARRAY], + error_code_overrides={BsonType.EMPTY_ARRAY: FAILED_TO_PARSE_ERROR}, + valid_inputs={BsonType.ARRAY: [1, 2, 3]}, + ), + BsonTypeTestCase( + id="title", + msg="title should reject non-string types", + keyword="title", + valid_types=[BsonType.STRING], + ), + BsonTypeTestCase( + id="description", + msg="description should reject non-string types", + keyword="description", + valid_types=[BsonType.STRING], + ), + BsonTypeTestCase( + id="minimum", + msg="minimum should reject non-numeric types", + keyword="minimum", + valid_types=[BsonType.DOUBLE, BsonType.INT, BsonType.LONG, BsonType.DECIMAL], + ), + BsonTypeTestCase( + id="maximum", + msg="maximum should reject non-numeric types", + keyword="maximum", + valid_types=[BsonType.DOUBLE, BsonType.INT, BsonType.LONG, BsonType.DECIMAL], + ), + BsonTypeTestCase( + id="exclusiveMinimum", + msg="exclusiveMinimum should reject non-bool types", + keyword="exclusiveMinimum", + valid_types=[BsonType.BOOL], + requires={"minimum": 0}, + ), + BsonTypeTestCase( + id="exclusiveMaximum", + msg="exclusiveMaximum should reject non-bool types", + keyword="exclusiveMaximum", + valid_types=[BsonType.BOOL], + requires={"maximum": 100}, + ), + BsonTypeTestCase( + id="minLength", + msg="minLength should reject non-integer types", + keyword="minLength", + valid_types=[BsonType.INT, BsonType.LONG], + default_error_code=FAILED_TO_PARSE_ERROR, + ), + BsonTypeTestCase( + id="maxLength", + msg="maxLength should reject non-integer types", + keyword="maxLength", + valid_types=[BsonType.INT, BsonType.LONG], + default_error_code=FAILED_TO_PARSE_ERROR, + ), + BsonTypeTestCase( + id="pattern", + msg="pattern should reject non-string types", + keyword="pattern", + valid_types=[BsonType.STRING], + ), + BsonTypeTestCase( + id="minItems", + msg="minItems should reject non-integer types", + keyword="minItems", + valid_types=[BsonType.INT, BsonType.LONG], + 
default_error_code=FAILED_TO_PARSE_ERROR, + ), + BsonTypeTestCase( + id="maxItems", + msg="maxItems should reject non-integer types", + keyword="maxItems", + valid_types=[BsonType.INT, BsonType.LONG], + default_error_code=FAILED_TO_PARSE_ERROR, + ), + BsonTypeTestCase( + id="uniqueItems", + msg="uniqueItems should reject non-bool types", + keyword="uniqueItems", + valid_types=[BsonType.BOOL], + ), + BsonTypeTestCase( + id="items", + msg="items should reject non-object/array types", + keyword="items", + valid_types=[BsonType.OBJECT, BsonType.ARRAY, BsonType.EMPTY_ARRAY, BsonType.EMPTY_OBJECT], + valid_inputs={BsonType.OBJECT: {"bsonType": "string"}, BsonType.ARRAY: [{}]}, + ), + BsonTypeTestCase( + id="minProperties", + msg="minProperties should reject non-integer types", + keyword="minProperties", + valid_types=[BsonType.INT, BsonType.LONG], + default_error_code=FAILED_TO_PARSE_ERROR, + ), + BsonTypeTestCase( + id="maxProperties", + msg="maxProperties should reject non-integer types", + keyword="maxProperties", + valid_types=[BsonType.INT, BsonType.LONG], + default_error_code=FAILED_TO_PARSE_ERROR, + ), + BsonTypeTestCase( + id="allOf", + msg="allOf should reject non-array types", + keyword="allOf", + valid_types=[BsonType.ARRAY], + error_code_overrides={BsonType.EMPTY_ARRAY: BAD_VALUE_ERROR}, + valid_inputs={BsonType.ARRAY: [{}]}, + ), + BsonTypeTestCase( + id="anyOf", + msg="anyOf should reject non-array types", + keyword="anyOf", + valid_types=[BsonType.ARRAY], + error_code_overrides={BsonType.EMPTY_ARRAY: BAD_VALUE_ERROR}, + valid_inputs={BsonType.ARRAY: [{}]}, + ), + BsonTypeTestCase( + id="oneOf", + msg="oneOf should reject non-array types", + keyword="oneOf", + valid_types=[BsonType.ARRAY], + error_code_overrides={BsonType.EMPTY_ARRAY: BAD_VALUE_ERROR}, + valid_inputs={BsonType.ARRAY: [{}]}, + ), + BsonTypeTestCase( + id="not", + msg="not should reject non-object types", + keyword="not", + valid_types=[BsonType.OBJECT, BsonType.EMPTY_OBJECT], + 
valid_inputs={BsonType.OBJECT: {"bsonType": "string"}}, + ), + BsonTypeTestCase( + id="dependencies", + msg="dependencies should reject non-object types", + keyword="dependencies", + valid_types=[BsonType.OBJECT, BsonType.EMPTY_OBJECT], + valid_inputs={BsonType.OBJECT: {"x": ["y"]}}, + ), + BsonTypeTestCase( + id="patternProperties", + msg="patternProperties should reject non-object types", + keyword="patternProperties", + valid_types=[BsonType.OBJECT, BsonType.EMPTY_OBJECT], + valid_inputs={BsonType.OBJECT: {"^x": {}}}, + ), + BsonTypeTestCase( + id="multipleOf", + msg="multipleOf should reject non-numeric types", + keyword="multipleOf", + valid_types=[BsonType.DOUBLE, BsonType.INT, BsonType.LONG, BsonType.DECIMAL], + ), + BsonTypeTestCase( + id="additionalItems", + msg="additionalItems should reject non-bool/object types", + keyword="additionalItems", + valid_types=[BsonType.BOOL, BsonType.OBJECT, BsonType.EMPTY_OBJECT], + valid_inputs={BsonType.OBJECT: {"bsonType": "string"}}, + ), +] + +TEST_CASES = generate_bson_rejection_test_cases(JSONSCHEMA_PARAMS) + + +@pytest.mark.parametrize( + "bson_type,sample_value,spec", + TEST_CASES, +) +def test_jsonschema_bson_type_root(collection, bson_type, sample_value, spec): + """Test $jsonSchema rejects invalid BSON types at root level.""" + schema = {spec.keyword: sample_value, **(spec.requires or {})} + result = execute_command( + collection, {"find": collection.name, "filter": {"$jsonSchema": schema}} + ) + assertFailureCode(result, spec.expected_code(bson_type), msg=spec.msg) + + +@pytest.mark.parametrize( + "bson_type,sample_value,spec", + TEST_CASES, +) +def test_jsonschema_bson_type_nested(collection, bson_type, sample_value, spec): + """Test $jsonSchema rejects invalid BSON types nested in properties.""" + schema = {"properties": {"x": {spec.keyword: sample_value, **(spec.requires or {})}}} + result = execute_command( + collection, {"find": collection.name, "filter": {"$jsonSchema": schema}} + ) + 
    assertFailureCode(result, spec.expected_code(bson_type), msg=spec.msg)


# Positive counterpart to TEST_CASES: every (keyword, valid BSON type) pair
# drawn from JSONSCHEMA_PARAMS must be accepted without a parse error.
ACCEPTANCE_CASES = generate_bson_acceptance_test_cases(JSONSCHEMA_PARAMS)


@pytest.mark.parametrize(
    "bson_type,sample_value,spec",
    ACCEPTANCE_CASES,
)
def test_jsonschema_bson_type_accepted_root(collection, bson_type, sample_value, spec):
    """Test $jsonSchema accepts valid BSON types at root level."""
    # Some keywords are only legal alongside a companion keyword
    # (e.g. exclusiveMinimum needs minimum), supplied via spec.requires.
    schema = {spec.keyword: sample_value, **(spec.requires or {})}
    result = execute_command(
        collection, {"find": collection.name, "filter": {"$jsonSchema": schema}}
    )
    assertNotError(result, msg=f"{spec.keyword} should accept {bson_type.value} at root")


@pytest.mark.parametrize(
    "bson_type,sample_value,spec",
    ACCEPTANCE_CASES,
)
def test_jsonschema_bson_type_accepted_nested(collection, bson_type, sample_value, spec):
    """Test $jsonSchema accepts valid BSON types nested in properties."""
    # Same acceptance check as the root-level test, but with the keyword one
    # level down inside properties.x to exercise nested schema parsing.
    schema = {"properties": {"x": {spec.keyword: sample_value, **(spec.requires or {})}}}
    result = execute_command(
        collection, {"find": collection.name, "filter": {"$jsonSchema": schema}}
    )
    assertNotError(result, msg=f"{spec.keyword} should accept {bson_type.value} nested")
diff --git a/documentdb_tests/compatibility/tests/core/operator/query/misc/jsonSchema/test_jsonSchema_object_validation.py b/documentdb_tests/compatibility/tests/core/operator/query/misc/jsonSchema/test_jsonSchema_object_validation.py new file mode 100644 index 00000000..2674df9c --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/query/misc/jsonSchema/test_jsonSchema_object_validation.py @@ -0,0 +1,214 @@ +""" +Tests for $jsonSchema object validation keywords. + +Validates required, properties, additionalProperties, minProperties, maxProperties, +dependencies, patternProperties, null/missing handling, nested schemas, and field name edge cases.
+""" + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.query.utils.query_test_case import ( + QueryTestCase, +) +from documentdb_tests.framework.assertions import assertSuccess +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +OBJECT_VALIDATION_TESTS: list[QueryTestCase] = [ + QueryTestCase( + id="required_matches", + filter={"$jsonSchema": {"required": ["a", "b"]}}, + doc=[{"_id": 1, "a": 1, "b": 2}, {"_id": 2, "a": 1}], + expected=[{"_id": 1, "a": 1, "b": 2}], + msg="required should match documents with all required fields", + ), + QueryTestCase( + id="required_null_field_matches", + filter={"$jsonSchema": {"required": ["a"]}}, + doc=[{"_id": 1, "a": None}, {"_id": 2}], + expected=[{"_id": 1, "a": None}], + msg="required on field with null value should match", + ), + QueryTestCase( + id="properties_validates_type", + filter={"$jsonSchema": {"properties": {"a": {"bsonType": "string"}}}}, + doc=[{"_id": 1, "a": "hello"}, {"_id": 2, "a": 42}], + expected=[{"_id": 1, "a": "hello"}], + msg="properties should validate bsonType of specified fields", + ), + QueryTestCase( + id="properties_missing_field_passes", + filter={"$jsonSchema": {"properties": {"a": {"bsonType": "string"}}}}, + doc=[{"_id": 1, "a": "hello"}, {"_id": 2, "b": 42}], + expected=[{"_id": 1, "a": "hello"}, {"_id": 2, "b": 42}], + msg="properties on non-existent field should pass", + ), + QueryTestCase( + id="properties_null_field_type_check", + filter={"$jsonSchema": {"properties": {"a": {"bsonType": "null"}}}}, + doc=[{"_id": 1, "a": None}, {"_id": 2, "a": "hello"}], + expected=[{"_id": 1, "a": None}], + msg="properties bsonType null should match null field", + ), + QueryTestCase( + id="properties_nested", + filter={ + "$jsonSchema": {"properties": {"addr": {"properties": {"zip": {"bsonType": "string"}}}}} + }, + doc=[{"_id": 1, "addr": {"zip": "12345"}}, {"_id": 2, "addr": {"zip": 12345}}], + 
expected=[{"_id": 1, "addr": {"zip": "12345"}}], + msg="Nested properties should validate nested object fields", + ), + QueryTestCase( + id="properties_non_object_field_passes", + filter={ + "$jsonSchema": {"properties": {"addr": {"properties": {"zip": {"bsonType": "string"}}}}} + }, + doc=[{"_id": 1, "addr": "not_object"}, {"_id": 2, "addr": {"zip": "12345"}}], + expected=[{"_id": 1, "addr": "not_object"}, {"_id": 2, "addr": {"zip": "12345"}}], + msg="properties should not apply when field is not an object", + ), + QueryTestCase( + id="additionalProperties_false", + filter={"$jsonSchema": {"properties": {"_id": {}, "a": {}}, "additionalProperties": False}}, + doc=[{"_id": 1, "a": 1}, {"_id": 2, "a": 1, "b": 2}], + expected=[{"_id": 1, "a": 1}], + msg="additionalProperties false should reject extra fields", + ), + QueryTestCase( + id="additionalProperties_schema", + filter={ + "$jsonSchema": { + "properties": {"_id": {}, "a": {}}, + "additionalProperties": {"bsonType": "int"}, + } + }, + doc=[{"_id": 1, "a": 1, "b": 2}, {"_id": 2, "a": 1, "b": "str"}], + expected=[{"_id": 1, "a": 1, "b": 2}], + msg="additionalProperties as schema should validate extra fields", + ), + QueryTestCase( + id="minProperties_matches", + filter={"$jsonSchema": {"minProperties": 3}}, + doc=[{"_id": 1, "a": 1, "b": 2}, {"_id": 2, "a": 1}], + expected=[{"_id": 1, "a": 1, "b": 2}], + msg="minProperties should match documents with enough properties", + ), + QueryTestCase( + id="maxProperties_matches", + filter={"$jsonSchema": {"maxProperties": 2}}, + doc=[{"_id": 1, "a": 1, "b": 2}, {"_id": 2, "a": 1}], + expected=[{"_id": 2, "a": 1}], + msg="maxProperties should match documents within property limit", + ), + QueryTestCase( + id="dependencies_property", + filter={"$jsonSchema": {"dependencies": {"a": ["b"]}}}, + doc=[{"_id": 1, "a": 1, "b": 2}, {"_id": 2, "a": 1}, {"_id": 3, "c": 1}], + expected=[{"_id": 1, "a": 1, "b": 2}, {"_id": 3, "c": 1}], + msg="Property dependency — if a exists, b must 
exist", + ), + QueryTestCase( + id="dependencies_absent_passes", + filter={"$jsonSchema": {"dependencies": {"a": ["b"]}}}, + doc=[{"_id": 1, "b": 2}, {"_id": 2, "a": 1}], + expected=[{"_id": 1, "b": 2}], + msg="Dependency should pass when dependent field is absent", + ), + QueryTestCase( + id="dependencies_schema", + filter={"$jsonSchema": {"dependencies": {"a": {"properties": {"b": {"minimum": 0}}}}}}, + doc=[{"_id": 1, "a": 1, "b": 5}, {"_id": 2, "a": 1, "b": -1}, {"_id": 3, "c": 1}], + expected=[{"_id": 1, "a": 1, "b": 5}, {"_id": 3, "c": 1}], + msg="Schema dependency — if a exists, schema must be satisfied", + ), + QueryTestCase( + id="patternProperties_matches", + filter={"$jsonSchema": {"patternProperties": {"^S_": {"bsonType": "string"}}}}, + doc=[ + {"_id": 1, "S_name": "hello", "S_age": "young"}, + {"_id": 2, "S_name": 42, "other": "val"}, + ], + expected=[{"_id": 1, "S_name": "hello", "S_age": "young"}], + msg="patternProperties should validate fields matching regex pattern", + ), + QueryTestCase( + id="patternProperties_combined_with_properties", + filter={ + "$jsonSchema": { + "properties": {"name": {"bsonType": "string"}}, + "patternProperties": {"^S_": {"bsonType": "string"}}, + } + }, + doc=[{"_id": 1, "name": "hello", "S_val": "x"}, {"_id": 2, "name": 42, "S_val": "x"}], + expected=[{"_id": 1, "name": "hello", "S_val": "x"}], + msg="patternProperties combined with properties — both must be satisfied", + ), + QueryTestCase( + id="deep_nested_properties", + filter={ + "$jsonSchema": { + "properties": { + "a": {"properties": {"b": {"properties": {"c": {"bsonType": "string"}}}}} + } + } + }, + doc=[{"_id": 1, "a": {"b": {"c": "hello"}}}, {"_id": 2, "a": {"b": {"c": 42}}}], + expected=[{"_id": 1, "a": {"b": {"c": "hello"}}}], + msg="3 levels deep nested properties should work", + ), + QueryTestCase( + id="items_array_of_objects", + filter={ + "$jsonSchema": { + "properties": {"arr": {"items": {"properties": {"name": {"bsonType": "string"}}}}} + } + }, + 
doc=[{"_id": 1, "arr": [{"name": "a"}, {"name": "b"}]}, {"_id": 2, "arr": [{"name": 1}]}], + expected=[{"_id": 1, "arr": [{"name": "a"}, {"name": "b"}]}], + msg="items should validate objects within array", + ), + QueryTestCase( + id="items_array_of_arrays", + filter={"$jsonSchema": {"properties": {"matrix": {"items": {"bsonType": "array"}}}}}, + doc=[{"_id": 1, "matrix": [[1, 2], [3, 4]]}, {"_id": 2, "matrix": [[1, 2], "not_array"]}], + expected=[{"_id": 1, "matrix": [[1, 2], [3, 4]]}], + msg="items should validate arrays within array", + ), + QueryTestCase( + id="empty_field_name_in_required", + filter={"$jsonSchema": {"required": [""]}}, + doc=[{"_id": 1, "": "val"}, {"_id": 2, "a": 1}], + expected=[{"_id": 1, "": "val"}], + msg="required with empty string field name should work", + ), + QueryTestCase( + id="empty_field_name_in_properties", + filter={"$jsonSchema": {"properties": {"": {"bsonType": "string"}}}}, + doc=[{"_id": 1, "": "val"}, {"_id": 2, "": 42}], + expected=[{"_id": 1, "": "val"}], + msg="properties with empty string field name should validate bsonType", + ), + QueryTestCase( + id="required_dollar_prefixed_field", + filter={"$jsonSchema": {"required": ["$field"]}}, + doc=[{"_id": 1, "$field": "val"}, {"_id": 2, "a": 1}], + expected=[{"_id": 1, "$field": "val"}], + msg="required with dollar-prefixed field name should match", + ), + QueryTestCase( + id="properties_dollar_prefixed_field", + filter={"$jsonSchema": {"properties": {"$field": {"bsonType": "string"}}}}, + doc=[{"_id": 1, "$field": "val"}, {"_id": 2, "$field": 42}], + expected=[{"_id": 1, "$field": "val"}], + msg="properties with dollar-prefixed field should validate bsonType", + ), +] + + +@pytest.mark.parametrize("test", pytest_params(OBJECT_VALIDATION_TESTS)) +def test_jsonSchema_object_validation(collection, test): + """Test $jsonSchema object validation keywords.""" + collection.insert_many(test.doc) + result = execute_command(collection, {"find": collection.name, "filter": 
test.filter}) + assertSuccess(result, test.expected) diff --git a/documentdb_tests/compatibility/tests/core/operator/query/misc/jsonSchema/test_jsonSchema_type_filtering.py b/documentdb_tests/compatibility/tests/core/operator/query/misc/jsonSchema/test_jsonSchema_type_filtering.py new file mode 100644 index 00000000..0e248e69 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/query/misc/jsonSchema/test_jsonSchema_type_filtering.py @@ -0,0 +1,435 @@ +""" +Tests for $jsonSchema document type matching. + +Verifies that bsonType and type keywords correctly filter documents based on +field types, including single-type matching, array-of-types syntax, special +cases, and type distinction between similar BSON types. +""" + +from datetime import datetime, timezone + +import pytest +from bson import Code, Decimal128, Int64, MaxKey, MinKey, ObjectId, Regex, Timestamp + +from documentdb_tests.compatibility.tests.core.operator.query.utils.query_test_case import ( + QueryTestCase, +) +from documentdb_tests.framework.assertions import assertSuccess +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +TRIVIAL_SCHEMA_TESTS: list[QueryTestCase] = [ + QueryTestCase( + id="trivial_valid_object_schema", + filter={"$jsonSchema": {"properties": {"x": {"bsonType": "int"}}}}, + doc=[{"_id": 1, "x": 1}, {"_id": 2, "x": "a"}], + expected=[{"_id": 1, "x": 1}], + msg="Valid object argument should succeed", + ), + QueryTestCase( + id="trivial_empty_schema_matches_all", + filter={"$jsonSchema": {}}, + doc=[{"_id": 1, "x": 1}, {"_id": 2, "x": "a"}], + expected=[{"_id": 1, "x": 1}, {"_id": 2, "x": "a"}], + msg="Empty object should match all documents", + ), + QueryTestCase( + id="trivial_bsontype_object_matches_all", + filter={"$jsonSchema": {"bsonType": "object"}}, + doc=[{"_id": 1, "x": 1}, {"_id": 2, "x": "a"}], + expected=[{"_id": 1, "x": 1}, {"_id": 2, "x": "a"}], + msg="bsonType object 
should match all documents", + ), + QueryTestCase( + id="trivial_not_empty_matches_none", + filter={"$jsonSchema": {"not": {}}}, + doc=[{"_id": 1, "x": 1}, {"_id": 2, "x": "a"}], + expected=[], + msg="not empty schema should match no documents", + ), + QueryTestCase( + id="trivial_empty_properties_matches_all", + filter={"$jsonSchema": {"properties": {}}}, + doc=[{"_id": 1, "x": 1}, {"_id": 2, "x": "a"}], + expected=[{"_id": 1, "x": 1}, {"_id": 2, "x": "a"}], + msg="Empty properties should match all documents", + ), + QueryTestCase( + id="trivial_description_no_effect", + filter={ + "$jsonSchema": {"description": "test schema", "properties": {"x": {"bsonType": "int"}}} + }, + doc=[{"_id": 1, "x": 1}, {"_id": 2, "x": "a"}], + expected=[{"_id": 1, "x": 1}], + msg="Description keyword should have no effect on validation", + ), + QueryTestCase( + id="trivial_title_no_effect", + filter={"$jsonSchema": {"title": "test", "properties": {"x": {"bsonType": "int"}}}}, + doc=[{"_id": 1, "x": 1}, {"_id": 2, "x": "a"}], + expected=[{"_id": 1, "x": 1}], + msg="Title keyword should have no effect on validation", + ), +] + + +@pytest.mark.parametrize("test", pytest_params(TRIVIAL_SCHEMA_TESTS)) +def test_jsonSchema_trivial_schemas(collection, test): + """Test $jsonSchema with trivial/valid schemas.""" + collection.insert_many(test.doc) + result = execute_command(collection, {"find": collection.name, "filter": test.filter}) + assertSuccess(result, test.expected) + + +BSONTYPE_MATCH_TESTS: list[QueryTestCase] = [ + QueryTestCase( + id="match_double", + filter={"$jsonSchema": {"properties": {"x": {"bsonType": "double"}}}}, + doc=[{"_id": 1, "x": 3.14}, {"_id": 2, "x": "str"}], + expected=[{"_id": 1, "x": 3.14}], + msg="bsonType double should match double values", + ), + QueryTestCase( + id="match_string", + filter={"$jsonSchema": {"properties": {"x": {"bsonType": "string"}}}}, + doc=[{"_id": 1, "x": "hello"}, {"_id": 2, "x": 123}], + expected=[{"_id": 1, "x": "hello"}], + 
msg="bsonType string should match string values", + ), + QueryTestCase( + id="match_object", + filter={"$jsonSchema": {"properties": {"x": {"bsonType": "object"}}}}, + doc=[{"_id": 1, "x": {"nested": 1}}, {"_id": 2, "x": [1, 2]}], + expected=[{"_id": 1, "x": {"nested": 1}}], + msg="bsonType object should match object values", + ), + QueryTestCase( + id="match_array", + filter={"$jsonSchema": {"properties": {"x": {"bsonType": "array"}}}}, + doc=[{"_id": 1, "x": [1, 2, 3]}, {"_id": 2, "x": {"a": 1}}], + expected=[{"_id": 1, "x": [1, 2, 3]}], + msg="bsonType array should match array values", + ), + QueryTestCase( + id="match_binData", + filter={"$jsonSchema": {"properties": {"x": {"bsonType": "binData"}}}}, + doc=[{"_id": 1, "x": b"\x01\x02"}, {"_id": 2, "x": "str"}], + expected=[{"_id": 1, "x": b"\x01\x02"}], + msg="bsonType binData should match Binary values", + ), + QueryTestCase( + id="match_objectId", + filter={"$jsonSchema": {"properties": {"x": {"bsonType": "objectId"}}}}, + doc=[{"_id": 1, "x": ObjectId("000000000000000000000001")}, {"_id": 2, "x": 123}], + expected=[{"_id": 1, "x": ObjectId("000000000000000000000001")}], + msg="bsonType objectId should match ObjectId values", + ), + QueryTestCase( + id="match_bool", + filter={"$jsonSchema": {"properties": {"x": {"bsonType": "bool"}}}}, + doc=[{"_id": 1, "x": True}, {"_id": 2, "x": 0}], + expected=[{"_id": 1, "x": True}], + msg="bsonType bool should match boolean values", + ), + QueryTestCase( + id="match_date", + filter={"$jsonSchema": {"properties": {"x": {"bsonType": "date"}}}}, + doc=[ + {"_id": 1, "x": datetime(2024, 1, 1, tzinfo=timezone.utc)}, + {"_id": 2, "x": Timestamp(1, 1)}, + ], + expected=[{"_id": 1, "x": datetime(2024, 1, 1, tzinfo=timezone.utc)}], + msg="bsonType date should match datetime values", + ), + QueryTestCase( + id="match_null", + filter={"$jsonSchema": {"properties": {"x": {"bsonType": "null"}}}}, + doc=[{"_id": 1, "x": None}, {"_id": 2, "x": 0}], + expected=[{"_id": 1, "x": None}], + 
msg="bsonType null should match null values", + ), + QueryTestCase( + id="match_regex", + filter={"$jsonSchema": {"properties": {"x": {"bsonType": "regex"}}}}, + doc=[{"_id": 1, "x": Regex("^abc", "i")}, {"_id": 2, "x": "abc"}], + expected=[{"_id": 1, "x": Regex("^abc", "i")}], + msg="bsonType regex should match Regex values", + ), + QueryTestCase( + id="match_javascript", + filter={"$jsonSchema": {"properties": {"x": {"bsonType": "javascript"}}}}, + doc=[{"_id": 1, "x": Code("function(){}")}, {"_id": 2, "x": "str"}], + expected=[{"_id": 1, "x": Code("function(){}")}], + msg="bsonType javascript should match Code values", + ), + QueryTestCase( + id="match_int", + filter={"$jsonSchema": {"properties": {"x": {"bsonType": "int"}}}}, + doc=[{"_id": 1, "x": 42}, {"_id": 2, "x": 3.14}], + expected=[{"_id": 1, "x": 42}], + msg="bsonType int should match int values", + ), + QueryTestCase( + id="match_timestamp", + filter={"$jsonSchema": {"properties": {"x": {"bsonType": "timestamp"}}}}, + doc=[ + {"_id": 1, "x": Timestamp(1000, 1)}, + {"_id": 2, "x": datetime(2024, 1, 1, tzinfo=timezone.utc)}, + ], + expected=[{"_id": 1, "x": Timestamp(1000, 1)}], + msg="bsonType timestamp should match Timestamp values", + ), + QueryTestCase( + id="match_long", + filter={"$jsonSchema": {"properties": {"x": {"bsonType": "long"}}}}, + doc=[{"_id": 1, "x": Int64(123456789)}, {"_id": 2, "x": 3.14}], + expected=[{"_id": 1, "x": Int64(123456789)}], + msg="bsonType long should match Int64 values", + ), + QueryTestCase( + id="match_decimal", + filter={"$jsonSchema": {"properties": {"x": {"bsonType": "decimal"}}}}, + doc=[{"_id": 1, "x": Decimal128("1.5")}, {"_id": 2, "x": 3.14}], + expected=[{"_id": 1, "x": Decimal128("1.5")}], + msg="bsonType decimal should match Decimal128 values", + ), + QueryTestCase( + id="match_minKey", + filter={"$jsonSchema": {"properties": {"x": {"bsonType": "minKey"}}}}, + doc=[{"_id": 1, "x": MinKey()}, {"_id": 2, "x": 0}], + expected=[{"_id": 1, "x": MinKey()}], + 
msg="bsonType minKey should match MinKey values", + ), + QueryTestCase( + id="match_maxKey", + filter={"$jsonSchema": {"properties": {"x": {"bsonType": "maxKey"}}}}, + doc=[{"_id": 1, "x": MaxKey()}, {"_id": 2, "x": 0}], + expected=[{"_id": 1, "x": MaxKey()}], + msg="bsonType maxKey should match MaxKey values", + ), +] + + +@pytest.mark.parametrize("test", pytest_params(BSONTYPE_MATCH_TESTS)) +def test_jsonSchema_bsontype_matches(collection, test): + """Test bsonType matches the correct BSON type and rejects others.""" + collection.insert_many(test.doc) + result = execute_command(collection, {"find": collection.name, "filter": test.filter}) + assertSuccess(result, test.expected) + + +BSONTYPE_SPECIAL_TESTS: list[QueryTestCase] = [ + QueryTestCase( + id="special_number_matches_all_numeric", + filter={"$jsonSchema": {"properties": {"x": {"bsonType": "number"}}}}, + doc=[ + {"_id": 1, "x": 42}, + {"_id": 2, "x": Int64(100)}, + {"_id": 3, "x": 3.14}, + {"_id": 4, "x": Decimal128("1.5")}, + {"_id": 5, "x": "not_number"}, + ], + expected=[ + {"_id": 1, "x": 42}, + {"_id": 2, "x": Int64(100)}, + {"_id": 3, "x": 3.14}, + {"_id": 4, "x": Decimal128("1.5")}, + ], + msg="bsonType number should match int, long, double, and decimal128", + ), + QueryTestCase( + id="special_missing_field_passes", + filter={"$jsonSchema": {"properties": {"x": {"bsonType": "double"}}}}, + doc=[{"_id": 1, "x": 3.14}, {"_id": 2}], + expected=[{"_id": 1, "x": 3.14}, {"_id": 2}], + msg="bsonType should pass when field is missing", + ), + QueryTestCase( + id="special_array_top_level_no_match", + filter={"$jsonSchema": {"bsonType": "array"}}, + doc=[{"_id": 1, "x": 1}], + expected=[], + msg="bsonType array at top level should not match (document is object)", + ), +] + + +@pytest.mark.parametrize("test", pytest_params(BSONTYPE_SPECIAL_TESTS)) +def test_jsonSchema_bsontype_special(collection, test): + """Test bsonType special cases.""" + collection.insert_many(test.doc) + result = 
execute_command(collection, {"find": collection.name, "filter": test.filter}) + assertSuccess(result, test.expected) + + +BSONTYPE_ARRAY_SYNTAX_TESTS: list[QueryTestCase] = [ + QueryTestCase( + id="array_syntax_string_or_int", + filter={"$jsonSchema": {"properties": {"x": {"bsonType": ["string", "int"]}}}}, + doc=[{"_id": 1, "x": "hello"}, {"_id": 2, "x": 42}, {"_id": 3, "x": 3.14}], + expected=[{"_id": 1, "x": "hello"}, {"_id": 2, "x": 42}], + msg="bsonType array should match either type", + ), + QueryTestCase( + id="array_syntax_null_or_string", + filter={"$jsonSchema": {"properties": {"x": {"bsonType": ["null", "string"]}}}}, + doc=[{"_id": 1, "x": None}, {"_id": 2, "x": "hello"}, {"_id": 3, "x": 42}], + expected=[{"_id": 1, "x": None}, {"_id": 2, "x": "hello"}], + msg="bsonType array with null or string should match both", + ), + QueryTestCase( + id="array_syntax_double_or_decimal", + filter={"$jsonSchema": {"properties": {"x": {"bsonType": ["double", "decimal"]}}}}, + doc=[{"_id": 1, "x": 3.14}, {"_id": 2, "x": Decimal128("1.5")}, {"_id": 3, "x": 42}], + expected=[{"_id": 1, "x": 3.14}, {"_id": 2, "x": Decimal128("1.5")}], + msg="bsonType array with double or decimal should match both", + ), +] + + +@pytest.mark.parametrize("test", pytest_params(BSONTYPE_ARRAY_SYNTAX_TESTS)) +def test_jsonSchema_bsontype_array_syntax(collection, test): + """Test bsonType array syntax matches either type.""" + collection.insert_many(test.doc) + result = execute_command(collection, {"find": collection.name, "filter": test.filter}) + assertSuccess(result, test.expected) + + +BSONTYPE_DISTINCTION_TESTS: list[QueryTestCase] = [ + QueryTestCase( + id="distinction_bool_not_int_zero", + filter={"$jsonSchema": {"properties": {"x": {"bsonType": "bool"}}}}, + doc=[{"_id": 1, "x": False}, {"_id": 2, "x": 0}], + expected=[{"_id": 1, "x": False}], + msg="bsonType bool should NOT match int 0", + ), + QueryTestCase( + id="distinction_int_not_bool_false", + filter={"$jsonSchema": 
{"properties": {"x": {"bsonType": "int"}}}}, + doc=[{"_id": 1, "x": 0}, {"_id": 2, "x": False}], + expected=[{"_id": 1, "x": 0}], + msg="bsonType int should NOT match bool false", + ), + QueryTestCase( + id="distinction_string_not_null", + filter={"$jsonSchema": {"properties": {"x": {"bsonType": "string"}}}}, + doc=[{"_id": 1, "x": "hello"}, {"_id": 2, "x": None}], + expected=[{"_id": 1, "x": "hello"}], + msg="bsonType string should NOT match null", + ), + QueryTestCase( + id="distinction_null_not_empty_string", + filter={"$jsonSchema": {"properties": {"x": {"bsonType": "null"}}}}, + doc=[{"_id": 1, "x": None}, {"_id": 2, "x": ""}], + expected=[{"_id": 1, "x": None}], + msg="bsonType null should NOT match empty string", + ), + QueryTestCase( + id="distinction_int_not_long", + filter={"$jsonSchema": {"properties": {"x": {"bsonType": "int"}}}}, + doc=[{"_id": 1, "x": 42}, {"_id": 2, "x": Int64(42)}], + expected=[{"_id": 1, "x": 42}], + msg="bsonType int should NOT match NumberLong", + ), + QueryTestCase( + id="distinction_long_not_int", + filter={"$jsonSchema": {"properties": {"x": {"bsonType": "long"}}}}, + doc=[{"_id": 1, "x": Int64(42)}, {"_id": 2, "x": 42}], + expected=[{"_id": 1, "x": Int64(42)}], + msg="bsonType long should NOT match NumberInt", + ), + QueryTestCase( + id="distinction_double_not_decimal", + filter={"$jsonSchema": {"properties": {"x": {"bsonType": "double"}}}}, + doc=[{"_id": 1, "x": 1.0}, {"_id": 2, "x": Decimal128("1.0")}], + expected=[{"_id": 1, "x": 1.0}], + msg="bsonType double should NOT match Decimal128", + ), +] + + +@pytest.mark.parametrize("test", pytest_params(BSONTYPE_DISTINCTION_TESTS)) +def test_jsonSchema_bsontype_distinction(collection, test): + """Test BSON type distinction — types are not interchangeable.""" + collection.insert_many(test.doc) + result = execute_command(collection, {"find": collection.name, "filter": test.filter}) + assertSuccess(result, test.expected) + + +TYPE_KEYWORD_TESTS: list[QueryTestCase] = [ + 
QueryTestCase( + id="type_object", + filter={"$jsonSchema": {"properties": {"x": {"type": "object"}}}}, + doc=[{"_id": 1, "x": {"nested": 1}}, {"_id": 2, "x": [1]}], + expected=[{"_id": 1, "x": {"nested": 1}}], + msg="type object should match object values", + ), + QueryTestCase( + id="type_array", + filter={"$jsonSchema": {"properties": {"x": {"type": "array"}}}}, + doc=[{"_id": 1, "x": [1, 2]}, {"_id": 2, "x": {"a": 1}}], + expected=[{"_id": 1, "x": [1, 2]}], + msg="type array should match array values", + ), + QueryTestCase( + id="type_number", + filter={"$jsonSchema": {"properties": {"x": {"type": "number"}}}}, + doc=[{"_id": 1, "x": 42}, {"_id": 2, "x": "str"}], + expected=[{"_id": 1, "x": 42}], + msg="type number should match numeric values", + ), + QueryTestCase( + id="type_boolean", + filter={"$jsonSchema": {"properties": {"x": {"type": "boolean"}}}}, + doc=[{"_id": 1, "x": True}, {"_id": 2, "x": 0}], + expected=[{"_id": 1, "x": True}], + msg="type boolean should match boolean values", + ), + QueryTestCase( + id="type_string", + filter={"$jsonSchema": {"properties": {"x": {"type": "string"}}}}, + doc=[{"_id": 1, "x": "hello"}, {"_id": 2, "x": 42}], + expected=[{"_id": 1, "x": "hello"}], + msg="type string should match string values", + ), + QueryTestCase( + id="type_null", + filter={"$jsonSchema": {"properties": {"x": {"type": "null"}}}}, + doc=[{"_id": 1, "x": None}, {"_id": 2, "x": 0}], + expected=[{"_id": 1, "x": None}], + msg="type null should match null values", + ), + QueryTestCase( + id="type_number_all_numeric", + filter={"$jsonSchema": {"properties": {"x": {"type": "number"}}}}, + doc=[ + {"_id": 1, "x": 42}, + {"_id": 2, "x": Int64(100)}, + {"_id": 3, "x": 3.14}, + {"_id": 4, "x": Decimal128("1.5")}, + {"_id": 5, "x": "str"}, + ], + expected=[ + {"_id": 1, "x": 42}, + {"_id": 2, "x": Int64(100)}, + {"_id": 3, "x": 3.14}, + {"_id": 4, "x": Decimal128("1.5")}, + ], + msg="type number should match int, long, double, decimal128", + ), + QueryTestCase( 
+ id="type_array_string_or_null", + filter={"$jsonSchema": {"properties": {"x": {"type": ["string", "null"]}}}}, + doc=[{"_id": 1, "x": "hello"}, {"_id": 2, "x": None}, {"_id": 3, "x": 42}], + expected=[{"_id": 1, "x": "hello"}, {"_id": 2, "x": None}], + msg="type array should match either type", + ), +] + + +@pytest.mark.parametrize("test", pytest_params(TYPE_KEYWORD_TESTS)) +def test_jsonSchema_type_keyword(collection, test): + """Test JSON Schema type keyword matches correct types.""" + collection.insert_many(test.doc) + result = execute_command(collection, {"find": collection.name, "filter": test.filter}) + assertSuccess(result, test.expected) diff --git a/documentdb_tests/framework/assertions.py b/documentdb_tests/framework/assertions.py index 1acb64bb..66fbf083 100644 --- a/documentdb_tests/framework/assertions.py +++ b/documentdb_tests/framework/assertions.py @@ -99,6 +99,19 @@ def _format_exception_error(result: Exception) -> str: ) +def assertNotError(result: Union[Any, Exception], msg: Optional[str] = None): + """Assert that the command did not return an error. + + Only checks that the result is not an Exception. Does not validate + the actual result value. + """ + if isinstance(result, Exception): + fail_msg = f"Expected success but got error: {result}" + if msg: + fail_msg = f"[{msg}] {fail_msg}" + raise AssertionError(fail_msg) + + def assertSuccess( result: Union[Any, Exception], expected: Any, diff --git a/documentdb_tests/framework/bson_type_validator.py b/documentdb_tests/framework/bson_type_validator.py new file mode 100644 index 00000000..f6f2c362 --- /dev/null +++ b/documentdb_tests/framework/bson_type_validator.py @@ -0,0 +1,80 @@ +"""BSON type test harness. + +Generates parametrized test cases that verify operators correctly reject +invalid BSON types and accept valid BSON types. 
+""" + +from dataclasses import dataclass +from typing import Optional + +from documentdb_tests.framework.error_codes import TYPE_MISMATCH_ERROR +from documentdb_tests.framework.test_case import BaseTestCase +from documentdb_tests.framework.test_constants import BSON_TYPE_SAMPLES, BsonType + + +@dataclass(frozen=True) +class BsonTypeTestCase(BaseTestCase): + """Test case for verifying an operator keyword's BSON type handling. + + Each case defines a keyword and its accepted types. The test framework + generates rejection tests for every BSON type not in valid_types, and + acceptance tests for every type in valid_types. + + Attributes: + keyword: The operator keyword being tested (e.g. "minimum", "required"). + valid_types: List of BsonType values the keyword accepts. + All other BSON types will be tested as rejections. + requires: Optional sibling fields needed alongside the keyword + (e.g. {"minimum": 0} for exclusiveMinimum). + default_error_code: Expected error code for rejected types. + error_code_overrides: Custom error code for a specific BSON type that + differs from default_error_code, as {BsonType: code}. + valid_inputs: Optional per-type sample overrides for acceptance tests, + used when the generic BSON_TYPE_SAMPLES value is not semantically + valid for the keyword (e.g. {"bsonType": "string"} for properties). 
+ """ + + keyword: Optional[str] = None + valid_types: Optional[list] = None + requires: Optional[dict] = None + default_error_code: int = TYPE_MISMATCH_ERROR + error_code_overrides: Optional[dict] = None + valid_inputs: Optional[dict] = None + + def expected_code(self, bson_type): + """Return the expected error code for a rejected BsonType.""" + if self.error_code_overrides: + return self.error_code_overrides.get(bson_type, self.default_error_code) + return self.default_error_code + + +def generate_bson_rejection_test_cases(params): + """Generate pytest.param tuples for rejected BSON types.""" + import pytest + + cases = [] + for spec in params: + accepted = set(spec.valid_types) + for bson_type in BsonType: + if bson_type in accepted: + continue + sample_value = BSON_TYPE_SAMPLES[bson_type] + test_id = f"reject_{bson_type.value}_for_{spec.id}" + cases.append(pytest.param(bson_type, sample_value, spec, id=test_id)) + return cases + + +def generate_bson_acceptance_test_cases(params): + """Generate pytest.param tuples for valid types that should be accepted.""" + import pytest + + cases = [] + for spec in params: + for bson_type in spec.valid_types: + if spec.valid_inputs and bson_type in spec.valid_inputs: + sample_value = spec.valid_inputs[bson_type] + else: + sample_value = BSON_TYPE_SAMPLES[bson_type] + test_id = f"accept_{bson_type.value}_for_{spec.id}" + cases.append(pytest.param(bson_type, sample_value, spec, id=test_id)) + return cases diff --git a/documentdb_tests/framework/error_codes.py b/documentdb_tests/framework/error_codes.py index 0fe94584..89e259c3 100644 --- a/documentdb_tests/framework/error_codes.py +++ b/documentdb_tests/framework/error_codes.py @@ -265,6 +265,7 @@ NON_NUMERIC_TYPE_ERROR = 51081 NON_INTEGRAL_POSITION_ERROR = 51082 INVALID_POSITION_ERROR = 51083 +INVALID_REGEX_PATTERN_ERROR = 51091 REGEX_NON_OBJECT_ERROR = 51103 REGEX_INPUT_TYPE_ERROR = 51104 REGEX_REGEX_TYPE_ERROR = 51105 diff --git a/documentdb_tests/framework/test_constants.py 
b/documentdb_tests/framework/test_constants.py index 9c9bfeca..5ee224f9 100644 --- a/documentdb_tests/framework/test_constants.py +++ b/documentdb_tests/framework/test_constants.py @@ -1,6 +1,7 @@ from datetime import datetime, timezone +from enum import Enum -from bson import Decimal128, Int64, ObjectId, Timestamp +from bson import Binary, Code, Decimal128, Int64, MaxKey, MinKey, ObjectId, Regex, Timestamp from bson.datetime_ms import DatetimeMS # Int32 boundary values @@ -188,3 +189,52 @@ OID_MAX_SIGNED32 = ObjectId("7fffffff0000000000000000") OID_MIN_SIGNED32 = ObjectId("800000000000000000000000") OID_MAX_UNSIGNED32 = ObjectId("ffffffff0000000000000000") + + +# BSON type identifiers +class BsonType(Enum): + """BSON type identifiers.""" + + DOUBLE = "double" + STRING = "string" + OBJECT = "object" + ARRAY = "array" + EMPTY_OBJECT = "empty_object" + EMPTY_ARRAY = "empty_array" + BIN_DATA = "bin_data" + OBJECT_ID = "object_id" + BOOL = "bool" + DATE = "date" + NULL = "null" + REGEX = "regex" + JAVASCRIPT = "javascript" + INT = "int" + TIMESTAMP = "timestamp" + LONG = "long" + DECIMAL = "decimal" + MIN_KEY = "min_key" + MAX_KEY = "max_key" + + +# Representative sample value for each BSON type +BSON_TYPE_SAMPLES = { + BsonType.DOUBLE: 3.14, + BsonType.STRING: "hello", + BsonType.OBJECT: {"key": "value"}, + BsonType.ARRAY: ["a", "b", "c"], + BsonType.EMPTY_OBJECT: {}, + BsonType.EMPTY_ARRAY: [], + BsonType.BIN_DATA: Binary(b"\x00\x01\x02"), + BsonType.OBJECT_ID: OID_EPOCH, + BsonType.BOOL: True, + BsonType.DATE: DATE_EPOCH, + BsonType.NULL: None, + BsonType.REGEX: Regex("^abc", "i"), + BsonType.JAVASCRIPT: Code("function(){}"), + BsonType.INT: INT32_MAX, + BsonType.TIMESTAMP: TS_EPOCH, + BsonType.LONG: INT64_MAX, + BsonType.DECIMAL: DECIMAL128_HALF, + BsonType.MIN_KEY: MinKey(), + BsonType.MAX_KEY: MaxKey(), +} From 9b6fb6dd32d3ab51d84aedcc4340c029898771b2 Mon Sep 17 00:00:00 2001 From: "Victor [C] Tsang" Date: Sat, 9 May 2026 00:00:59 +0000 Subject: [PATCH 2/3] use 
OBJECT as empty object instead of a separate type Signed-off-by: Victor [C] Tsang --- .../test_jsonSchema_keyword_validation.py | 16 ++++++++-------- documentdb_tests/framework/test_constants.py | 4 +--- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/documentdb_tests/compatibility/tests/core/operator/query/misc/jsonSchema/test_jsonSchema_keyword_validation.py b/documentdb_tests/compatibility/tests/core/operator/query/misc/jsonSchema/test_jsonSchema_keyword_validation.py index f287cc8b..d95da466 100644 --- a/documentdb_tests/compatibility/tests/core/operator/query/misc/jsonSchema/test_jsonSchema_keyword_validation.py +++ b/documentdb_tests/compatibility/tests/core/operator/query/misc/jsonSchema/test_jsonSchema_keyword_validation.py @@ -31,14 +31,14 @@ id="properties", msg="properties should reject non-object types", keyword="properties", - valid_types=[BsonType.OBJECT, BsonType.EMPTY_OBJECT], - valid_inputs={BsonType.OBJECT: {"x": {}}, BsonType.EMPTY_OBJECT: {}}, + valid_types=[BsonType.OBJECT], + valid_inputs={BsonType.OBJECT: {"x": {}}}, ), BsonTypeTestCase( id="additionalProperties", msg="additionalProperties should reject non-bool/object types", keyword="additionalProperties", - valid_types=[BsonType.BOOL, BsonType.OBJECT, BsonType.EMPTY_OBJECT], + valid_types=[BsonType.BOOL, BsonType.OBJECT], valid_inputs={BsonType.OBJECT: {"bsonType": "string"}}, ), BsonTypeTestCase( @@ -147,7 +147,7 @@ id="items", msg="items should reject non-object/array types", keyword="items", - valid_types=[BsonType.OBJECT, BsonType.ARRAY, BsonType.EMPTY_ARRAY, BsonType.EMPTY_OBJECT], + valid_types=[BsonType.OBJECT, BsonType.ARRAY, BsonType.EMPTY_ARRAY], valid_inputs={BsonType.OBJECT: {"bsonType": "string"}, BsonType.ARRAY: [{}]}, ), BsonTypeTestCase( @@ -192,21 +192,21 @@ id="not", msg="not should reject non-object types", keyword="not", - valid_types=[BsonType.OBJECT, BsonType.EMPTY_OBJECT], + valid_types=[BsonType.OBJECT], valid_inputs={BsonType.OBJECT: {"bsonType": 
"string"}}, ), BsonTypeTestCase( id="dependencies", msg="dependencies should reject non-object types", keyword="dependencies", - valid_types=[BsonType.OBJECT, BsonType.EMPTY_OBJECT], + valid_types=[BsonType.OBJECT], valid_inputs={BsonType.OBJECT: {"x": ["y"]}}, ), BsonTypeTestCase( id="patternProperties", msg="patternProperties should reject non-object types", keyword="patternProperties", - valid_types=[BsonType.OBJECT, BsonType.EMPTY_OBJECT], + valid_types=[BsonType.OBJECT], valid_inputs={BsonType.OBJECT: {"^x": {}}}, ), BsonTypeTestCase( @@ -219,7 +219,7 @@ id="additionalItems", msg="additionalItems should reject non-bool/object types", keyword="additionalItems", - valid_types=[BsonType.BOOL, BsonType.OBJECT, BsonType.EMPTY_OBJECT], + valid_types=[BsonType.BOOL, BsonType.OBJECT], valid_inputs={BsonType.OBJECT: {"bsonType": "string"}}, ), ] diff --git a/documentdb_tests/framework/test_constants.py b/documentdb_tests/framework/test_constants.py index 5ee224f9..abdf72e7 100644 --- a/documentdb_tests/framework/test_constants.py +++ b/documentdb_tests/framework/test_constants.py @@ -199,7 +199,6 @@ class BsonType(Enum): STRING = "string" OBJECT = "object" ARRAY = "array" - EMPTY_OBJECT = "empty_object" EMPTY_ARRAY = "empty_array" BIN_DATA = "bin_data" OBJECT_ID = "object_id" @@ -220,9 +219,8 @@ class BsonType(Enum): BSON_TYPE_SAMPLES = { BsonType.DOUBLE: 3.14, BsonType.STRING: "hello", - BsonType.OBJECT: {"key": "value"}, + BsonType.OBJECT: {}, BsonType.ARRAY: ["a", "b", "c"], - BsonType.EMPTY_OBJECT: {}, BsonType.EMPTY_ARRAY: [], BsonType.BIN_DATA: Binary(b"\x00\x01\x02"), BsonType.OBJECT_ID: OID_EPOCH, From 1cd7e967d46260ca86f320bbc2768116470b0417 Mon Sep 17 00:00:00 2001 From: "Victor [C] Tsang" Date: Sat, 9 May 2026 00:20:49 +0000 Subject: [PATCH 3/3] Added geospatial query tests for $geoWithin Signed-off-by: Victor [C] Tsang --- .../test_geoWithin_argument_handling.py | 482 ++++++++++++++++++ .../test_geoWithin_bson_type_validation.py | 93 ++++ 
...test_geoWithin_centersphere_containment.py | 195 +++++++ .../geoWithin/test_geoWithin_errors.py | 366 +++++++++++++ .../geoWithin/test_geoWithin_field_lookup.py | 171 +++++++ .../test_geoWithin_geojson_polygon.py | 245 +++++++++ .../geoWithin/test_geoWithin_legacy_shapes.py | 98 ++++ .../test_geoWithin_logical_operators.py | 159 ++++++ 8 files changed, 1809 insertions(+) create mode 100644 documentdb_tests/compatibility/tests/core/operator/query/geospatial/geoWithin/test_geoWithin_argument_handling.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/query/geospatial/geoWithin/test_geoWithin_bson_type_validation.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/query/geospatial/geoWithin/test_geoWithin_centersphere_containment.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/query/geospatial/geoWithin/test_geoWithin_errors.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/query/geospatial/geoWithin/test_geoWithin_field_lookup.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/query/geospatial/geoWithin/test_geoWithin_geojson_polygon.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/query/geospatial/geoWithin/test_geoWithin_legacy_shapes.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/query/geospatial/geoWithin/test_geoWithin_logical_operators.py diff --git a/documentdb_tests/compatibility/tests/core/operator/query/geospatial/geoWithin/test_geoWithin_argument_handling.py b/documentdb_tests/compatibility/tests/core/operator/query/geospatial/geoWithin/test_geoWithin_argument_handling.py new file mode 100644 index 00000000..40f2de50 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/query/geospatial/geoWithin/test_geoWithin_argument_handling.py @@ -0,0 +1,482 @@ +""" +Tests for $geoWithin argument handling, geometry formats, null/missing fields, and document types. 
+ +Field lookup / dotted-path tests live in test_geoWithin_field_lookup.py. +""" + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.query.utils.query_test_case import ( + QueryTestCase, +) +from documentdb_tests.framework.assertions import assertSuccess +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Standard polygon for reuse in tests +POLYGON = { + "$geometry": { + "type": "Polygon", + "coordinates": [[[-10, -10], [10, -10], [10, 10], [-10, 10], [-10, -10]]], + } +} + + +ARGUMENT_HANDLING_TESTS: list[QueryTestCase] = [ + QueryTestCase( + id="geometry_polygon_single_ring", + filter={"loc": {"$geoWithin": POLYGON}}, + doc=[ + {"_id": 1, "loc": {"type": "Point", "coordinates": [0, 0]}}, + {"_id": 2, "loc": {"type": "Point", "coordinates": [50, 50]}}, + ], + expected=[{"_id": 1, "loc": {"type": "Point", "coordinates": [0, 0]}}], + msg="$geometry GeoJSON Polygon single ring should return matching docs", + ), + QueryTestCase( + id="geometry_polygon_with_hole", + filter={ + "loc": { + "$geoWithin": { + "$geometry": { + "type": "Polygon", + "coordinates": [ + [[-10, -10], [10, -10], [10, 10], [-10, 10], [-10, -10]], + [[-1, -1], [1, -1], [1, 1], [-1, 1], [-1, -1]], + ], + } + } + } + }, + doc=[ + {"_id": 1, "loc": {"type": "Point", "coordinates": [5, 5]}}, + {"_id": 2, "loc": {"type": "Point", "coordinates": [0, 0]}}, + {"_id": 3, "loc": {"type": "Point", "coordinates": [50, 50]}}, + ], + expected=[{"_id": 1, "loc": {"type": "Point", "coordinates": [5, 5]}}], + msg="Polygon with hole should exclude points in hole", + ), + QueryTestCase( + id="geometry_multipolygon", + filter={ + "loc": { + "$geoWithin": { + "$geometry": { + "type": "MultiPolygon", + "coordinates": [ + [[[-5, -5], [5, -5], [5, 5], [-5, 5], [-5, -5]]], + [[[15, 15], [25, 15], [25, 25], [15, 25], [15, 15]]], + ], + } + } + } + }, + doc=[ + {"_id": 1, "loc": {"type": "Point", "coordinates": [0, 
0]}}, + {"_id": 2, "loc": {"type": "Point", "coordinates": [20, 20]}}, + {"_id": 3, "loc": {"type": "Point", "coordinates": [50, 50]}}, + ], + expected=[ + {"_id": 1, "loc": {"type": "Point", "coordinates": [0, 0]}}, + {"_id": 2, "loc": {"type": "Point", "coordinates": [20, 20]}}, + ], + msg="MultiPolygon should match points in either polygon", + ), + QueryTestCase( + id="geometry_polygon_with_strictwinding_crs", + filter={ + "loc": { + "$geoWithin": { + "$geometry": { + "type": "Polygon", + "coordinates": [[[-10, -10], [10, -10], [10, 10], [-10, 10], [-10, -10]]], + "crs": { + "type": "name", + "properties": {"name": "urn:x-mongodb:crs:strictwinding:EPSG:4326"}, + }, + } + } + } + }, + doc=[ + {"_id": 1, "loc": {"type": "Point", "coordinates": [0, 0]}}, + {"_id": 2, "loc": {"type": "Point", "coordinates": [50, 50]}}, + ], + expected=[{"_id": 1, "loc": {"type": "Point", "coordinates": [0, 0]}}], + msg="Polygon with strictwinding CRS should behave like default for small polygon", + ), + QueryTestCase( + id="geometry_polygon_with_epsg4326_crs", + filter={ + "loc": { + "$geoWithin": { + "$geometry": { + "type": "Polygon", + "coordinates": [[[-10, -10], [10, -10], [10, 10], [-10, 10], [-10, -10]]], + "crs": { + "type": "name", + "properties": {"name": "urn:ogc:def:crs:OGC:1.3:CRS84"}, + }, + } + } + } + }, + doc=[ + {"_id": 1, "loc": {"type": "Point", "coordinates": [0, 0]}}, + {"_id": 2, "loc": {"type": "Point", "coordinates": [50, 50]}}, + ], + expected=[{"_id": 1, "loc": {"type": "Point", "coordinates": [0, 0]}}], + msg="Polygon with standard CRS84 (EPSG:4326) CRS should match points inside", + ), +] + + +NULL_MISSING_TESTS: list[QueryTestCase] = [ + QueryTestCase( + id="null_field_no_match", + filter={"loc": {"$geoWithin": POLYGON}}, + doc=[{"_id": 1, "loc": None}, {"_id": 2, "loc": {"type": "Point", "coordinates": [0, 0]}}], + expected=[{"_id": 2, "loc": {"type": "Point", "coordinates": [0, 0]}}], + msg="Null location field should not match", + ), + QueryTestCase( 
+ id="missing_field_no_match", + filter={"loc": {"$geoWithin": POLYGON}}, + doc=[ + {"_id": 1, "other": "value"}, + {"_id": 2, "loc": {"type": "Point", "coordinates": [0, 0]}}, + ], + expected=[{"_id": 2, "loc": {"type": "Point", "coordinates": [0, 0]}}], + msg="Missing location field should not match", + ), +] + + +DOCUMENT_TYPE_TESTS: list[QueryTestCase] = [ + QueryTestCase( + id="unsorted_results", + filter={"loc": {"$geoWithin": POLYGON}}, + doc=[ + {"_id": 1, "loc": {"type": "Point", "coordinates": [9, 9]}}, + {"_id": 2, "loc": {"type": "Point", "coordinates": [1, 1]}}, + {"_id": 3, "loc": {"type": "Point", "coordinates": [5, 5]}}, + ], + expected=[ + {"_id": 1, "loc": {"type": "Point", "coordinates": [9, 9]}}, + {"_id": 2, "loc": {"type": "Point", "coordinates": [1, 1]}}, + {"_id": 3, "loc": {"type": "Point", "coordinates": [5, 5]}}, + ], + msg="Results are not sorted by distance — all within should be returned", + ), + QueryTestCase( + id="array_of_geojson_points", + filter={"locs": {"$geoWithin": POLYGON}}, + doc=[ + { + "_id": 1, + "locs": [ + {"type": "Point", "coordinates": [0, 0]}, + {"type": "Point", "coordinates": [5, 5]}, + ], + }, + { + "_id": 2, + "locs": [ + {"type": "Point", "coordinates": [50, 50]}, + {"type": "Point", "coordinates": [60, 60]}, + ], + }, + ], + expected=[ + { + "_id": 1, + "locs": [ + {"type": "Point", "coordinates": [0, 0]}, + {"type": "Point", "coordinates": [5, 5]}, + ], + } + ], + msg="Array of GeoJSON Points should match if any element is within", + ), + QueryTestCase( + id="array_of_legacy_coords", + filter={"locs": {"$geoWithin": {"$box": [[-10, -10], [10, 10]]}}}, + doc=[{"_id": 1, "locs": [[0, 0], [5, 5]]}, {"_id": 2, "locs": [[50, 50], [60, 60]]}], + expected=[{"_id": 1, "locs": [[0, 0], [5, 5]]}], + msg="Array of legacy coordinate pairs should match", + ), + QueryTestCase( + id="array_of_linestrings", + filter={"routes": {"$geoWithin": POLYGON}}, + doc=[ + { + "_id": 1, + "routes": [ + {"type": "LineString", 
"coordinates": [[0, 0], [5, 5]]}, + {"type": "LineString", "coordinates": [[0, 0], [50, 50]]}, + ], + }, + { + "_id": 2, + "routes": [ + {"type": "LineString", "coordinates": [[50, 50], [60, 60]]}, + {"type": "LineString", "coordinates": [[0, 0], [50, 50]]}, + ], + }, + ], + expected=[ + { + "_id": 1, + "routes": [ + {"type": "LineString", "coordinates": [[0, 0], [5, 5]]}, + {"type": "LineString", "coordinates": [[0, 0], [50, 50]]}, + ], + } + ], + msg="Array of LineStrings should match if any element is entirely within", + ), + QueryTestCase( + id="array_of_polygons", + filter={"coverage_areas": {"$geoWithin": POLYGON}}, + doc=[ + { + "_id": 1, + "coverage_areas": [ + { + "type": "Polygon", + "coordinates": [[[-1, -1], [1, -1], [1, 1], [-1, 1], [-1, -1]]], + }, + { + "type": "Polygon", + "coordinates": [[[-50, -50], [50, -50], [50, 50], [-50, 50], [-50, -50]]], + }, + ], + }, + { + "_id": 2, + "coverage_areas": [ + { + "type": "Polygon", + "coordinates": [[[20, 20], [30, 20], [30, 30], [20, 30], [20, 20]]], + }, + { + "type": "Polygon", + "coordinates": [[[40, 40], [50, 40], [50, 50], [40, 50], [40, 40]]], + }, + ], + }, + ], + expected=[ + { + "_id": 1, + "coverage_areas": [ + { + "type": "Polygon", + "coordinates": [[[-1, -1], [1, -1], [1, 1], [-1, 1], [-1, -1]]], + }, + { + "type": "Polygon", + "coordinates": [[[-50, -50], [50, -50], [50, 50], [-50, 50], [-50, -50]]], + }, + ], + } + ], + msg="Array of Polygons should match if any element is entirely within", + ), + QueryTestCase( + id="multiple_geospatial_fields", + filter={"home": {"$geoWithin": POLYGON}}, + doc=[ + { + "_id": 1, + "home": {"type": "Point", "coordinates": [0, 0]}, + "work": {"type": "Point", "coordinates": [50, 50]}, + }, + { + "_id": 2, + "home": {"type": "Point", "coordinates": [50, 50]}, + "work": {"type": "Point", "coordinates": [0, 0]}, + }, + ], + expected=[ + { + "_id": 1, + "home": {"type": "Point", "coordinates": [0, 0]}, + "work": {"type": "Point", "coordinates": [50, 50]}, + } + ], 
+ msg="Query on one geospatial field should not affect other fields", + ), + QueryTestCase( + id="linestring_entirely_within", + filter={"geo": {"$geoWithin": POLYGON}}, + doc=[ + {"_id": 1, "geo": {"type": "LineString", "coordinates": [[0, 0], [5, 5]]}}, + {"_id": 2, "geo": {"type": "LineString", "coordinates": [[0, 0], [50, 50]]}}, + ], + expected=[{"_id": 1, "geo": {"type": "LineString", "coordinates": [[0, 0], [5, 5]]}}], + msg="LineString entirely within should match", + ), + QueryTestCase( + id="polygon_entirely_within", + filter={"geo": {"$geoWithin": POLYGON}}, + doc=[ + { + "_id": 1, + "geo": { + "type": "Polygon", + "coordinates": [[[-1, -1], [1, -1], [1, 1], [-1, 1], [-1, -1]]], + }, + }, + { + "_id": 2, + "geo": { + "type": "Polygon", + "coordinates": [[[-50, -50], [50, -50], [50, 50], [-50, 50], [-50, -50]]], + }, + }, + ], + expected=[ + { + "_id": 1, + "geo": { + "type": "Polygon", + "coordinates": [[[-1, -1], [1, -1], [1, 1], [-1, 1], [-1, -1]]], + }, + } + ], + msg="Polygon entirely within should match", + ), + QueryTestCase( + id="multipoint_all_within", + filter={"geo": {"$geoWithin": POLYGON}}, + doc=[ + {"_id": 1, "geo": {"type": "MultiPoint", "coordinates": [[0, 0], [5, 5], [-5, -5]]}}, + {"_id": 2, "geo": {"type": "MultiPoint", "coordinates": [[0, 0], [50, 50]]}}, + ], + expected=[ + {"_id": 1, "geo": {"type": "MultiPoint", "coordinates": [[0, 0], [5, 5], [-5, -5]]}} + ], + msg="MultiPoint should match only if all points are within", + ), + QueryTestCase( + id="multilinestring_all_within", + filter={"geo": {"$geoWithin": POLYGON}}, + doc=[ + { + "_id": 1, + "geo": { + "type": "MultiLineString", + "coordinates": [[[0, 0], [5, 5]], [[-5, -5], [3, 3]]], + }, + }, + { + "_id": 2, + "geo": { + "type": "MultiLineString", + "coordinates": [[[0, 0], [5, 5]], [[0, 0], [50, 50]]], + }, + }, + ], + expected=[ + { + "_id": 1, + "geo": { + "type": "MultiLineString", + "coordinates": [[[0, 0], [5, 5]], [[-5, -5], [3, 3]]], + }, + } + ], + 
msg="MultiLineString should match only if all lines are within", + ), + QueryTestCase( + id="multilinestring_partial_no_match", + filter={"geo": {"$geoWithin": POLYGON}}, + doc=[ + { + "_id": 1, + "geo": { + "type": "MultiLineString", + "coordinates": [[[0, 0], [5, 5]], [[0, 0], [50, 50]]], + }, + } + ], + expected=[], + msg="MultiLineString with one line outside should not match", + ), + QueryTestCase( + id="multipoint_partial_no_match", + filter={"geo": {"$geoWithin": POLYGON}}, + doc=[ + {"_id": 1, "geo": {"type": "MultiPoint", "coordinates": [[0, 0], [50, 50]]}}, + {"_id": 2, "geo": {"type": "MultiPoint", "coordinates": [[1, 1], [2, 2]]}}, + ], + expected=[{"_id": 2, "geo": {"type": "MultiPoint", "coordinates": [[1, 1], [2, 2]]}}], + msg="MultiPoint with one point outside should not match", + ), + QueryTestCase( + id="geometry_collection_all_within", + filter={"geo": {"$geoWithin": POLYGON}}, + doc=[ + { + "_id": 1, + "geo": { + "type": "GeometryCollection", + "geometries": [ + {"type": "Point", "coordinates": [0, 0]}, + {"type": "LineString", "coordinates": [[1, 1], [2, 2]]}, + ], + }, + }, + { + "_id": 2, + "geo": { + "type": "GeometryCollection", + "geometries": [{"type": "Point", "coordinates": [50, 50]}], + }, + }, + ], + expected=[ + { + "_id": 1, + "geo": { + "type": "GeometryCollection", + "geometries": [ + {"type": "Point", "coordinates": [0, 0]}, + {"type": "LineString", "coordinates": [[1, 1], [2, 2]]}, + ], + }, + } + ], + msg="GeometryCollection with all sub-geometries within should match", + ), + QueryTestCase( + id="geometry_collection_partial_no_match", + filter={"geo": {"$geoWithin": POLYGON}}, + doc=[ + { + "_id": 1, + "geo": { + "type": "GeometryCollection", + "geometries": [ + {"type": "Point", "coordinates": [0, 0]}, + {"type": "Point", "coordinates": [50, 50]}, + ], + }, + } + ], + expected=[], + msg="GeometryCollection with one sub-geometry outside should not match", + ), +] + + +ALL_TESTS = ARGUMENT_HANDLING_TESTS + NULL_MISSING_TESTS + 
ALL_TESTS = ARGUMENT_HANDLING_TESTS + NULL_MISSING_TESTS + DOCUMENT_TYPE_TESTS


@pytest.mark.parametrize("test", pytest_params(ALL_TESTS))
def test_geoWithin_argument_handling(collection, test):
    """Run one $geoWithin argument-handling case: seed fixtures, query, compare.

    Covers $geometry argument forms, null/missing location fields, and
    array/multi-geometry document shapes; result order is not significant.
    """
    collection.insert_many(test.doc)
    find_cmd = {"find": collection.name, "filter": test.filter}
    response = execute_command(collection, find_cmd)
    assertSuccess(response, test.expected, ignore_doc_order=True)
+""" + +import pytest + +from documentdb_tests.framework.assertions import assertFailureCode, assertNotError +from documentdb_tests.framework.bson_type_validator import ( + BsonType, + BsonTypeTestCase, + generate_bson_acceptance_test_cases, + generate_bson_rejection_test_cases, +) +from documentdb_tests.framework.error_codes import BAD_VALUE_ERROR +from documentdb_tests.framework.executor import execute_command + +GEOWITHIN_PARAMS = [ + BsonTypeTestCase( + id="geometry", + msg="$geometry should reject non-object types", + keyword="$geometry", + valid_types=[BsonType.OBJECT], + default_error_code=BAD_VALUE_ERROR, + valid_inputs={ + BsonType.OBJECT: { + "type": "Polygon", + "coordinates": [[[-10, -10], [10, -10], [10, 10], [-10, 10], [-10, -10]]], + } + }, + ), + BsonTypeTestCase( + id="box", + msg="$box should reject non-array types and empty array", + keyword="$box", + valid_types=[BsonType.ARRAY], + default_error_code=BAD_VALUE_ERROR, + valid_inputs={BsonType.ARRAY: [[-10, -10], [10, 10]]}, + ), + BsonTypeTestCase( + id="polygon", + msg="$polygon should reject non-array types and empty array", + keyword="$polygon", + valid_types=[BsonType.ARRAY], + default_error_code=BAD_VALUE_ERROR, + valid_inputs={BsonType.ARRAY: [[0, 0], [10, 0], [10, 10]]}, + ), + BsonTypeTestCase( + id="center", + msg="$center should reject non-array types and empty array", + keyword="$center", + valid_types=[BsonType.ARRAY], + default_error_code=BAD_VALUE_ERROR, + valid_inputs={BsonType.ARRAY: [[0, 0], 10]}, + ), + BsonTypeTestCase( + id="centerSphere", + msg="$centerSphere should reject non-array types and empty array", + keyword="$centerSphere", + valid_types=[BsonType.ARRAY], + default_error_code=BAD_VALUE_ERROR, + valid_inputs={BsonType.ARRAY: [[0, 0], 0.5]}, + ), +] + +TEST_CASES = generate_bson_rejection_test_cases(GEOWITHIN_PARAMS) + + +@pytest.mark.parametrize( + "bson_type,sample_value,spec", + TEST_CASES, +) +def test_geoWithin_bson_type_rejected(collection, bson_type, 
sample_value, spec): + """Test $geoWithin shape operators reject invalid BSON types.""" + query_filter = {"loc": {"$geoWithin": {spec.keyword: sample_value}}} + result = execute_command(collection, {"find": collection.name, "filter": query_filter}) + assertFailureCode(result, spec.expected_code(bson_type), msg=spec.msg) + + +ACCEPTANCE_CASES = generate_bson_acceptance_test_cases(GEOWITHIN_PARAMS) + + +@pytest.mark.parametrize( + "bson_type,sample_value,spec", + ACCEPTANCE_CASES, +) +def test_geoWithin_bson_type_accepted(collection, bson_type, sample_value, spec): + """Test $geoWithin shape operators accept valid BSON types.""" + query_filter = {"loc": {"$geoWithin": {spec.keyword: sample_value}}} + result = execute_command(collection, {"find": collection.name, "filter": query_filter}) + assertNotError(result, msg=f"{spec.keyword} should accept {bson_type.value}") diff --git a/documentdb_tests/compatibility/tests/core/operator/query/geospatial/geoWithin/test_geoWithin_centersphere_containment.py b/documentdb_tests/compatibility/tests/core/operator/query/geospatial/geoWithin/test_geoWithin_centersphere_containment.py new file mode 100644 index 00000000..a676bc69 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/query/geospatial/geoWithin/test_geoWithin_centersphere_containment.py @@ -0,0 +1,195 @@ +""" +Tests for $geoWithin $centerSphere containment of non-Point geometry types +(LineString, Polygon, MultiPolygon). 
+""" + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.query.utils.query_test_case import ( + QueryTestCase, +) +from documentdb_tests.framework.assertions import assertSuccess +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Spherical cap centered at [0, 0] with radius ~111km (0.01 radians ≈ 0.57 degrees) +SMALL_CAP = {"$centerSphere": [[0, 0], 0.01]} + +# Larger cap centered at [0, 0] with radius ~1111km (0.1 radians ≈ 5.7 degrees) +LARGE_CAP = {"$centerSphere": [[0, 0], 0.1]} + + +CENTERSPHERE_LINESTRING_TESTS: list[QueryTestCase] = [ + QueryTestCase( + id="linestring_entirely_within_cap", + filter={"geo": {"$geoWithin": LARGE_CAP}}, + doc=[ + {"_id": 1, "geo": {"type": "LineString", "coordinates": [[0.1, 0.1], [0.2, 0.2]]}}, + {"_id": 2, "geo": {"type": "LineString", "coordinates": [[0, 0], [20, 20]]}}, + ], + expected=[ + {"_id": 1, "geo": {"type": "LineString", "coordinates": [[0.1, 0.1], [0.2, 0.2]]}} + ], + msg="LineString entirely within $centerSphere should match", + ), + QueryTestCase( + id="linestring_intersecting_cap_no_match", + filter={"geo": {"$geoWithin": SMALL_CAP}}, + doc=[{"_id": 1, "geo": {"type": "LineString", "coordinates": [[0, 0], [5, 5]]}}], + expected=[], + msg="LineString intersecting but not entirely within $centerSphere should not match", + ), + QueryTestCase( + id="linestring_outside_cap_no_match", + filter={"geo": {"$geoWithin": SMALL_CAP}}, + doc=[{"_id": 1, "geo": {"type": "LineString", "coordinates": [[10, 10], [11, 11]]}}], + expected=[], + msg="LineString entirely outside $centerSphere should not match", + ), +] + + +CENTERSPHERE_POLYGON_TESTS: list[QueryTestCase] = [ + QueryTestCase( + id="polygon_entirely_within_cap", + filter={"geo": {"$geoWithin": LARGE_CAP}}, + doc=[ + { + "_id": 1, + "geo": { + "type": "Polygon", + "coordinates": [[[0.1, 0.1], [0.2, 0.1], [0.2, 0.2], [0.1, 0.2], [0.1, 0.1]]], + }, + }, + { + 
"_id": 2, + "geo": { + "type": "Polygon", + "coordinates": [[[20, 20], [21, 20], [21, 21], [20, 21], [20, 20]]], + }, + }, + ], + expected=[ + { + "_id": 1, + "geo": { + "type": "Polygon", + "coordinates": [[[0.1, 0.1], [0.2, 0.1], [0.2, 0.2], [0.1, 0.2], [0.1, 0.1]]], + }, + } + ], + msg="Polygon entirely within $centerSphere should match", + ), + QueryTestCase( + id="polygon_intersecting_cap_no_match", + filter={"geo": {"$geoWithin": SMALL_CAP}}, + doc=[ + { + "_id": 1, + "geo": { + "type": "Polygon", + "coordinates": [[[0, 0], [5, 0], [5, 5], [0, 5], [0, 0]]], + }, + } + ], + expected=[], + msg="Polygon intersecting but not entirely within $centerSphere should not match", + ), + QueryTestCase( + id="polygon_outside_cap_no_match", + filter={"geo": {"$geoWithin": SMALL_CAP}}, + doc=[ + { + "_id": 1, + "geo": { + "type": "Polygon", + "coordinates": [[[10, 10], [11, 10], [11, 11], [10, 11], [10, 10]]], + }, + } + ], + expected=[], + msg="Polygon entirely outside $centerSphere should not match", + ), +] + + +CENTERSPHERE_MULTIPOLYGON_TESTS: list[QueryTestCase] = [ + QueryTestCase( + id="multipolygon_all_within_cap", + filter={"geo": {"$geoWithin": LARGE_CAP}}, + doc=[ + { + "_id": 1, + "geo": { + "type": "MultiPolygon", + "coordinates": [ + [[[0.1, 0.1], [0.2, 0.1], [0.2, 0.2], [0.1, 0.2], [0.1, 0.1]]], + [[[0.3, 0.3], [0.4, 0.3], [0.4, 0.4], [0.3, 0.4], [0.3, 0.3]]], + ], + }, + }, + { + "_id": 2, + "geo": { + "type": "MultiPolygon", + "coordinates": [ + [[[0.1, 0.1], [0.2, 0.1], [0.2, 0.2], [0.1, 0.2], [0.1, 0.1]]], + [[[20, 20], [21, 20], [21, 21], [20, 21], [20, 20]]], + ], + }, + }, + ], + expected=[ + { + "_id": 1, + "geo": { + "type": "MultiPolygon", + "coordinates": [ + [[[0.1, 0.1], [0.2, 0.1], [0.2, 0.2], [0.1, 0.2], [0.1, 0.1]]], + [[[0.3, 0.3], [0.4, 0.3], [0.4, 0.4], [0.3, 0.4], [0.3, 0.3]]], + ], + }, + } + ], + msg="MultiPolygon with all polygons within $centerSphere should match", + ), + QueryTestCase( + id="multipolygon_one_outside_no_match", + 
filter={"geo": {"$geoWithin": SMALL_CAP}}, + doc=[ + { + "_id": 1, + "geo": { + "type": "MultiPolygon", + "coordinates": [ + [ + [ + [0.001, 0.001], + [0.002, 0.001], + [0.002, 0.002], + [0.001, 0.002], + [0.001, 0.001], + ] + ], + [[[20, 20], [21, 20], [21, 21], [20, 21], [20, 20]]], + ], + }, + } + ], + expected=[], + msg="MultiPolygon with one polygon outside $centerSphere should not match", + ), +] + + +ALL_TESTS = ( + CENTERSPHERE_LINESTRING_TESTS + CENTERSPHERE_POLYGON_TESTS + CENTERSPHERE_MULTIPOLYGON_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(ALL_TESTS)) +def test_geoWithin_centersphere_containment(collection, test): + """Test $geoWithin $centerSphere containment of non-Point geometry types.""" + collection.insert_many(test.doc) + result = execute_command(collection, {"find": collection.name, "filter": test.filter}) + assertSuccess(result, test.expected, ignore_doc_order=True) diff --git a/documentdb_tests/compatibility/tests/core/operator/query/geospatial/geoWithin/test_geoWithin_errors.py b/documentdb_tests/compatibility/tests/core/operator/query/geospatial/geoWithin/test_geoWithin_errors.py new file mode 100644 index 00000000..26d91a39 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/query/geospatial/geoWithin/test_geoWithin_errors.py @@ -0,0 +1,366 @@ +""" +Tests for $geoWithin error cases — argument validation, coordinate validation, and invalid geometry. 
+""" + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.query.utils.query_test_case import ( + QueryTestCase, +) +from documentdb_tests.framework.assertions import assertFailureCode +from documentdb_tests.framework.error_codes import BAD_VALUE_ERROR +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +GEOJSON_ERROR_TESTS: list[QueryTestCase] = [ + QueryTestCase( + id="missing_geometry_and_shape", + filter={"loc": {"$geoWithin": {}}}, + error_code=BAD_VALUE_ERROR, + msg="Missing $geometry and no shape operator should error", + ), + QueryTestCase( + id="invalid_geometry_type_linestring", + filter={ + "loc": { + "$geoWithin": {"$geometry": {"type": "LineString", "coordinates": [[0, 0], [1, 1]]}} + } + }, + error_code=BAD_VALUE_ERROR, + msg="Invalid geometry type LineString should error", + ), + QueryTestCase( + id="empty_coordinates", + filter={"loc": {"$geoWithin": {"$geometry": {"type": "Polygon", "coordinates": []}}}}, + error_code=BAD_VALUE_ERROR, + msg="Empty coordinates array should error", + ), + QueryTestCase( + id="non_array_coordinates", + filter={ + "loc": {"$geoWithin": {"$geometry": {"type": "Polygon", "coordinates": "invalid"}}} + }, + error_code=BAD_VALUE_ERROR, + msg="Non-array coordinates should error", + ), + QueryTestCase( + id="non_object_argument", + filter={"loc": {"$geoWithin": "invalid"}}, + error_code=BAD_VALUE_ERROR, + msg="Non-object argument should error", + ), + QueryTestCase( + id="null_argument", + filter={"loc": {"$geoWithin": None}}, + error_code=BAD_VALUE_ERROR, + msg="Null argument should error", + ), + QueryTestCase( + id="latitude_above_90", + filter={ + "loc": { + "$geoWithin": { + "$geometry": { + "type": "Polygon", + "coordinates": [[[0, 91], [1, 91], [1, 92], [0, 92], [0, 91]]], + } + } + } + }, + error_code=BAD_VALUE_ERROR, + msg="Latitude > 90 should error", + ), + QueryTestCase( + id="self_intersecting_polygon", + 
filter={ + "loc": { + "$geoWithin": { + "$geometry": { + "type": "Polygon", + "coordinates": [[[0, 0], [2, 2], [2, 0], [0, 2], [0, 0]]], + } + } + } + }, + error_code=BAD_VALUE_ERROR, + msg="Self-intersecting polygon should error", + ), + QueryTestCase( + id="unclosed_polygon_ring", + filter={ + "loc": { + "$geoWithin": { + "$geometry": { + "type": "Polygon", + "coordinates": [[[0, 0], [1, 0], [1, 1], [0, 1]]], + } + } + } + }, + error_code=BAD_VALUE_ERROR, + msg="Unclosed polygon ring should error", + ), + QueryTestCase( + id="polygon_non_contained_hole", + filter={ + "loc": { + "$geoWithin": { + "$geometry": { + "type": "Polygon", + "coordinates": [ + [[-1, -1], [1, -1], [1, 1], [-1, 1], [-1, -1]], + [[50, 50], [51, 50], [51, 51], [50, 51], [50, 50]], + ], + } + } + } + }, + error_code=BAD_VALUE_ERROR, + msg="Polygon with non-contained hole should error", + ), + QueryTestCase( + id="multipolygon_non_contained_hole", + filter={ + "loc": { + "$geoWithin": { + "$geometry": { + "type": "MultiPolygon", + "coordinates": [ + [ + [[-1, -1], [1, -1], [1, 1], [-1, 1], [-1, -1]], + [[50, 50], [51, 50], [51, 51], [50, 51], [50, 50]], + ] + ], + } + } + } + }, + error_code=BAD_VALUE_ERROR, + msg="MultiPolygon with non-contained hole should error", + ), + QueryTestCase( + id="missing_type_field", + filter={ + "loc": { + "$geoWithin": { + "$geometry": {"coordinates": [[[0, 0], [1, 0], [1, 1], [0, 1], [0, 0]]]} + } + } + }, + error_code=BAD_VALUE_ERROR, + msg="$geometry without type field should error", + ), + QueryTestCase( + id="missing_coordinates_field", + filter={"loc": {"$geoWithin": {"$geometry": {"type": "Polygon"}}}}, + error_code=BAD_VALUE_ERROR, + msg="$geometry without coordinates field should error", + ), + QueryTestCase( + id="non_string_type_field", + filter={ + "loc": { + "$geoWithin": { + "$geometry": { + "type": 123, + "coordinates": [[[0, 0], [1, 0], [1, 1], [0, 1], [0, 0]]], + } + } + } + }, + error_code=BAD_VALUE_ERROR, + msg="$geometry with non-string type 
field should error", + ), + QueryTestCase( + id="invalid_geometry_type_point", + filter={"loc": {"$geoWithin": {"$geometry": {"type": "Point", "coordinates": [0, 0]}}}}, + error_code=BAD_VALUE_ERROR, + msg="Invalid geometry type Point should error", + ), + QueryTestCase( + id="invalid_geometry_type_multipoint", + filter={ + "loc": { + "$geoWithin": {"$geometry": {"type": "MultiPoint", "coordinates": [[0, 0], [1, 1]]}} + } + }, + error_code=BAD_VALUE_ERROR, + msg="Invalid geometry type MultiPoint should error", + ), + QueryTestCase( + id="invalid_geometry_type_multilinestring", + filter={ + "loc": { + "$geoWithin": { + "$geometry": {"type": "MultiLineString", "coordinates": [[[0, 0], [1, 1]]]} + } + } + }, + error_code=BAD_VALUE_ERROR, + msg="Invalid geometry type MultiLineString should error", + ), + QueryTestCase( + id="geometry_value_numeric", + filter={"loc": {"$geoWithin": {"$geometry": 1}}}, + error_code=BAD_VALUE_ERROR, + msg="$geometry with numeric value should error", + ), + QueryTestCase( + id="geometry_value_empty_string", + filter={"loc": {"$geoWithin": {"$geometry": ""}}}, + error_code=BAD_VALUE_ERROR, + msg="$geometry with empty string value should error", + ), + QueryTestCase( + id="geometry_value_boolean", + filter={"loc": {"$geoWithin": {"$geometry": False}}}, + error_code=BAD_VALUE_ERROR, + msg="$geometry with boolean value should error", + ), + QueryTestCase( + id="geometry_value_empty_array", + filter={"loc": {"$geoWithin": {"$geometry": []}}}, + error_code=BAD_VALUE_ERROR, + msg="$geometry with empty array value should error", + ), + QueryTestCase( + id="geojson_without_geometry_wrapper", + filter={ + "loc": { + "$geoWithin": { + "type": "Polygon", + "coordinates": [[[0, 0], [1, 0], [1, 1], [0, 1], [0, 0]]], + } + } + }, + error_code=BAD_VALUE_ERROR, + msg="GeoJSON object directly in $geoWithin without $geometry wrapper should error", + ), +] + + +CRS_ERROR_TESTS: list[QueryTestCase] = [ + QueryTestCase( + id="invalid_crs_type", + filter={ + 
"loc": { + "$geoWithin": { + "$geometry": { + "type": "Polygon", + "coordinates": [[[0, 0], [1, 0], [1, 1], [0, 1], [0, 0]]], + "crs": { + "type": "link", + "properties": {"name": "urn:x-mongodb:crs:strictwinding:EPSG:4326"}, + }, + } + } + } + }, + error_code=BAD_VALUE_ERROR, + msg="crs.type other than 'name' should error", + ), + QueryTestCase( + id="missing_crs_type", + filter={ + "loc": { + "$geoWithin": { + "$geometry": { + "type": "Polygon", + "coordinates": [[[0, 0], [1, 0], [1, 1], [0, 1], [0, 0]]], + "crs": { + "properties": {"name": "urn:x-mongodb:crs:strictwinding:EPSG:4326"} + }, + } + } + } + }, + error_code=BAD_VALUE_ERROR, + msg="crs without type field should error", + ), + QueryTestCase( + id="missing_crs_properties", + filter={ + "loc": { + "$geoWithin": { + "$geometry": { + "type": "Polygon", + "coordinates": [[[0, 0], [1, 0], [1, 1], [0, 1], [0, 0]]], + "crs": {"type": "name"}, + } + } + } + }, + error_code=BAD_VALUE_ERROR, + msg="crs without properties field should error", + ), + QueryTestCase( + id="unknown_crs_name", + filter={ + "loc": { + "$geoWithin": { + "$geometry": { + "type": "Polygon", + "coordinates": [[[0, 0], [1, 0], [1, 1], [0, 1], [0, 0]]], + "crs": {"type": "name", "properties": {"name": "urn:bogus:not-a-real-crs"}}, + } + } + } + }, + error_code=BAD_VALUE_ERROR, + msg="Unknown crs.properties.name URN should error", + ), + QueryTestCase( + id="missing_crs_properties_name", + filter={ + "loc": { + "$geoWithin": { + "$geometry": { + "type": "Polygon", + "coordinates": [[[0, 0], [1, 0], [1, 1], [0, 1], [0, 0]]], + "crs": {"type": "name", "properties": {}}, + } + } + } + }, + error_code=BAD_VALUE_ERROR, + msg="crs.properties without name field should error", + ), +] + + +LEGACY_ERROR_TESTS: list[QueryTestCase] = [ + QueryTestCase( + id="negative_radius", + filter={"loc": {"$geoWithin": {"$centerSphere": [[0, 0], -1]}}}, + error_code=BAD_VALUE_ERROR, + msg="$centerSphere negative radius should error", + ), + QueryTestCase( + 
id="center_negative_radius", + filter={"loc": {"$geoWithin": {"$center": [[0, 0], -1]}}}, + error_code=BAD_VALUE_ERROR, + msg="$center with negative radius should error", + ), + QueryTestCase( + id="box_single_corner", + filter={"loc": {"$geoWithin": {"$box": [[0, 0]]}}}, + error_code=BAD_VALUE_ERROR, + msg="$box with only one corner should error", + ), + QueryTestCase( + id="polygon_two_points", + filter={"loc": {"$geoWithin": {"$polygon": [[0, 0], [10, 10]]}}}, + error_code=BAD_VALUE_ERROR, + msg="$polygon with fewer than 3 points should error", + ), +] + + +ALL_ERROR_TESTS = GEOJSON_ERROR_TESTS + CRS_ERROR_TESTS + LEGACY_ERROR_TESTS + + +@pytest.mark.parametrize("test", pytest_params(ALL_ERROR_TESTS)) +def test_geoWithin_errors(collection, test): + """Test $geoWithin rejects invalid arguments, coordinates, geometry, and shape parameters.""" + result = execute_command(collection, {"find": collection.name, "filter": test.filter}) + assertFailureCode(result, test.error_code) diff --git a/documentdb_tests/compatibility/tests/core/operator/query/geospatial/geoWithin/test_geoWithin_field_lookup.py b/documentdb_tests/compatibility/tests/core/operator/query/geospatial/geoWithin/test_geoWithin_field_lookup.py new file mode 100644 index 00000000..a223220d --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/query/geospatial/geoWithin/test_geoWithin_field_lookup.py @@ -0,0 +1,171 @@ +""" +Tests for $geoWithin field lookup patterns. + +Covers dotted paths through embedded documents, arrays of embedded documents +with dotted paths, deeply nested paths, and non-existent / null intermediate paths. 
+""" + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.query.utils.query_test_case import ( + QueryTestCase, +) +from documentdb_tests.framework.assertions import assertSuccess +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +POLYGON = { + "$geometry": { + "type": "Polygon", + "coordinates": [[[-10, -10], [10, -10], [10, 10], [-10, 10], [-10, -10]]], + } +} + +POINT_INSIDE = {"type": "Point", "coordinates": [0, 0]} +POINT_OUTSIDE = {"type": "Point", "coordinates": [50, 50]} + + +DOTTED_PATH_TESTS: list[QueryTestCase] = [ + QueryTestCase( + id="nested_field_inside", + filter={"geo.loc": {"$geoWithin": POLYGON}}, + doc=[{"_id": 1, "geo": {"loc": POINT_INSIDE}}, {"_id": 2, "geo": {"loc": POINT_OUTSIDE}}], + expected=[{"_id": 1, "geo": {"loc": POINT_INSIDE}}], + msg="Dotted path to nested geo field should match point inside", + ), + QueryTestCase( + id="nested_field_outside", + filter={"geo.loc": {"$geoWithin": POLYGON}}, + doc=[{"_id": 1, "geo": {"loc": POINT_OUTSIDE}}], + expected=[], + msg="Dotted path to nested geo field outside polygon should not match", + ), + QueryTestCase( + id="deeply_nested_geojson_feature", + filter={"feature.properties.geometry.location": {"$geoWithin": POLYGON}}, + doc=[ + {"_id": 1, "feature": {"properties": {"geometry": {"location": POINT_INSIDE}}}}, + {"_id": 2, "feature": {"properties": {"geometry": {"location": POINT_OUTSIDE}}}}, + ], + expected=[{"_id": 1, "feature": {"properties": {"geometry": {"location": POINT_INSIDE}}}}], + msg="Deeply nested dotted path through GeoJSON-Feature-like schema should match", + ), + QueryTestCase( + id="null_parent_no_match", + filter={"geo.loc": {"$geoWithin": POLYGON}}, + doc=[{"_id": 1, "geo": None}, {"_id": 2, "geo": {"loc": POINT_INSIDE}}], + expected=[{"_id": 2, "geo": {"loc": POINT_INSIDE}}], + msg="Null parent field should not match", + ), + QueryTestCase( + 
id="missing_intermediate_field", + filter={"geo.loc": {"$geoWithin": POLYGON}}, + doc=[{"_id": 1, "geo": {"other": "value"}}, {"_id": 2, "geo": {"loc": POINT_INSIDE}}], + expected=[{"_id": 2, "geo": {"loc": POINT_INSIDE}}], + msg="Missing intermediate field (parent has no child) should not match", + ), + QueryTestCase( + id="non_object_intermediate", + filter={"address.geocode.location": {"$geoWithin": POLYGON}}, + doc=[{"_id": 1, "address": {"geocode": "not_an_object"}}], + expected=[], + msg="Dotted path through non-object intermediate should not match", + ), + QueryTestCase( + id="nonexistent_top_field", + filter={"missing.loc": {"$geoWithin": POLYGON}}, + doc=[{"_id": 1, "geo": {"loc": POINT_INSIDE}}], + expected=[], + msg="Dotted path with non-existent top-level field should not match", + ), + QueryTestCase( + id="dotted_path_intermediate_null", + filter={"address.geocode.location": {"$geoWithin": POLYGON}}, + doc=[{"_id": 1, "address": {"geocode": None}}], + expected=[], + msg="Dotted path where intermediate field is null should not match", + ), +] + + +ARRAY_OF_EMBEDDED_DOCS_TESTS: list[QueryTestCase] = [ + QueryTestCase( + id="array_of_objects_any_inside", + filter={"addresses.location": {"$geoWithin": POLYGON}}, + doc=[ + { + "_id": 1, + "addresses": [ + {"label": "home", "location": POINT_INSIDE}, + {"label": "work", "location": POINT_OUTSIDE}, + ], + }, + { + "_id": 2, + "addresses": [ + {"label": "home", "location": {"type": "Point", "coordinates": [60, 60]}}, + {"label": "work", "location": POINT_OUTSIDE}, + ], + }, + ], + expected=[ + { + "_id": 1, + "addresses": [ + {"label": "home", "location": POINT_INSIDE}, + {"label": "work", "location": POINT_OUTSIDE}, + ], + } + ], + msg="Dotted path through array of objects matches if ANY element's geo is inside", + ), + QueryTestCase( + id="array_of_objects_none_inside", + filter={"addresses.location": {"$geoWithin": POLYGON}}, + doc=[ + { + "_id": 1, + "addresses": [ + {"label": "home", "location": 
POINT_OUTSIDE}, + {"label": "work", "location": {"type": "Point", "coordinates": [60, 60]}}, + ], + } + ], + expected=[], + msg="Dotted path through array of objects with no inside element should not match", + ), + QueryTestCase( + id="trips_waypoints_location", + filter={"trips.start.location": {"$geoWithin": POLYGON}}, + doc=[ + { + "_id": 1, + "trips": [ + {"start": {"location": POINT_INSIDE}}, + {"start": {"location": POINT_OUTSIDE}}, + ], + } + ], + expected=[ + { + "_id": 1, + "trips": [ + {"start": {"location": POINT_INSIDE}}, + {"start": {"location": POINT_OUTSIDE}}, + ], + } + ], + msg="Dotted path traverses array then nested object correctly", + ), +] + + +ALL_TESTS = DOTTED_PATH_TESTS + ARRAY_OF_EMBEDDED_DOCS_TESTS + + +@pytest.mark.parametrize("test", pytest_params(ALL_TESTS)) +def test_geoWithin_field_lookup(collection, test): + """Parametrized test for $geoWithin field lookup patterns.""" + collection.insert_many(test.doc) + result = execute_command(collection, {"find": collection.name, "filter": test.filter}) + assertSuccess(result, test.expected, ignore_doc_order=True) diff --git a/documentdb_tests/compatibility/tests/core/operator/query/geospatial/geoWithin/test_geoWithin_geojson_polygon.py b/documentdb_tests/compatibility/tests/core/operator/query/geospatial/geoWithin/test_geoWithin_geojson_polygon.py new file mode 100644 index 00000000..498b5ac2 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/query/geospatial/geoWithin/test_geoWithin_geojson_polygon.py @@ -0,0 +1,245 @@ +""" +Tests for $geoWithin GeoJSON polygon edge cases, big polygons, and meridian-crossing polygons. 
+""" + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.query.utils.query_test_case import ( + QueryTestCase, +) +from documentdb_tests.framework.assertions import assertSuccess +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Big polygon covering most of the earth (> hemisphere) +BIG_POLYGON = { + "type": "Polygon", + "coordinates": [[[-170, -80], [170, -80], [170, 80], [-170, 80], [-170, -80]]], + "crs": {"type": "name", "properties": {"name": "urn:x-mongodb:crs:strictwinding:EPSG:4326"}}, +} + +# Complementary polygon (small area NOT covered by big polygon) +SMALL_COMPLEMENT = { + "type": "Polygon", + "coordinates": [[[-170, 80], [170, 80], [170, -80], [-170, -80], [-170, 80]]], + "crs": {"type": "name", "properties": {"name": "urn:x-mongodb:crs:strictwinding:EPSG:4326"}}, +} + + +POLYGON_EDGE_CASE_TESTS: list[QueryTestCase] = [ + QueryTestCase( + id="correct_lon_lat_order", + filter={ + "loc": { + "$geoWithin": { + "$geometry": { + "type": "Polygon", + "coordinates": [[[39, 4], [41, 4], [41, 6], [39, 6], [39, 4]]], + } + } + } + }, + doc=[ + {"_id": 1, "loc": {"type": "Point", "coordinates": [40, 5]}}, + {"_id": 2, "loc": {"type": "Point", "coordinates": [0, 0]}}, + ], + expected=[{"_id": 1, "loc": {"type": "Point", "coordinates": [40, 5]}}], + msg="Correct [longitude, latitude] order should return correct results", + ), + QueryTestCase( + id="point_at_null_island", + filter={ + "loc": { + "$geoWithin": { + "$geometry": { + "type": "Polygon", + "coordinates": [[[-1, -1], [1, -1], [1, 1], [-1, 1], [-1, -1]]], + } + } + } + }, + doc=[{"_id": 1, "loc": {"type": "Point", "coordinates": [0, 0]}}], + expected=[{"_id": 1, "loc": {"type": "Point", "coordinates": [0, 0]}}], + msg="Point at [0, 0] (null island) should match", + ), + QueryTestCase( + id="point_at_extreme_coords", + filter={ + "loc": { + "$geoWithin": { + "$geometry": { + "type": "Polygon", + 
"coordinates": [[[179, 88], [180, 88], [180, 90], [179, 90], [179, 88]]], + } + } + } + }, + doc=[{"_id": 1, "loc": {"type": "Point", "coordinates": [180, 89]}}], + expected=[{"_id": 1, "loc": {"type": "Point", "coordinates": [180, 89]}}], + msg="Point at extreme coordinates [180, 89] should match", + ), + QueryTestCase( + id="very_small_polygon", + filter={ + "loc": { + "$geoWithin": { + "$geometry": { + "type": "Polygon", + "coordinates": [ + [[0, 0], [0.0001, 0], [0.0001, 0.0001], [0, 0.0001], [0, 0]] + ], + } + } + } + }, + doc=[ + {"_id": 1, "loc": {"type": "Point", "coordinates": [0.00001, 0.00001]}}, + {"_id": 2, "loc": {"type": "Point", "coordinates": [1, 1]}}, + ], + expected=[{"_id": 1, "loc": {"type": "Point", "coordinates": [0.00001, 0.00001]}}], + msg="Very small polygon should match point inside", + ), + QueryTestCase( + id="duplicate_consecutive_vertices", + filter={ + "loc": { + "$geoWithin": { + "$geometry": { + "type": "Polygon", + "coordinates": [[[-1, -1], [-1, -1], [1, -1], [1, 1], [-1, 1], [-1, -1]]], + } + } + } + }, + doc=[{"_id": 1, "loc": {"type": "Point", "coordinates": [0, 0]}}], + expected=[{"_id": 1, "loc": {"type": "Point", "coordinates": [0, 0]}}], + msg="Polygon with duplicate consecutive vertices should still match", + ), + QueryTestCase( + id="point_inside_close_to_boundary", + filter={ + "loc": { + "$geoWithin": { + "$geometry": { + "type": "Polygon", + "coordinates": [[[-10, -10], [10, -10], [10, 10], [-10, 10], [-10, -10]]], + } + } + } + }, + doc=[{"_id": 1, "loc": {"type": "Point", "coordinates": [9.999, 0]}}], + expected=[{"_id": 1, "loc": {"type": "Point", "coordinates": [9.999, 0]}}], + msg="Point very close to boundary (inside) should match", + ), + QueryTestCase( + id="point_outside_close_to_boundary", + filter={ + "loc": { + "$geoWithin": { + "$geometry": { + "type": "Polygon", + "coordinates": [[[-10, -10], [10, -10], [10, 10], [-10, 10], [-10, -10]]], + } + } + } + }, + doc=[{"_id": 1, "loc": {"type": "Point", 
"coordinates": [10.001, 0]}}], + expected=[], + msg="Point very close to boundary (outside) should not match", + ), + QueryTestCase( + id="polygon_sharing_edge", + filter={ + "loc": { + "$geoWithin": { + "$geometry": { + "type": "Polygon", + "coordinates": [[[0, 0], [10, 0], [10, 10], [0, 10], [0, 0]]], + } + } + } + }, + doc=[ + {"_id": 1, "loc": {"type": "Point", "coordinates": [5, 5]}}, + {"_id": 2, "loc": {"type": "Point", "coordinates": [15, 5]}}, + ], + expected=[{"_id": 1, "loc": {"type": "Point", "coordinates": [5, 5]}}], + msg="Only point inside polygon should match, not one sharing edge outside", + ), +] + + +BIG_POLYGON_TESTS: list[QueryTestCase] = [ + QueryTestCase( + id="big_polygon_with_strictwinding", + filter={"loc": {"$geoWithin": {"$geometry": BIG_POLYGON}}}, + doc=[ + {"_id": 1, "loc": {"type": "Point", "coordinates": [0, 0]}}, + {"_id": 2, "loc": {"type": "Point", "coordinates": [50, 50]}}, + {"_id": 3, "loc": {"type": "Point", "coordinates": [-50, -50]}}, + {"_id": 4, "loc": {"type": "Point", "coordinates": [175, 85]}}, + ], + expected=[ + {"_id": 1, "loc": {"type": "Point", "coordinates": [0, 0]}}, + {"_id": 2, "loc": {"type": "Point", "coordinates": [50, 50]}}, + {"_id": 3, "loc": {"type": "Point", "coordinates": [-50, -50]}}, + {"_id": 4, "loc": {"type": "Point", "coordinates": [175, 85]}}, + ], + msg="Big polygon (>hemisphere) with strictwinding CRS should cover most of earth", + ), + QueryTestCase( + id="reverse_winding_returns_complement", + filter={"loc": {"$geoWithin": {"$geometry": SMALL_COMPLEMENT}}}, + doc=[ + {"_id": 1, "loc": {"type": "Point", "coordinates": [0, 0]}}, + {"_id": 2, "loc": {"type": "Point", "coordinates": [175, 85]}}, + {"_id": 3, "loc": {"type": "Point", "coordinates": [179, 0]}}, + {"_id": 4, "loc": {"type": "Point", "coordinates": [-179, 0]}}, + ], + expected=[ + {"_id": 3, "loc": {"type": "Point", "coordinates": [179, 0]}}, + {"_id": 4, "loc": {"type": "Point", "coordinates": [-179, 0]}}, + ], + msg="Reversed 
winding with strictwinding CRS returns complement (antimeridian sliver)", + ), +] + + +MERIDIAN_TESTS: list[QueryTestCase] = [ + QueryTestCase( + id="polygon_crossing_antimeridian", + filter={ + "loc": { + "$geoWithin": { + "$geometry": { + "type": "Polygon", + "coordinates": [[[178, -2], [-178, -2], [-178, 2], [178, 2], [178, -2]]], + } + } + } + }, + doc=[ + {"_id": 1, "loc": {"type": "Point", "coordinates": [179, 0]}}, + {"_id": 2, "loc": {"type": "Point", "coordinates": [-179, 0]}}, + {"_id": 3, "loc": {"type": "Point", "coordinates": [179.5, 0.5]}}, + {"_id": 4, "loc": {"type": "Point", "coordinates": [0, 0]}}, + ], + expected=[ + {"_id": 1, "loc": {"type": "Point", "coordinates": [179, 0]}}, + {"_id": 2, "loc": {"type": "Point", "coordinates": [-179, 0]}}, + {"_id": 3, "loc": {"type": "Point", "coordinates": [179.5, 0.5]}}, + ], + msg="Polygon crossing antimeridian should match points near dateline", + ), +] + + +ALL_TESTS = POLYGON_EDGE_CASE_TESTS + BIG_POLYGON_TESTS + MERIDIAN_TESTS + + +@pytest.mark.parametrize("test", pytest_params(ALL_TESTS)) +def test_geoWithin_polygon(collection, test): + """Test $geoWithin polygon edge cases, big polygons, and meridian-crossing polygons.""" + collection.insert_many(test.doc) + result = execute_command(collection, {"find": collection.name, "filter": test.filter}) + assertSuccess(result, test.expected, ignore_doc_order=True) diff --git a/documentdb_tests/compatibility/tests/core/operator/query/geospatial/geoWithin/test_geoWithin_legacy_shapes.py b/documentdb_tests/compatibility/tests/core/operator/query/geospatial/geoWithin/test_geoWithin_legacy_shapes.py new file mode 100644 index 00000000..95e27a20 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/query/geospatial/geoWithin/test_geoWithin_legacy_shapes.py @@ -0,0 +1,98 @@ +""" +Tests for $geoWithin legacy shape operators ($box, $polygon, $center, $centerSphere). 
+""" + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.query.utils.query_test_case import ( + QueryTestCase, +) +from documentdb_tests.framework.assertions import assertSuccess +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +LEGACY_SHAPE_TESTS: list[QueryTestCase] = [ + QueryTestCase( + id="box_points_inside", + filter={"loc": {"$geoWithin": {"$box": [[0, 0], [10, 10]]}}}, + doc=[{"_id": 1, "loc": [5, 5]}, {"_id": 2, "loc": [15, 15]}], + expected=[{"_id": 1, "loc": [5, 5]}], + msg="$box should match points inside", + ), + QueryTestCase( + id="polygon_points_inside", + filter={"loc": {"$geoWithin": {"$polygon": [[0, 0], [10, 0], [10, 10], [0, 10]]}}}, + doc=[{"_id": 1, "loc": [5, 5]}, {"_id": 2, "loc": [15, 15]}], + expected=[{"_id": 1, "loc": [5, 5]}], + msg="$polygon should match points inside", + ), + QueryTestCase( + id="center_points_within_radius", + filter={"loc": {"$geoWithin": {"$center": [[0, 0], 5]}}}, + doc=[{"_id": 1, "loc": [1, 1]}, {"_id": 2, "loc": [10, 10]}], + expected=[{"_id": 1, "loc": [1, 1]}], + msg="$center should match points within flat circle radius", + ), + QueryTestCase( + id="centersphere_points_within", + filter={"loc": {"$geoWithin": {"$centerSphere": [[0, 0], 0.01]}}}, + doc=[{"_id": 1, "loc": [0.1, 0.1]}, {"_id": 2, "loc": [10, 10]}], + expected=[{"_id": 1, "loc": [0.1, 0.1]}], + msg="$centerSphere should match points within spherical circle", + ), +] + + +# Flat operators ($box, $polygon, $center) accept legacy [x, y] pairs and +# GeoJSON Point documents (the Point's coordinates are used). Non-Point +# GeoJSON document types (LineString, Polygon, etc.) silently do not match. 
+FLAT_OPERATOR_GEOJSON_TESTS: list[QueryTestCase] = [ + QueryTestCase( + id="box_with_geojson_point_matches", + filter={"loc": {"$geoWithin": {"$box": [[-10, -10], [10, 10]]}}}, + doc=[ + {"_id": 1, "loc": {"type": "Point", "coordinates": [0, 0]}}, + {"_id": 2, "loc": {"type": "Point", "coordinates": [50, 50]}}, + ], + expected=[{"_id": 1, "loc": {"type": "Point", "coordinates": [0, 0]}}], + msg="$box should match GeoJSON Point inside the box", + ), + QueryTestCase( + id="box_with_geojson_linestring_no_match", + filter={"loc": {"$geoWithin": {"$box": [[-10, -10], [10, 10]]}}}, + doc=[{"_id": 1, "loc": {"type": "LineString", "coordinates": [[0, 0], [1, 1]]}}], + expected=[], + msg="$box should silently not match non-Point GeoJSON document", + ), + QueryTestCase( + id="polygon_with_geojson_point_matches", + filter={"loc": {"$geoWithin": {"$polygon": [[-10, -10], [10, -10], [10, 10], [-10, 10]]}}}, + doc=[ + {"_id": 1, "loc": {"type": "Point", "coordinates": [0, 0]}}, + {"_id": 2, "loc": {"type": "Point", "coordinates": [50, 50]}}, + ], + expected=[{"_id": 1, "loc": {"type": "Point", "coordinates": [0, 0]}}], + msg="$polygon should match GeoJSON Point inside the polygon", + ), + QueryTestCase( + id="center_with_geojson_point_matches", + filter={"loc": {"$geoWithin": {"$center": [[0, 0], 5]}}}, + doc=[ + {"_id": 1, "loc": {"type": "Point", "coordinates": [0, 0]}}, + {"_id": 2, "loc": {"type": "Point", "coordinates": [50, 50]}}, + ], + expected=[{"_id": 1, "loc": {"type": "Point", "coordinates": [0, 0]}}], + msg="$center should match GeoJSON Point inside the radius", + ), +] + + +ALL_TESTS = LEGACY_SHAPE_TESTS + FLAT_OPERATOR_GEOJSON_TESTS + + +@pytest.mark.parametrize("test", pytest_params(ALL_TESTS)) +def test_geoWithin_legacy_shapes(collection, test): + """Test $geoWithin legacy shape operators ($box, $polygon, $center, $centerSphere).""" + collection.insert_many(test.doc) + result = execute_command(collection, {"find": collection.name, "filter": test.filter}) + 
assertSuccess(result, test.expected, ignore_doc_order=True) diff --git a/documentdb_tests/compatibility/tests/core/operator/query/geospatial/geoWithin/test_geoWithin_logical_operators.py b/documentdb_tests/compatibility/tests/core/operator/query/geospatial/geoWithin/test_geoWithin_logical_operators.py new file mode 100644 index 00000000..3c3c432e --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/query/geospatial/geoWithin/test_geoWithin_logical_operators.py @@ -0,0 +1,159 @@ +""" +Tests for $geoWithin interaction with logical operators ($and, $or, $not, $nor). +""" + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.query.utils.query_test_case import ( + QueryTestCase, +) +from documentdb_tests.framework.assertions import assertSuccess +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +POLYGON = { + "$geometry": { + "type": "Polygon", + "coordinates": [[[-10, -10], [10, -10], [10, 10], [-10, 10], [-10, -10]]], + } +} + +POLYGON2 = { + "$geometry": { + "type": "Polygon", + "coordinates": [[[20, 20], [30, 20], [30, 30], [20, 30], [20, 20]]], + } +} + + +LOGICAL_OPERATOR_TESTS: list[QueryTestCase] = [ + QueryTestCase( + id="and_geo_with_non_geo", + filter={"$and": [{"loc": {"$geoWithin": POLYGON}}, {"status": "active"}]}, + doc=[ + {"_id": 1, "loc": {"type": "Point", "coordinates": [0, 0]}, "status": "active"}, + {"_id": 2, "loc": {"type": "Point", "coordinates": [5, 5]}, "status": "inactive"}, + {"_id": 3, "loc": {"type": "Point", "coordinates": [50, 50]}, "status": "active"}, + ], + expected=[{"_id": 1, "loc": {"type": "Point", "coordinates": [0, 0]}, "status": "active"}], + msg="$and combining geo and non-geo filter should intersect results", + ), + QueryTestCase( + id="or_two_geo_queries", + filter={"$or": [{"loc": {"$geoWithin": POLYGON}}, {"loc": {"$geoWithin": POLYGON2}}]}, + doc=[ + {"_id": 1, "loc": {"type": "Point", "coordinates": 
[0, 0]}}, + {"_id": 2, "loc": {"type": "Point", "coordinates": [25, 25]}}, + {"_id": 3, "loc": {"type": "Point", "coordinates": [50, 50]}}, + ], + expected=[ + {"_id": 1, "loc": {"type": "Point", "coordinates": [0, 0]}}, + {"_id": 2, "loc": {"type": "Point", "coordinates": [25, 25]}}, + ], + msg="$or combining two geo queries should union results", + ), + QueryTestCase( + id="or_geo_with_non_geo", + filter={"$or": [{"loc": {"$geoWithin": POLYGON}}, {"status": "active"}]}, + doc=[ + {"_id": 1, "loc": {"type": "Point", "coordinates": [0, 0]}, "status": "inactive"}, + {"_id": 2, "loc": {"type": "Point", "coordinates": [50, 50]}, "status": "active"}, + {"_id": 3, "loc": {"type": "Point", "coordinates": [60, 60]}, "status": "inactive"}, + ], + expected=[ + {"_id": 1, "loc": {"type": "Point", "coordinates": [0, 0]}, "status": "inactive"}, + {"_id": 2, "loc": {"type": "Point", "coordinates": [50, 50]}, "status": "active"}, + ], + msg="$or combining geo and non-geo filter should union results", + ), + QueryTestCase( + id="nor_two_geo_queries", + filter={"$nor": [{"loc": {"$geoWithin": POLYGON}}, {"loc": {"$geoWithin": POLYGON2}}]}, + doc=[ + {"_id": 1, "loc": {"type": "Point", "coordinates": [0, 0]}}, + {"_id": 2, "loc": {"type": "Point", "coordinates": [25, 25]}}, + {"_id": 3, "loc": {"type": "Point", "coordinates": [50, 50]}}, + ], + expected=[{"_id": 3, "loc": {"type": "Point", "coordinates": [50, 50]}}], + msg="$nor should return documents not matching any condition", + ), + QueryTestCase( + id="nor_geo_with_non_geo", + filter={"$nor": [{"loc": {"$geoWithin": POLYGON}}, {"status": "inactive"}]}, + doc=[ + {"_id": 1, "loc": {"type": "Point", "coordinates": [0, 0]}, "status": "active"}, + {"_id": 2, "loc": {"type": "Point", "coordinates": [50, 50]}, "status": "active"}, + {"_id": 3, "loc": {"type": "Point", "coordinates": [60, 60]}, "status": "inactive"}, + ], + expected=[ + {"_id": 2, "loc": {"type": "Point", "coordinates": [50, 50]}, "status": "active"} + ], + 
msg="$nor combining geo and non-geo filter should exclude both", + ), + QueryTestCase( + id="elemMatch_geo", + filter={"locations": {"$elemMatch": {"$geoWithin": POLYGON}}}, + doc=[ + { + "_id": 1, + "locations": [ + {"type": "Point", "coordinates": [0, 0]}, + {"type": "Point", "coordinates": [50, 50]}, + ], + }, + { + "_id": 2, + "locations": [ + {"type": "Point", "coordinates": [50, 50]}, + {"type": "Point", "coordinates": [60, 60]}, + ], + }, + ], + expected=[ + { + "_id": 1, + "locations": [ + {"type": "Point", "coordinates": [0, 0]}, + {"type": "Point", "coordinates": [50, 50]}, + ], + } + ], + msg="$elemMatch with $geoWithin should match if any array element is within", + ), + QueryTestCase( + id="not_geo", + filter={"loc": {"$not": {"$geoWithin": POLYGON}}}, + doc=[ + {"_id": 1, "loc": {"type": "Point", "coordinates": [0, 0]}}, + {"_id": 2, "loc": {"type": "Point", "coordinates": [50, 50]}}, + {"_id": 3, "loc": {"type": "Point", "coordinates": [60, 60]}}, + ], + expected=[ + {"_id": 2, "loc": {"type": "Point", "coordinates": [50, 50]}}, + {"_id": 3, "loc": {"type": "Point", "coordinates": [60, 60]}}, + ], + msg="$not with $geoWithin should return documents NOT within the polygon", + ), + QueryTestCase( + id="and_negated_geo_with_non_geo", + filter={"$and": [{"loc": {"$not": {"$geoWithin": POLYGON}}}, {"name": "B"}]}, + doc=[ + {"_id": 1, "loc": {"type": "Point", "coordinates": [0, 0]}, "name": "A"}, + {"_id": 2, "loc": {"type": "Point", "coordinates": [50, 50]}, "name": "B"}, + {"_id": 3, "loc": {"type": "Point", "coordinates": [60, 60]}, "name": "B"}, + ], + expected=[ + {"_id": 2, "loc": {"type": "Point", "coordinates": [50, 50]}, "name": "B"}, + {"_id": 3, "loc": {"type": "Point", "coordinates": [60, 60]}, "name": "B"}, + ], + msg="$and combining negated $geoWithin with equality match on non-geo field", + ), +] + + +@pytest.mark.parametrize("test", pytest_params(LOGICAL_OPERATOR_TESTS)) +def test_geoWithin_logical_operators(collection, test): + """Test 
$geoWithin with logical operators.""" + collection.insert_many(test.doc) + result = execute_command(collection, {"find": collection.name, "filter": test.filter}) + assertSuccess(result, test.expected, ignore_doc_order=True)