Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upgrade jsonschema past bc changes in 4.18 #1

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 13 additions & 11 deletions airbyte_cdk/sources/utils/schema_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@
from typing import Any, ClassVar, Dict, List, Mapping, MutableMapping, Optional, Tuple

import jsonref
from jsonschema import RefResolver, validate
from jsonschema import validate
from jsonschema.exceptions import ValidationError
from pydantic.v1 import BaseModel, Field
from referencing import Registry, Resource

from airbyte_cdk.models import ConnectorSpecification, FailureType
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
Expand Down Expand Up @@ -63,28 +64,29 @@ def resolve_ref_links(obj: Any) -> Any:
return obj


def _expand_refs(schema: Any, registry: Optional[Registry] = None) -> None:
    """Internal function to iterate over schema and replace all occurrences of $ref with their definitions. Recursive.

    The schema is patched in place; nothing is returned.

    :param schema: schema that will be patched
    :param registry: registry to resolve references, if None one will be created from schema
        (the schema passed to the outermost call is then used as the root document)
    """
    if registry is None:
        # Local import: keeps this function self-contained without touching the module imports.
        from referencing.jsonschema import DRAFT7

        # A default specification is supplied so that schemas without a "$schema" keyword
        # can still be wrapped in a Resource.
        root = Resource.from_contents(schema, default_specification=DRAFT7)
        # Register the root under the empty URI, which is the default base URI of
        # `Registry.resolver()`, so fragment-only refs such as "#/definitions/foo" resolve
        # against it. `resource @ Registry()` is avoided on purpose: it needs the schema
        # to declare an internal id and files the resource under that id instead.
        registry = Registry().with_resource("", root)

    if isinstance(schema, MutableMapping):
        if "$ref" in schema:
            ref_url = schema.pop("$ref")
            definition = registry.resolver().lookup(ref_url).contents
            _expand_refs(definition, registry=registry)  # expand refs in definitions as well
            schema.update(definition)
        else:
            for value in schema.values():
                _expand_refs(value, registry=registry)
    elif isinstance(schema, List):
        for value in schema:
            _expand_refs(value, registry=registry)


def expand_refs(schema: Any) -> None:
Expand Down
20 changes: 14 additions & 6 deletions airbyte_cdk/sources/utils/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,13 @@
from enum import Flag, auto
from typing import Any, Callable, Dict, Generator, Mapping, Optional, cast

from jsonschema import Draft7Validator, RefResolver, ValidationError, Validator, validators
from jsonschema import (
Draft7Validator,
ValidationError,
Validator,
validators,
)
from referencing import Registry, Resource

MAX_NESTING_DEPTH = 3
json_to_python_simple = {
Expand Down Expand Up @@ -194,11 +200,13 @@ def normalizator(

def resolve(subschema: dict[str, Any]) -> dict[str, Any]:
    """Return the schema that ``subschema`` points to via ``$ref``.

    :param subschema: schema fragment that may contain a "$ref" key
    :return: the referenced schema contents, or ``subschema`` itself when it has no "$ref"
    """
    if "$ref" not in subschema:
        return subschema

    # NOTE(review): jsonschema >= 4.18 validators appear to keep their own `referencing`
    # resolver in the private `_resolver` attribute, rooted at the schema the validator
    # was created with -- confirm against the pinned jsonschema version. It is preferred
    # here because `_registry` looks like a name jsonschema already uses on validators,
    # so caching a home-made registry under that name would be skipped or would clash.
    resolver = getattr(validator_instance, "_resolver", None)
    if resolver is None:
        # Local import: keeps this helper self-contained without touching the module imports.
        from referencing.jsonschema import DRAFT7

        # Fallback: resolve against the schema handed to the enclosing callback.
        # NOTE(review): presumably `schema` is the sub-schema currently being validated,
        # not the document root -- verify that refs to root-level definitions still work.
        # `default_specification` covers schemas without "$schema"; `resolver_with_root`
        # covers schemas without an internal id.
        root = Resource.from_contents(schema, default_specification=DRAFT7)
        resolver = Registry().resolver_with_root(root)

    return cast(dict[str, Any], resolver.lookup(subschema["$ref"]).contents)

# Transform object and array values before running json schema type checking for each element.
Expand Down
14 changes: 8 additions & 6 deletions airbyte_cdk/utils/spec_schema_transformations.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,23 @@
import re
from typing import Any

from jsonschema import RefResolver
from referencing import Registry, Resource


def resolve_refs(schema: dict[str, Any]) -> dict[str, Any]:
    """
    For spec schemas generated using Pydantic models, the resulting JSON schema can contain refs between object
    relationships.

    Every ``{"$ref": "#/definitions/..."}`` block is replaced by the definition it points to and the
    top-level "definitions" entry is dropped. The input schema is not modified; a new dict is returned.
    """
    # Local import: keeps this function self-contained without touching the module imports.
    from referencing.jsonschema import DRAFT7

    # `default_specification` lets schemas without a "$schema" keyword be wrapped in a Resource,
    # and `resolver_with_root` registers the schema under the resolver's own base URI, so
    # fragment-only refs resolve even when the schema declares no internal id.
    root = Resource.from_contents(schema, default_specification=DRAFT7)
    resolver = Registry().resolver_with_root(root)

    str_schema = json.dumps(schema)
    # The replacement works on the serialized schema, so a ref nested inside an inlined
    # definition is handled by a later iteration over the blocks found up front.
    for ref_block in re.findall(r'{"\$ref": "#\/definitions\/.+?(?="})"}', str_schema):
        ref = json.loads(ref_block)["$ref"]
        str_schema = str_schema.replace(ref_block, json.dumps(resolver.lookup(ref).contents))
    pyschema: dict[str, Any] = json.loads(str_schema)
    # Schemas without refs may have no "definitions" key at all.
    pyschema.pop("definitions", None)
    return pyschema
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ genson = "1.3.0"
isodate = "~0.6.1"
Jinja2 = "~3.1.2"
jsonref = "~0.2"
jsonschema = "~4.17.3" # 4.18 has some significant breaking changes: https://github.com/python-jsonschema/jsonschema/releases/tag/v4.18.0
jsonschema = "4.18.0"
pandas = "2.2.2"
psutil = "6.1.0"
pydantic = "^2.7"
Expand Down