Skip to content

Commit 19335d6

Browse files
[source-us-census] fix empty fields after sync (#45331)
Co-authored-by: Octavia Squidington III <octavia-squidington-iii@users.noreply.github.com>
1 parent 7056428 commit 19335d6

File tree

7 files changed

+595
-215
lines changed

7 files changed

+595
-215
lines changed

airbyte-integrations/connectors/source-us-census/metadata.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ data:
22
connectorSubtype: api
33
connectorType: source
44
definitionId: c4cfaeda-c757-489a-8aba-859fb08b6970
5-
dockerImageTag: 0.2.0
5+
dockerImageTag: 0.2.1
66
dockerRepository: airbyte/source-us-census
77
githubIssueLabel: source-us-census
88
icon: uscensus.svg

airbyte-integrations/connectors/source-us-census/poetry.lock

+453-196
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

airbyte-integrations/connectors/source-us-census/pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ requires = [ "poetry-core>=1.0.0",]
33
build-backend = "poetry.core.masonry.api"
44

55
[tool.poetry]
6-
version = "0.2.0"
6+
version = "0.2.1"
77
name = "source-us-census"
88
description = "Source implementation for Us Census."
99
authors = [ "Airbyte <contact@airbyte.io>",]

airbyte-integrations/connectors/source-us-census/source_us_census/components.py

+48-3
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,23 @@
33
#
44

55

6-
from typing import List, Optional, Union
6+
from dataclasses import dataclass
7+
from typing import Any, List, Mapping, Optional, Union
78

89
import requests
910
from airbyte_cdk.models import FailureType
1011
from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor
1112
from airbyte_cdk.sources.declarative.requesters.error_handlers import DefaultErrorHandler
1213
from airbyte_cdk.sources.declarative.requesters.error_handlers.default_http_response_filter import DefaultHttpResponseFilter
14+
from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
1315
from airbyte_cdk.sources.declarative.types import Record
1416
from airbyte_cdk.sources.streams.http.error_handlers.response_models import (
15-
DEFAULT_ERROR_RESOLUTION,
1617
SUCCESS_RESOLUTION,
1718
ErrorResolution,
1819
ResponseAction,
20+
create_fallback_error_resolution,
1921
)
22+
from airbyte_cdk.sources.types import Config
2023

2124

2225
class USCensusRecordExtractor(RecordExtractor):
@@ -132,4 +135,46 @@ def interpret_response(self, response_or_exception: Optional[Union[requests.Resp
132135
default_reponse_filter = DefaultHttpResponseFilter(parameters={}, config=self.config)
133136
default_response_filter_resolution = default_reponse_filter.matches(response_or_exception)
134137

135-
return default_response_filter_resolution if default_response_filter_resolution else DEFAULT_ERROR_RESOLUTION
138+
return (
139+
default_response_filter_resolution
140+
if default_response_filter_resolution
141+
else create_fallback_error_resolution(response_or_exception)
142+
)
143+
144+
145+
@dataclass
146+
class USCensusSchema(SchemaLoader):
147+
"""
148+
The US Census website hosts many APIs: https://www.census.gov/data/developers/data-sets.html
149+
150+
These APIs return data in a non standard format.
151+
We create the JSON schemas dynamically by reading the first "row" of data we get.
152+
153+
In this implementation all records are of type "string", but this function could
154+
be changed to try and infer the data type based on the values it finds.
155+
"""
156+
157+
config: Config
158+
159+
def get_json_schema(self) -> Mapping[str, Any]:
160+
query_params = self.config.get("query_params")
161+
if query_params:
162+
parts = query_params.split("&")
163+
parameters = []
164+
for part in parts:
165+
key, value = part.split("=", 1)
166+
if key == "get":
167+
parameters += value.split(",")
168+
elif key == "for":
169+
parameters.append(value.split(":")[0])
170+
else:
171+
parameters.append(key)
172+
json_schema = {k: {"type": "string"} for k in parameters}
173+
else:
174+
json_schema = {"{ @context: https://project-open-data.cio.gov/v1.1/schema/catalog.jsonld": {"type": "string"}}
175+
return {
176+
"$schema": "http://json-schema.org/draft-07/schema#",
177+
"additionalProperties": True,
178+
"type": "object",
179+
"properties": json_schema,
180+
}

airbyte-integrations/connectors/source-us-census/source_us_census/manifest.yaml

+2-14
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,8 @@ definitions:
2424
type: CustomRecordExtractor
2525
class_name: source_us_census.components.USCensusRecordExtractor
2626
schema_loader:
27-
type: InlineSchemaLoader
28-
schema:
29-
$ref: "#/schemas/us_census_stream"
27+
type: CustomSchemaLoader
28+
class_name: "source_us_census.components.USCensusSchema"
3029
base_requester:
3130
type: HttpRequester
3231
url_base: https://api.census.gov/
@@ -84,14 +83,3 @@ spec:
8483
order: 2
8584
airbyte_secret: true
8685
additionalProperties: true
87-
88-
metadata:
89-
autoImportSchema:
90-
us_census_stream: true
91-
92-
schemas:
93-
us_census_stream:
94-
type: object
95-
$schema: http://json-schema.org/draft-07/schema#
96-
additionalProperties: true
97-
properties: {}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
2+
3+
from dataclasses import dataclass
4+
from typing import Any, Mapping
5+
from unittest.mock import Mock
6+
7+
import pytest
8+
from source_us_census.components import USCensusSchema
9+
10+
11+
@dataclass
12+
class MockConfig:
13+
query_params: str = None
14+
15+
def get(self, key):
16+
if key == "query_params":
17+
return self.query_params
18+
19+
20+
@pytest.fixture
21+
def census_schema():
22+
def _create_schema(query_params=None):
23+
config = MockConfig(query_params=query_params)
24+
return USCensusSchema(config=config)
25+
return _create_schema
26+
27+
28+
def test_get_json_schema_basic_case(census_schema):
29+
schema_instance = census_schema(query_params="get=NAME,POP&for=state:*")
30+
schema = schema_instance.get_json_schema()
31+
32+
expected_properties = {
33+
"NAME": {"type": "string"},
34+
"POP": {"type": "string"},
35+
"state": {"type": "string"}
36+
}
37+
38+
assert schema["properties"] == expected_properties
39+
assert schema["$schema"] == "http://json-schema.org/draft-07/schema#"
40+
assert schema["type"] == "object"
41+
assert schema["additionalProperties"] is True
42+
43+
44+
def test_get_json_schema_with_get_param(census_schema):
45+
schema_instance = census_schema(query_params="get=NAME,AGE,EMPLOYMENT")
46+
schema = schema_instance.get_json_schema()
47+
48+
expected_properties = {
49+
"NAME": {"type": "string"},
50+
"AGE": {"type": "string"},
51+
"EMPLOYMENT": {"type": "string"}
52+
}
53+
54+
assert schema["properties"] == expected_properties
55+
56+
57+
def test_get_json_schema_with_for_param(census_schema):
58+
schema_instance = census_schema(query_params="for=county:1234")
59+
schema = schema_instance.get_json_schema()
60+
61+
expected_properties = {
62+
"county": {"type": "string"}
63+
}
64+
65+
assert schema["properties"] == expected_properties
66+
67+
68+
def test_get_json_schema_with_additional_params(census_schema):
69+
schema_instance = census_schema(query_params="get=NAME&year=2020&for=us:*")
70+
schema = schema_instance.get_json_schema()
71+
72+
expected_properties = {
73+
"NAME": {"type": "string"},
74+
"year": {"type": "string"},
75+
"us": {"type": "string"}
76+
}
77+
78+
assert schema["properties"] == expected_properties
79+
80+
81+
def test_get_json_schema_no_query_params(census_schema):
82+
schema_instance = census_schema(query_params=None)
83+
schema = schema_instance.get_json_schema()
84+
85+
expected_properties = {
86+
"{ @context: https://project-open-data.cio.gov/v1.1/schema/catalog.jsonld": {"type": "string"}
87+
}
88+
89+
assert schema["properties"] == expected_properties

docs/integrations/sources/us-census.md

+1
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ In addition, to understand how to configure the dataset path and query parameter
4545

4646
| Version | Date | Pull Request | Subject |
4747
| :------ | :--------- | :------------------------------------------------------- | :------------------------------------------------ |
48+
| 0.2.1 | 2024-09-07 | [45331](https://github.com/airbytehq/airbyte/pull/45331) | Fix schema |
4849
| 0.2.0 | 2024-08-10 | [43521](https://github.com/airbytehq/airbyte/pull/43521) | Migrate to Low Code |
4950
| 0.1.16 | 2024-08-10 | [43566](https://github.com/airbytehq/airbyte/pull/43566) | Update dependencies |
5051
| 0.1.15 | 2024-08-03 | [43214](https://github.com/airbytehq/airbyte/pull/43214) | Update dependencies |

0 commit comments

Comments
 (0)