Skip to content

Commit b5ed82c

Browse files
ChristoGrabb and bnchrch authored
chore: sanitize transformation error message to hide record values (#221)
Co-authored-by: Ben Church <ben@airbyte.io>
1 parent c55fbbe commit b5ed82c

File tree

2 files changed

+84
-13
lines changed

2 files changed

+84
-13
lines changed

airbyte_cdk/sources/utils/transform.py

+29-3
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
from jsonschema import Draft7Validator, RefResolver, ValidationError, Validator, validators
1111

12+
# Maximum number of nested dict levels expanded when describing a value's type
# structure for an error message; deeper values are reported by type name only.
MAX_NESTING_DEPTH = 3
1213
json_to_python_simple = {
1314
"string": str,
1415
"number": float,
@@ -225,6 +226,31 @@ def transform(
225226
logger.warning(self.get_error_message(e))
226227

227228
def get_error_message(self, e: ValidationError) -> str:
    """
    Build a log-safe description of a failed transformation.

    The message reports the type structure of the offending instance, the
    validator's expected value (``e.validator_value``) and the dotted path to
    the field. The instance's own value is deliberately left out.

    :param e: validation error describing the value that could not be transformed.
    :return: the formatted message; the path is an empty string when ``e.path`` is empty.
    """
    type_structure = self._get_type_structure(e.instance)
    # Path segments may be dict keys or list indexes, so stringify each one before joining.
    field_path = ".".join(str(segment) for segment in e.path)
    return f"Failed to transform value from type '{type_structure}' to type '{e.validator_value}' at path: '{field_path}'"
236+
237+
def _get_type_structure(self, input_data: Any, current_depth: int = 0) -> Any:
    """
    Describe the shape of ``input_data`` using type names instead of its values.

    Used for error message construction so that the values themselves are not
    included; dict keys (field names) are preserved.

    :param input_data: value to describe.
    :param current_depth: number of dict levels already descended; the default
        of 0 is what top-level callers use.
    :return: ``"null"`` for ``None``; for a dict below ``MAX_NESTING_DEPTH``, a
        dict with the same keys mapped to the description of each field;
        ``"object"`` for a dict at or beyond that depth; otherwise the JSON type
        name from ``python_to_json``, or the Python type name when the value's
        type has no entry there.
    """
    # None is handled up front and always reported as "null".
    if input_data is None:
        return "null"

    if isinstance(input_data, dict):
        # Stop expanding once the depth limit is reached so the message stays bounded.
        if current_depth >= MAX_NESTING_DEPTH:
            return "object"
        return {
            key: self._get_type_structure(field_value, current_depth + 1)
            for key, field_value in input_data.items()
        }

    # python_to_json is defined elsewhere in this module; assumed to be a dict
    # keyed by exact Python type (it is subscripted that way) - confirm.
    # Building a log message must not raise for types missing from it
    # (e.g. Decimal, datetime, tuple, str/int subclasses), so fall back to the
    # Python type name instead of letting a KeyError escape.
    value_type = type(input_data)
    return python_to_json.get(value_type, value_type.__name__)

unit_tests/sources/utils/test_transform.py

+55-10
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,6 @@
6565
@pytest.mark.parametrize(
6666
"schema, actual, expected, expected_warns",
6767
[
68-
(SIMPLE_SCHEMA, {"value": 12}, {"value": "12"}, None),
6968
(SIMPLE_SCHEMA, {"value": 12}, {"value": "12"}, None),
7069
(
7170
SIMPLE_SCHEMA,
@@ -104,14 +103,14 @@
104103
COMPLEX_SCHEMA,
105104
{"prop": 12, "number_prop": "aa12", "array": [12]},
106105
{"prop": "12", "number_prop": "aa12", "array": ["12"]},
107-
"Failed to transform value 'aa12' of type 'string' to 'number', key path: '.number_prop'",
106+
"Failed to transform value from type 'string' to type 'number' at path: 'number_prop'",
108107
),
109108
# Field too_many_types has an ambiguous type, skip formatting
110109
(
111110
COMPLEX_SCHEMA,
112111
{"prop": 12, "too_many_types": 1212, "array": [12]},
113112
{"prop": "12", "too_many_types": 1212, "array": ["12"]},
114-
"Failed to transform value 1212 of type 'integer' to '['boolean', 'null', 'string']', key path: '.too_many_types'",
113+
"Failed to transform value from type 'integer' to type '['boolean', 'null', 'string']' at path: 'too_many_types'",
115114
),
116115
# Test null field
117116
(COMPLEX_SCHEMA, {"prop": None, "array": [12]}, {"prop": "None", "array": ["12"]}, None),
@@ -196,7 +195,7 @@
196195
},
197196
{"value": "string"},
198197
{"value": "string"},
199-
"Failed to transform value 'string' of type 'string' to 'array', key path: '.value'",
198+
"Failed to transform value from type 'string' to type 'array' at path: 'value'",
200199
),
201200
(
202201
{
@@ -205,21 +204,21 @@
205204
},
206205
{"value": {"key": "value"}},
207206
{"value": {"key": "value"}},
208-
"Failed to transform value {'key': 'value'} of type 'object' to 'array', key path: '.value'",
207+
"Failed to transform value from type '{'key': 'string'}' to type 'array' at path: 'value'",
209208
),
210209
(
211210
# Schema root object is not an object, no conversion should happen
212211
{"type": "integer"},
213212
{"value": "12"},
214213
{"value": "12"},
215-
"Failed to transform value {'value': '12'} of type 'object' to 'integer', key path: '.'",
214+
"Failed to transform value from type '{'value': 'string'}' to type 'integer' at path: ''",
216215
),
217216
(
218217
# More than one type except null, no conversion should happen
219218
{"type": "object", "properties": {"value": {"type": ["string", "boolean", "null"]}}},
220219
{"value": 12},
221220
{"value": 12},
222-
"Failed to transform value 12 of type 'integer' to '['string', 'boolean', 'null']', key path: '.value'",
221+
"Failed to transform value from type 'integer' to type '['string', 'boolean', 'null']' at path: 'value'",
223222
),
224223
(
225224
# oneOf not supported, no conversion for one_of_value should happen
@@ -252,7 +251,7 @@
252251
},
253252
{"value": {"key": "value"}},
254253
{"value": {"key": "value"}},
255-
"Failed to transform value {'key': 'value'} of type 'object' to 'array', key path: '.value'",
254+
"Failed to transform value from type '{'key': 'string'}' to type 'array' at path: 'value'",
256255
),
257256
(
258257
{
@@ -263,7 +262,7 @@
263262
},
264263
{"value1": "value2"},
265264
{"value1": "value2"},
266-
"Failed to transform value 'value2' of type 'string' to 'object', key path: '.value1'",
265+
"Failed to transform value from type 'string' to type 'object' at path: 'value1'",
267266
),
268267
(
269268
{
@@ -272,9 +271,55 @@
272271
},
273272
{"value": ["one", "two"]},
274273
{"value": ["one", "two"]},
275-
"Failed to transform value 'one' of type 'string' to 'object', key path: '.value.0'",
274+
"Failed to transform value from type 'string' to type 'object' at path: 'value.0'",
275+
),
276+
(
277+
{"type": "string"},
278+
None,
279+
None,
280+
"Failed to transform value from type 'null' to type 'string' at path: ''",
281+
),
282+
(
283+
{"type": "string"},
284+
{"a": {"b": {"c": {"d": {"e": "deep value"}}}}},
285+
{"a": {"b": {"c": {"d": {"e": "deep value"}}}}},
286+
"Failed to transform value from type '{'a': {'b': {'c': 'object'}}}' to type 'string' at path: ''",
276287
),
277288
],
289+
ids=[
290+
"simple_number_to_string",
291+
"preserve_unexpected_fields",
292+
"array_with_mixed_types",
293+
"nested_list_conversion",
294+
"array_in_nested_object",
295+
"string_to_boolean_nested",
296+
"empty_object",
297+
"string_to_integer",
298+
"skip_invalid_number_format",
299+
"skip_ambiguous_types",
300+
"null_to_string",
301+
"preserve_null_when_allowed",
302+
"very_nested_object_conversion",
303+
"null_in_nested_structure",
304+
"object_without_properties",
305+
"array_without_items",
306+
"non_array_to_array",
307+
"number_to_array",
308+
"null_to_array",
309+
"null_preserved_for_nullable_array",
310+
"number_to_string_array",
311+
"string_fails_object_array",
312+
"object_fails_array_with_string_array_items",
313+
"non_object_root_schema",
314+
"multiple_allowed_types",
315+
"oneof_not_supported",
316+
"facebook_cpc_number_conversion",
317+
"object_fails_array_with_string_item",
318+
"string_fails_object_conversion",
319+
"string_fails_object_in_array",
320+
"null_input_data",
321+
"max_nesting_depth_protection",
322+
],
278323
)
279324
def test_transform(schema, actual, expected, expected_warns, caplog):
280325
t = TypeTransformer(TransformConfig.DefaultSchemaNormalization)

0 commit comments

Comments
 (0)