Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

migrate JsonSchemas to use basic path instead of JSONPath #13917

Merged
merged 3 commits into from
Jun 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import com.jayway.jsonpath.spi.json.JsonProvider;
import com.jayway.jsonpath.spi.mapper.JacksonMappingProvider;
import com.jayway.jsonpath.spi.mapper.MappingProvider;
import io.airbyte.commons.json.JsonSchemas.FieldNameOrList;
import io.airbyte.commons.util.MoreIterators;
import java.util.Collections;
import java.util.EnumSet;
Expand Down Expand Up @@ -94,6 +95,20 @@ public static String appendAppendListSplat(final String jsonPath) {
return jsonPath + JSON_PATH_LIST_SPLAT;
}

/**
 * Converts a path in the representation produced by {@link JsonSchemas} into a JSONPath string.
 *
 * <p>
 * The result is built up from the empty path: for every element, a list element appends the list
 * splat and any other element appends its field name.
 *
 * @param jsonSchemaPath - path as described in {@link JsonSchemas}
 * @return path as JSONPath
 */
public static String mapJsonSchemaPathToJsonPath(final List<FieldNameOrList> jsonSchemaPath) {
  String result = empty();
  for (final FieldNameOrList element : jsonSchemaPath) {
    if (element.isList()) {
      result = appendAppendListSplat(result);
    } else {
      result = appendField(result, element.getFieldName());
    }
  }
  return result;
}

/*
* This version of the JsonPath Configuration object allows queries to return to the path of values
* instead of the values that were found.
Expand Down
166 changes: 136 additions & 30 deletions airbyte-commons/src/main/java/io/airbyte/commons/json/JsonSchemas.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,20 @@

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.base.Preconditions;
import io.airbyte.commons.io.IOs;
import io.airbyte.commons.resources.MoreResources;
import io.airbyte.commons.util.MoreIterators;
import io.airbyte.commons.util.MoreLists;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.function.BiConsumer;
Expand Down Expand Up @@ -95,8 +96,33 @@ public static <T> Path prepareSchemas(final String resourceDir, final Class<T> k
}
}

public static void traverseJsonSchema(final JsonNode jsonSchemaNode, final BiConsumer<JsonNode, String> consumer) {
traverseJsonSchemaInternal(jsonSchemaNode, JsonPaths.empty(), consumer);
/**
 * Walks a JsonSchema object, handing each visited node and the path leading to it to the provided
 * consumer. The walk starts at the given object with an empty path.
 *
 * @param jsonSchema - JsonSchema object to traverse
 * @param consumer - accepts the current node and the path to that node.
 */
public static void traverseJsonSchema(final JsonNode jsonSchema, final BiConsumer<JsonNode, List<FieldNameOrList>> consumer) {
  final List<FieldNameOrList> rootPath = new ArrayList<>();
  traverseJsonSchemaInternal(jsonSchema, rootPath, consumer);
}

/**
 * Traverse a JsonSchema object, mapping each visited node to a value and collecting the results.
 *
 * @param jsonSchema - JsonSchema object to traverse
 * @param mapper - accepts the current node and the path to that node. Its result is wrapped with
 *        {@link Optional#ofNullable(Object)} before being handed to the filtered collector, so a
 *        null result becomes an empty Optional.
 * @param <T> - type of objects being collected
 * @return - list of the items collected during the traversal, exactly as returned by
 *         {@link #traverseJsonSchemaWithFilteredCollector(JsonNode, BiFunction)}.
 */
public static <T> List<T> traverseJsonSchemaWithCollector(final JsonNode jsonSchema,
                                                          final BiFunction<JsonNode, List<FieldNameOrList>, T> mapper) {
  // for the sake of code reuse, delegate to the filtered collector with a mapper that wraps every
  // mapped value in an Optional instead of applying a real filter.
  final BiFunction<JsonNode, List<FieldNameOrList>, Optional<T>> wrappingMapper =
      (node, path) -> Optional.ofNullable(mapper.apply(node, path));
  return traverseJsonSchemaWithFilteredCollector(jsonSchema, wrappingMapper);
}

/**
Expand All @@ -111,44 +137,45 @@ public static void traverseJsonSchema(final JsonNode jsonSchemaNode, final BiCon
* Collection } because there is no order or uniqueness guarantee so neither List nor Set
* make sense.
*/
public static <T> Collection<T> traverseJsonSchemaWithCollector(final JsonNode jsonSchema, final BiFunction<JsonNode, String, Optional<T>> mapper) {
final List<T> collectors = new ArrayList<>();
traverseJsonSchema(jsonSchema, (node, path) -> mapper.apply(node, path).ifPresent(collectors::add));
return collectors;
public static <T> List<T> traverseJsonSchemaWithFilteredCollector(final JsonNode jsonSchema,
                                                                  final BiFunction<JsonNode, List<FieldNameOrList>, Optional<T>> mapper) {
  // gather results in a mutable list while the traversal runs.
  final List<T> accumulated = new ArrayList<>();
  final BiConsumer<JsonNode, List<FieldNameOrList>> collectIfPresent = (node, path) -> {
    final Optional<T> mapped = mapper.apply(node, path);
    if (mapped.isPresent()) {
      accumulated.add(mapped.get());
    }
  };
  traverseJsonSchema(jsonSchema, collectIfPresent);
  // hand callers an unmodifiable snapshot rather than the working list.
  return accumulated.stream().toList();
}

/**
* Traverses a JsonSchema object. It returns the path to each node that meet the provided condition.
* The paths are return in JsonPath format
* The paths are returned as lists of {@link FieldNameOrList}. The traversal is a depth-first,
* pre-order search and values are returned in that order.
*
* @param obj - JsonSchema object to traverse
* @param predicate - predicate to determine if the path for a node should be collected.
* @return - collection of all paths that were collected during the traversal.
*/
public static Set<String> collectJsonPathsThatMeetCondition(final JsonNode obj, final Predicate<JsonNode> predicate) {
return new HashSet<>(traverseJsonSchemaWithCollector(obj, (node, path) -> {
public static List<List<FieldNameOrList>> collectPathsThatMeetCondition(final JsonNode obj, final Predicate<JsonNode> predicate) {
return traverseJsonSchemaWithFilteredCollector(obj, (node, path) -> {
if (predicate.test(node)) {
return Optional.of(path);
} else {
return Optional.empty();
}
}));
});
}

/**
* Recursive, depth-first implementation of { @link JsonSchemas#traverseJsonSchema(final JsonNode
* jsonNode, final BiConsumer<JsonNode, List<String>> consumer) }. Takes path as argument so that
* the path can be passsed to the consumer.
* the path can be passed to the consumer.
*
* @param jsonSchemaNode - jsonschema object to traverse.
* @param path - path from the first call of traverseJsonSchema to the current node.
* @param consumer - consumer to be called at each node. it accepts the current node and the path to
* the node from the root of the object passed at the root level invocation
*
*/
// todo (cgardens) - replace with easier to understand traversal logic from SecretsHelper.
private static void traverseJsonSchemaInternal(final JsonNode jsonSchemaNode,
final String path,
final BiConsumer<JsonNode, String> consumer) {
final List<FieldNameOrList> path,
final BiConsumer<JsonNode, List<FieldNameOrList>> consumer) {
if (!jsonSchemaNode.isObject()) {
throw new IllegalArgumentException(String.format("json schema nodes should always be object nodes. path: %s actual: %s", path, jsonSchemaNode));
}
Expand All @@ -162,23 +189,20 @@ private static void traverseJsonSchemaInternal(final JsonNode jsonSchemaNode,
switch (nodeType) {
// case BOOLEAN_TYPE, NUMBER_TYPE, STRING_TYPE, NULL_TYPE -> do nothing after consumer.accept above.
case ARRAY_TYPE -> {
final String newPath = JsonPaths.appendAppendListSplat(path);
final List<FieldNameOrList> newPath = MoreLists.add(path, FieldNameOrList.list());
// hit every node.
// log.error("array: " + jsonSchemaNode);
traverseJsonSchemaInternal(jsonSchemaNode.get(JSON_SCHEMA_ITEMS_KEY), newPath, consumer);
}
case OBJECT_TYPE -> {
final Optional<String> comboKeyWordOptional = getKeywordIfComposite(jsonSchemaNode);
if (jsonSchemaNode.has(JSON_SCHEMA_PROPERTIES_KEY)) {
for (final Iterator<Entry<String, JsonNode>> it = jsonSchemaNode.get(JSON_SCHEMA_PROPERTIES_KEY).fields(); it.hasNext();) {
final Entry<String, JsonNode> child = it.next();
final String newPath = JsonPaths.appendField(path, child.getKey());
// log.error("obj1: " + jsonSchemaNode);
final List<FieldNameOrList> newPath = MoreLists.add(path, FieldNameOrList.fieldName(child.getKey()));
traverseJsonSchemaInternal(child.getValue(), newPath, consumer);
}
} else if (comboKeyWordOptional.isPresent()) {
for (final JsonNode arrayItem : jsonSchemaNode.get(comboKeyWordOptional.get())) {
// log.error("obj2: " + jsonSchemaNode);
traverseJsonSchemaInternal(arrayItem, path, consumer);
}
} else {
Expand Down Expand Up @@ -206,30 +230,112 @@ private static Optional<String> getKeywordIfComposite(final JsonNode node) {
return Optional.empty();
}

public static List<String> getTypeOrObject(final JsonNode jsonNode) {
final List<String> types = getType(jsonNode);
/**
 * Resolves the type of a JSONSchema node the same way {@link #getType(JsonNode)} does, but falls
 * back to the object type when that lookup yields no type at all.
 *
 * @param jsonSchema - JSONSchema object
 * @return types of the node, or a single-element list holding the object type if none was found.
 */
public static List<String> getTypeOrObject(final JsonNode jsonSchema) {
  final List<String> declaredTypes = getType(jsonSchema);
  return declaredTypes.isEmpty() ? List.of(OBJECT_TYPE) : declaredTypes;
}

public static List<String> getType(final JsonNode jsonNode) {
if (jsonNode.has(JSON_SCHEMA_TYPE_KEY)) {
if (jsonNode.get(JSON_SCHEMA_TYPE_KEY).isArray()) {
return MoreIterators.toList(jsonNode.get(JSON_SCHEMA_TYPE_KEY).iterator())
/**
* Get the type of JSONSchema node. Uses JSONSchema types. Only returns the type of the "top-level"
* node. e.g. if more nodes are nested underneath because it is an object or an array, only the top
* level type is returned.
*
* @param jsonSchema - JSONSchema object
* @return type of the node.
*/
public static List<String> getType(final JsonNode jsonSchema) {
if (jsonSchema.has(JSON_SCHEMA_TYPE_KEY)) {
if (jsonSchema.get(JSON_SCHEMA_TYPE_KEY).isArray()) {
return MoreIterators.toList(jsonSchema.get(JSON_SCHEMA_TYPE_KEY).iterator())
.stream()
.map(JsonNode::asText)
.collect(Collectors.toList());
} else {
return List.of(jsonNode.get(JSON_SCHEMA_TYPE_KEY).asText());
return List.of(jsonSchema.get(JSON_SCHEMA_TYPE_KEY).asText());
}
}
if (jsonNode.has(JSON_SCHEMA_ENUM_KEY)) {
if (jsonSchema.has(JSON_SCHEMA_ENUM_KEY)) {
return List.of(STRING_TYPE);
}
return Collections.emptyList();
}

/**
 * Provides a basic scheme for describing the path into a JSON object. Each element in the path is
 * either a field name or a list.
 *
 * This class is helpful in the case where fields can be any UTF-8 string, so the only simple way to
 * keep track of the different parts of a path without going crazy with escape characters is to keep
 * it in a list with list set aside as a special case.
 *
 * We prefer using this scheme instead of JSONPath in the tree traversal because it is easier to
 * decompose a path in this scheme than it is in JSONPath. Some callers of the traversal logic want
 * to isolate parts of the path easily without the need for complex regex (that would be required if
 * we used JSONPath).
 *
 * Instances are immutable and are created through {@link #fieldName(String)} or {@link #list()}.
 */
public static class FieldNameOrList {

  // name of the field this element refers to; null if and only if this element is a list.
  private final String fieldName;
  private final boolean isList;

  /**
   * Creates a path element that refers to a field.
   *
   * @param fieldName - name of the field, must not be null
   * @return path element for the given field name
   * @throws NullPointerException if fieldName is null. Previously a null name silently produced a
   *         list element, which hid caller bugs.
   */
  public static FieldNameOrList fieldName(final String fieldName) {
    return new FieldNameOrList(Objects.requireNonNull(fieldName, "fieldName must not be null; use list() for list elements"), false);
  }

  /**
   * Creates a path element that refers to the items of a list.
   *
   * @return path element marking a list
   */
  public static FieldNameOrList list() {
    return new FieldNameOrList(null, true);
  }

  // the kind is passed explicitly instead of being inferred from a null field name, so that only
  // list() can create a list element.
  private FieldNameOrList(final String fieldName, final boolean isList) {
    this.fieldName = fieldName;
    this.isList = isList;
  }

  /**
   * Returns the field name of this element.
   *
   * @return field name
   * @throws IllegalStateException if this element is a list
   */
  public String getFieldName() {
    Preconditions.checkState(!isList, "cannot return field name, is list node");
    return fieldName;
  }

  /**
   * @return true if this element marks a list, false if it is a field name
   */
  public boolean isList() {
    return isList;
  }

  @Override
  public boolean equals(final Object o) {
    if (this == o) {
      return true;
    }
    if (!(o instanceof final FieldNameOrList that)) {
      return false;
    }
    return isList == that.isList && Objects.equals(fieldName, that.fieldName);
  }

  @Override
  public int hashCode() {
    return Objects.hash(fieldName, isList);
  }

  @Override
  public String toString() {
    return "FieldNameOrList{" +
        "fieldName='" + fieldName + '\'' +
        ", isList=" + isList +
        '}';
  }

}

}
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,18 @@ public static <T> List<T> concat(final List<T>... lists) {
return Stream.of(lists).flatMap(List::stream).toList();
}

/**
 * Returns a new list holding every element of the provided list followed by the extra item. The
 * provided list itself is not modified.
 *
 * @param list list to copy and add to
 * @param toAdd item to add
 * @param <T> type of list
 * @return new list with contents of provided list and the added item
 */
public static <T> List<T> add(final List<T> list, final T toAdd) {
  // size the copy up front so appending the extra item does not force a resize.
  final List<T> copy = new ArrayList<>(list.size() + 1);
  copy.addAll(list);
  copy.add(toAdd);
  return copy;
}

}
Loading