Skip to content

Commit f512208

Browse files
authored
Introduce json path commons lib (#11680)
1 parent 582baf3 commit f512208

File tree

3 files changed

+515
-0
lines changed

3 files changed

+515
-0
lines changed

airbyte-commons/build.gradle

+3
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,7 @@ plugins {
44

55
dependencies {
66
// Dependencies for this module should be specified in the top-level build.gradle. See readme for more explanation.
7+
8+
// this dependency is an exception to the above rule because it is only used INTERNALLY to the commons library.
9+
implementation 'com.jayway.jsonpath:json-path:2.7.0'
710
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,284 @@
1+
/*
2+
* Copyright (c) 2021 Airbyte, Inc., all rights reserved.
3+
*/
4+
5+
package io.airbyte.commons.json;
6+
7+
import com.fasterxml.jackson.databind.JsonNode;
8+
import com.fasterxml.jackson.databind.node.ArrayNode;
9+
import com.google.api.client.util.Preconditions;
10+
import com.jayway.jsonpath.Configuration;
11+
import com.jayway.jsonpath.JsonPath;
12+
import com.jayway.jsonpath.Option;
13+
import com.jayway.jsonpath.PathNotFoundException;
14+
import com.jayway.jsonpath.spi.json.JacksonJsonNodeJsonProvider;
15+
import com.jayway.jsonpath.spi.json.JsonProvider;
16+
import com.jayway.jsonpath.spi.mapper.JacksonMappingProvider;
17+
import com.jayway.jsonpath.spi.mapper.MappingProvider;
18+
import io.airbyte.commons.util.MoreIterators;
19+
import java.util.Collections;
20+
import java.util.EnumSet;
21+
import java.util.List;
22+
import java.util.Optional;
23+
import java.util.Set;
24+
import java.util.function.BiFunction;
25+
import java.util.stream.Collectors;
26+
import org.slf4j.Logger;
27+
import org.slf4j.LoggerFactory;
28+
29+
/**
30+
* JSONPath is a specification for querying JSON objects. More information about the specification can
31+
* be found here: https://goessner.net/articles/JsonPath/. For those familiar with jq, JSONPath will
32+
* be most recognizable as "that DSL that jq uses".
33+
*
34+
* We use a java implementation of this specification (repo: https://github.com/json-path/JsonPath).
35+
* This class wraps that implementation to make it easier to leverage this tool internally.
36+
*
37+
* GOTCHA: Keep in mind with JSONPath, depending on the query, 0, 1, or N values may be returned.
38+
* The pattern for handling return values is very much like writing SQL queries. When using it, you
39+
* must consider what the number of return values for your query might be. e.g. for this object: {
40+
* "alpha": [1, 2, 3] }, this JSONPath "$.alpha[*]", would return: [1, 2, 3], but this one
41+
* "$.alpha[0]" would return: [1]. The Java interface we place over this query system defaults to
42+
* returning a list for query results. In addition, we provide helper functions that will just
43+
* return a single value (see: {@link JsonPaths#getSingleValue(JsonNode, String)}). These should
44+
* only be used if it is not possible for a query to return more than one value.
45+
*
46+
* Note: Package private as most uses of JsonPaths seems like they can be hidden inside other
47+
* commons libraries (i.e. Jsons and JsonsSchemas). If this assumption proves incorrect, we can open
48+
* it up.
49+
*/
50+
class JsonPaths {
51+
52+
private static final Logger LOGGER = LoggerFactory.getLogger(JsonPaths.class);
53+
54+
// Install JsonPath defaults when this class is loaded so the library matches our Jackson-based
// JSON setup. Configuration.setDefaults is a static setter on the JsonPath library, so the
// defaults installed here are the ones Configuration.defaultConfiguration() hands out afterwards.
static {
  Configuration.setDefaults(new Configuration.Defaults() {

    // Jackson-backed providers: these allow JsonNode instances to be passed directly to JsonPath.
    // Only one instance of this anonymous class is ever created, so instance fields suffice.
    private final JsonProvider jacksonJsonProvider = new JacksonJsonNodeJsonProvider();
    private final MappingProvider jacksonMappingProvider = new JacksonMappingProvider();

    @Override
    public Set<Option> options() {
      /*
       * Every JsonPath query returns a list of values, which makes handling results uniform. Where
       * a single value is expected, helpers in this class assert that. See the JsonPath
       * documentation at https://github.com/json-path/JsonPath.
       */
      return EnumSet.of(Option.ALWAYS_RETURN_LIST);
    }

    @Override
    public MappingProvider mappingProvider() {
      return jacksonMappingProvider;
    }

    @Override
    public JsonProvider jsonProvider() {
      return jacksonJsonProvider;
    }

  });
}
84+
85+
/*
86+
* This version of the JsonPath Configuration object allows queries to return the paths of values
87+
* instead of the values that were found.
88+
*/
89+
private static final Configuration GET_PATHS_CONFIGURATION = Configuration.defaultConfiguration().addOptions(Option.AS_PATH_LIST);
90+
91+
/**
92+
* Attempt to validate if a string is a valid JSONPath string. This assertion does NOT handle all
93+
* cases, but at least a common on. We can add to it as we detect others.
94+
*
95+
* @param jsonPath - path to validate
96+
*/
97+
public static void assertIsJsonPath(final String jsonPath) {
98+
Preconditions.checkArgument(jsonPath.startsWith("$"));
99+
}
100+
101+
/**
102+
* Attempt to detect if a JSONPath query could return more than 1 value. This assertion does NOT
103+
* handle all cases, but at least a common on. We can add to it as we detect others.
104+
*
105+
* @param jsonPath - path to validate
106+
*/
107+
public static void assertIsSingleReturnQuery(final String jsonPath) {
108+
Preconditions.checkArgument(!jsonPath.contains("*"), "Cannot accept paths with wildcards because they may return more than one item.");
109+
}
110+
111+
/**
112+
* Given a JSONPath, returns all the values that match that path.
113+
*
114+
* e.g. for this object: { "alpha": [1, 2, 3] }, if the input JSONPath were "$.alpha[*]", this
115+
* function would return: [1, 2, 3].
116+
*
117+
* @param json - json object
118+
* @param jsonPath - path into the json object. must be in the format of JSONPath.
119+
* @return all values that match the input query
120+
*/
121+
public static List<JsonNode> getValues(final JsonNode json, final String jsonPath) {
122+
return getInternal(Configuration.defaultConfiguration(), json, jsonPath);
123+
}
124+
125+
/**
126+
* Given a JSONPath, returns all the path of all values that match that path.
127+
*
128+
* e.g. for this object: { "alpha": [1, 2, 3] }, if the input JSONPath were "$.alpha[*]", this
129+
* function would return: ["$.alpha[0]", "$.alpha[1]", "$.alpha[2]"].
130+
*
131+
* @param json - json object
132+
* @param jsonPath - path into the json object. must be in the format of JSONPath.
133+
* @return all paths that are present that match the input query. returns a list (instead of a set),
134+
* because having a deterministic ordering is valuable for all downstream consumers (i.e. in
135+
* most cases if we returned a set, the downstream would then put it in a set and sort it so
136+
* that if they are doing replacements using the paths, the behavior is predictable e.g. if
137+
* you do replace $.alpha and $.alpha[*], the order you do those replacements in matters).
138+
* specifically that said, we do expect that there will be no duplicates in the returned
139+
* list.
140+
*/
141+
public static List<String> getPaths(final JsonNode json, final String jsonPath) {
142+
return getInternal(GET_PATHS_CONFIGURATION, json, jsonPath)
143+
.stream()
144+
.map(JsonNode::asText)
145+
.collect(Collectors.toList());
146+
}
147+
148+
/**
149+
* Given a JSONPath, returns 1 or 0 values that match the path. Throws if more than 1 value is
150+
* found.
151+
*
152+
* THIS SHOULD ONLY BE USED IF THE JSONPATH CAN ONLY EVER RETURN 0 OR 1 VALUES. e.g. don't do
153+
* "$.alpha[*]"
154+
*
155+
* @param json - json object
156+
* @param jsonPath - path into the json object. must be in the format of JSONPath.
157+
* @return value if present, otherwise empty.
158+
*/
159+
public static Optional<JsonNode> getSingleValue(final JsonNode json, final String jsonPath) {
160+
assertIsSingleReturnQuery(jsonPath);
161+
162+
final List<JsonNode> jsonNodes = getValues(json, jsonPath);
163+
164+
Preconditions.checkState(jsonNodes.size() <= 1, String.format("Path returned more than one item. path: %s items: %s", jsonPath, jsonNodes));
165+
return jsonNodes.isEmpty() ? Optional.empty() : Optional.of(jsonNodes.get(0));
166+
}
167+
168+
/**
169+
* Given a JSONPath, true if path is present in the object, otherwise false. Throws is more than 1
170+
* path is found.
171+
*
172+
* THIS SHOULD ONLY BE USED IF THE JSONPATH CAN ONLY EVER RETURN 0 OR 1 VALUES. e.g. don't do
173+
* "$.alpha[*]"
174+
*
175+
* @param json - json object
176+
* @param jsonPath - path into the json object. must be in the format of JSONPath.
177+
* @return true if path is present in the object, otherwise false.
178+
*/
179+
public static boolean isPathPresent(final JsonNode json, final String jsonPath) {
180+
assertIsSingleReturnQuery(jsonPath);
181+
182+
final List<String> foundPaths = getPaths(json, jsonPath);
183+
184+
Preconditions.checkState(foundPaths.size() <= 1, String.format("Path returned more than one item. path: %s items: %s", jsonPath, foundPaths));
185+
return !foundPaths.isEmpty();
186+
}
187+
188+
/**
189+
* Traverses into a json object and replaces all values that match the input path with the provided
190+
* string. Throws if no existing fields match the path.
191+
*
192+
* @param json - json object
193+
* @param jsonPath - path into the json object. must be in the format of JSONPath.
194+
* @param replacement - a string value to replace the current value at the jsonPath
195+
* @throws PathNotFoundException throws if the path is not present in the object
196+
*/
197+
public static JsonNode replaceAtStringLoud(final JsonNode json, final String jsonPath, final String replacement) {
198+
return replaceAtJsonNodeLoud(json, jsonPath, Jsons.jsonNode(replacement));
199+
}
200+
201+
/**
202+
* Traverses into a json object and replaces all values that match the input path with the provided
203+
* string . Does nothing if no existing fields match the path.
204+
*
205+
* @param json - json object
206+
* @param jsonPath - path into the json object. must be in the format of JSONPath.
207+
* @param replacement - a string value to replace the current value at the jsonPath
208+
*/
209+
public static JsonNode replaceAtString(final JsonNode json, final String jsonPath, final String replacement) {
210+
return replaceAtJsonNode(json, jsonPath, Jsons.jsonNode(replacement));
211+
}
212+
213+
/**
214+
* Traverses into a json object and replaces all values that match the input path with the provided
215+
* json object. Does nothing if no existing fields match the path.
216+
*
217+
* @param json - json object
218+
* @param jsonPath - path into the json object. must be in the format of JSONPath.
219+
* @param replacement - a json node to replace the current value at the jsonPath
220+
*/
221+
public static JsonNode replaceAtJsonNodeLoud(final JsonNode json, final String jsonPath, final JsonNode replacement) {
222+
assertIsJsonPath(jsonPath);
223+
return JsonPath.parse(Jsons.clone(json)).set(jsonPath, replacement).json();
224+
}
225+
226+
/**
227+
* Traverses into a json object and replaces all values that match the input path with the provided
228+
* json object. Does nothing if no existing fields match the path.
229+
*
230+
* @param json - json object
231+
* @param jsonPath - path into the json object. must be in the format of JSONPath.
232+
* @param replacement - a json node to replace the current value at the jsonPath
233+
*/
234+
public static JsonNode replaceAtJsonNode(final JsonNode json, final String jsonPath, final JsonNode replacement) {
235+
try {
236+
return replaceAtJsonNodeLoud(json, jsonPath, replacement);
237+
} catch (final PathNotFoundException e) {
238+
LOGGER.debug("Path not found", e);
239+
return Jsons.clone(json); // defensive copy in failure case.
240+
}
241+
}
242+
243+
/**
244+
* Traverses into a json object and replaces all values that match the input path with the output of
245+
* the provided function. Does nothing if no existing fields match the path.
246+
*
247+
* @param json - json object
248+
* @param jsonPath - path into the json object. must be in the format of JSONPath.
249+
* @param replacementFunction - a function that takes in a node that matches the path as well as the
250+
* path to the node itself. the return of this function will replace the current node.
251+
*/
252+
public static JsonNode replaceAt(final JsonNode json, final String jsonPath, final BiFunction<JsonNode, String, JsonNode> replacementFunction) {
253+
JsonNode clone = Jsons.clone(json);
254+
assertIsJsonPath(jsonPath);
255+
final List<String> foundPaths = getPaths(clone, jsonPath);
256+
for (final String foundPath : foundPaths) {
257+
final Optional<JsonNode> singleValue = getSingleValue(clone, foundPath);
258+
if (singleValue.isPresent()) {
259+
final JsonNode replacement = replacementFunction.apply(singleValue.get(), foundPath);
260+
clone = replaceAtJsonNode(clone, foundPath, replacement);
261+
}
262+
}
263+
return clone;
264+
}
265+
266+
/**
267+
*
268+
* @param conf - JsonPath configuration. Primarily used to reuse code to allow fetching values or
269+
* paths from a json object
270+
* @param json - json object
271+
* @param jsonPath - path into the json object. must be in the format of JSONPath.
272+
* @return all values that match the input query (whether the values are paths or actual values in
273+
* the json object is determined by the conf)
274+
*/
275+
private static List<JsonNode> getInternal(final Configuration conf, final JsonNode json, final String jsonPath) {
276+
assertIsJsonPath(jsonPath);
277+
try {
278+
return MoreIterators.toList(JsonPath.using(conf).parse(json).read(jsonPath, ArrayNode.class).iterator());
279+
} catch (final PathNotFoundException e) {
280+
return Collections.emptyList();
281+
}
282+
}
283+
284+
}

0 commit comments

Comments
 (0)