|
| 1 | +/* |
| 2 | + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. |
| 3 | + */ |
| 4 | + |
| 5 | +package io.airbyte.commons.json; |
| 6 | + |
| 7 | +import com.fasterxml.jackson.databind.JsonNode; |
| 8 | +import com.fasterxml.jackson.databind.node.ArrayNode; |
| 9 | +import com.google.api.client.util.Preconditions; |
| 10 | +import com.jayway.jsonpath.Configuration; |
| 11 | +import com.jayway.jsonpath.JsonPath; |
| 12 | +import com.jayway.jsonpath.Option; |
| 13 | +import com.jayway.jsonpath.PathNotFoundException; |
| 14 | +import com.jayway.jsonpath.spi.json.JacksonJsonNodeJsonProvider; |
| 15 | +import com.jayway.jsonpath.spi.json.JsonProvider; |
| 16 | +import com.jayway.jsonpath.spi.mapper.JacksonMappingProvider; |
| 17 | +import com.jayway.jsonpath.spi.mapper.MappingProvider; |
| 18 | +import io.airbyte.commons.util.MoreIterators; |
| 19 | +import java.util.Collections; |
| 20 | +import java.util.EnumSet; |
| 21 | +import java.util.List; |
| 22 | +import java.util.Optional; |
| 23 | +import java.util.Set; |
| 24 | +import java.util.function.BiFunction; |
| 25 | +import java.util.stream.Collectors; |
| 26 | +import org.slf4j.Logger; |
| 27 | +import org.slf4j.LoggerFactory; |
| 28 | + |
| 29 | +/** |
| 30 | + * JSONPath is specification for querying JSON objects. More information about the specification can |
| 31 | + * be found here: https://goessner.net/articles/JsonPath/. For those familiar with jq, JSONPath will |
| 32 | + * be most recognizable as "that DSL that jq uses". |
| 33 | + * |
| 34 | + * We use a java implementation of this specification (repo: https://github.com/json-path/JsonPath). |
| 35 | + * This class wraps that implementation to make it easier to leverage this tool internally. |
| 36 | + * |
| 37 | + * GOTCHA: Keep in mind with JSONPath, depending on the query, 0, 1, or N values may be returned. |
| 38 | + * The pattern for handling return values is very much like writing SQL queries. When using it, you |
| 39 | + * must consider what the number of return values for your query might be. e.g. for this object: { |
| 40 | + * "alpha": [1, 2, 3] }, this JSONPath "$.alpha[*]", would return: [1, 2, 3], but this one |
| 41 | + * "$.alpha[0]" would return: [1]. The Java interface we place over this query system defaults to |
| 42 | + * returning a list for query results. In addition, we provide helper functions that will just |
| 43 | + * return a single value (see: {@link JsonPaths#getSingleValue(JsonNode, String)}). These should |
| 44 | + * only be used if it is not possible for a query to return more than one value. |
| 45 | + * |
| 46 | + * Note: Package private as most uses of JsonPaths seems like they can be hidden inside other |
| 47 | + * commons libraries (i.e. Jsons and JsonsSchemas). If this assumption proves incorrect, we can open |
| 48 | + * it up. |
| 49 | + */ |
| 50 | +class JsonPaths { |
| 51 | + |
| 52 | + private static final Logger LOGGER = LoggerFactory.getLogger(JsonPaths.class); |
| 53 | + |
| 54 | + // set default configurations at start up to match our JSON setup. |
| 55 | + static { |
| 56 | + Configuration.setDefaults(new Configuration.Defaults() { |
| 57 | + |
| 58 | + // allows us to pass in Jackson JsonNode |
| 59 | + private static final JsonProvider jsonProvider = new JacksonJsonNodeJsonProvider(); |
| 60 | + private static final MappingProvider mappingProvider = new JacksonMappingProvider(); |
| 61 | + |
| 62 | + @Override |
| 63 | + public JsonProvider jsonProvider() { |
| 64 | + return jsonProvider; |
| 65 | + } |
| 66 | + |
| 67 | + @Override |
| 68 | + public MappingProvider mappingProvider() { |
| 69 | + return mappingProvider; |
| 70 | + } |
| 71 | + |
| 72 | + @Override |
| 73 | + public Set<Option> options() { |
| 74 | + /* |
| 75 | + * All JsonPath queries will return a list of values. This makes parsing the outputs much easier. In |
| 76 | + * cases where it is not a list, helpers in this class can assert that. See |
| 77 | + * https://github.com/json-path/JsonPath in the JsonPath documentation. |
| 78 | + */ |
| 79 | + return EnumSet.of(Option.ALWAYS_RETURN_LIST); |
| 80 | + } |
| 81 | + |
| 82 | + }); |
| 83 | + } |
| 84 | + |
| 85 | + /* |
| 86 | + * This version of the JsonPath Configuration object allows queries to return to the path of values |
| 87 | + * instead of the values that were found. |
| 88 | + */ |
| 89 | + private static final Configuration GET_PATHS_CONFIGURATION = Configuration.defaultConfiguration().addOptions(Option.AS_PATH_LIST); |
| 90 | + |
| 91 | + /** |
| 92 | + * Attempt to validate if a string is a valid JSONPath string. This assertion does NOT handle all |
| 93 | + * cases, but at least a common on. We can add to it as we detect others. |
| 94 | + * |
| 95 | + * @param jsonPath - path to validate |
| 96 | + */ |
| 97 | + public static void assertIsJsonPath(final String jsonPath) { |
| 98 | + Preconditions.checkArgument(jsonPath.startsWith("$")); |
| 99 | + } |
| 100 | + |
| 101 | + /** |
| 102 | + * Attempt to detect if a JSONPath query could return more than 1 value. This assertion does NOT |
| 103 | + * handle all cases, but at least a common on. We can add to it as we detect others. |
| 104 | + * |
| 105 | + * @param jsonPath - path to validate |
| 106 | + */ |
| 107 | + public static void assertIsSingleReturnQuery(final String jsonPath) { |
| 108 | + Preconditions.checkArgument(!jsonPath.contains("*"), "Cannot accept paths with wildcards because they may return more than one item."); |
| 109 | + } |
| 110 | + |
| 111 | + /** |
| 112 | + * Given a JSONPath, returns all the values that match that path. |
| 113 | + * |
| 114 | + * e.g. for this object: { "alpha": [1, 2, 3] }, if the input JSONPath were "$.alpha[*]", this |
| 115 | + * function would return: [1, 2, 3]. |
| 116 | + * |
| 117 | + * @param json - json object |
| 118 | + * @param jsonPath - path into the json object. must be in the format of JSONPath. |
| 119 | + * @return all values that match the input query |
| 120 | + */ |
| 121 | + public static List<JsonNode> getValues(final JsonNode json, final String jsonPath) { |
| 122 | + return getInternal(Configuration.defaultConfiguration(), json, jsonPath); |
| 123 | + } |
| 124 | + |
| 125 | + /** |
| 126 | + * Given a JSONPath, returns all the path of all values that match that path. |
| 127 | + * |
| 128 | + * e.g. for this object: { "alpha": [1, 2, 3] }, if the input JSONPath were "$.alpha[*]", this |
| 129 | + * function would return: ["$.alpha[0]", "$.alpha[1]", "$.alpha[2]"]. |
| 130 | + * |
| 131 | + * @param json - json object |
| 132 | + * @param jsonPath - path into the json object. must be in the format of JSONPath. |
| 133 | + * @return all paths that are present that match the input query. returns a list (instead of a set), |
| 134 | + * because having a deterministic ordering is valuable for all downstream consumers (i.e. in |
| 135 | + * most cases if we returned a set, the downstream would then put it in a set and sort it so |
| 136 | + * that if they are doing replacements using the paths, the behavior is predictable e.g. if |
| 137 | + * you do replace $.alpha and $.alpha[*], the order you do those replacements in matters). |
| 138 | + * specifically that said, we do expect that there will be no duplicates in the returned |
| 139 | + * list. |
| 140 | + */ |
| 141 | + public static List<String> getPaths(final JsonNode json, final String jsonPath) { |
| 142 | + return getInternal(GET_PATHS_CONFIGURATION, json, jsonPath) |
| 143 | + .stream() |
| 144 | + .map(JsonNode::asText) |
| 145 | + .collect(Collectors.toList()); |
| 146 | + } |
| 147 | + |
| 148 | + /** |
| 149 | + * Given a JSONPath, returns 1 or 0 values that match the path. Throws if more than 1 value is |
| 150 | + * found. |
| 151 | + * |
| 152 | + * THIS SHOULD ONLY BE USED IF THE JSONPATH CAN ONLY EVER RETURN 0 OR 1 VALUES. e.g. don't do |
| 153 | + * "$.alpha[*]" |
| 154 | + * |
| 155 | + * @param json - json object |
| 156 | + * @param jsonPath - path into the json object. must be in the format of JSONPath. |
| 157 | + * @return value if present, otherwise empty. |
| 158 | + */ |
| 159 | + public static Optional<JsonNode> getSingleValue(final JsonNode json, final String jsonPath) { |
| 160 | + assertIsSingleReturnQuery(jsonPath); |
| 161 | + |
| 162 | + final List<JsonNode> jsonNodes = getValues(json, jsonPath); |
| 163 | + |
| 164 | + Preconditions.checkState(jsonNodes.size() <= 1, String.format("Path returned more than one item. path: %s items: %s", jsonPath, jsonNodes)); |
| 165 | + return jsonNodes.isEmpty() ? Optional.empty() : Optional.of(jsonNodes.get(0)); |
| 166 | + } |
| 167 | + |
| 168 | + /** |
| 169 | + * Given a JSONPath, true if path is present in the object, otherwise false. Throws is more than 1 |
| 170 | + * path is found. |
| 171 | + * |
| 172 | + * THIS SHOULD ONLY BE USED IF THE JSONPATH CAN ONLY EVER RETURN 0 OR 1 VALUES. e.g. don't do |
| 173 | + * "$.alpha[*]" |
| 174 | + * |
| 175 | + * @param json - json object |
| 176 | + * @param jsonPath - path into the json object. must be in the format of JSONPath. |
| 177 | + * @return true if path is present in the object, otherwise false. |
| 178 | + */ |
| 179 | + public static boolean isPathPresent(final JsonNode json, final String jsonPath) { |
| 180 | + assertIsSingleReturnQuery(jsonPath); |
| 181 | + |
| 182 | + final List<String> foundPaths = getPaths(json, jsonPath); |
| 183 | + |
| 184 | + Preconditions.checkState(foundPaths.size() <= 1, String.format("Path returned more than one item. path: %s items: %s", jsonPath, foundPaths)); |
| 185 | + return !foundPaths.isEmpty(); |
| 186 | + } |
| 187 | + |
| 188 | + /** |
| 189 | + * Traverses into a json object and replaces all values that match the input path with the provided |
| 190 | + * string. Throws if no existing fields match the path. |
| 191 | + * |
| 192 | + * @param json - json object |
| 193 | + * @param jsonPath - path into the json object. must be in the format of JSONPath. |
| 194 | + * @param replacement - a string value to replace the current value at the jsonPath |
| 195 | + * @throws PathNotFoundException throws if the path is not present in the object |
| 196 | + */ |
| 197 | + public static JsonNode replaceAtStringLoud(final JsonNode json, final String jsonPath, final String replacement) { |
| 198 | + return replaceAtJsonNodeLoud(json, jsonPath, Jsons.jsonNode(replacement)); |
| 199 | + } |
| 200 | + |
| 201 | + /** |
| 202 | + * Traverses into a json object and replaces all values that match the input path with the provided |
| 203 | + * string . Does nothing if no existing fields match the path. |
| 204 | + * |
| 205 | + * @param json - json object |
| 206 | + * @param jsonPath - path into the json object. must be in the format of JSONPath. |
| 207 | + * @param replacement - a string value to replace the current value at the jsonPath |
| 208 | + */ |
| 209 | + public static JsonNode replaceAtString(final JsonNode json, final String jsonPath, final String replacement) { |
| 210 | + return replaceAtJsonNode(json, jsonPath, Jsons.jsonNode(replacement)); |
| 211 | + } |
| 212 | + |
| 213 | + /** |
| 214 | + * Traverses into a json object and replaces all values that match the input path with the provided |
| 215 | + * json object. Does nothing if no existing fields match the path. |
| 216 | + * |
| 217 | + * @param json - json object |
| 218 | + * @param jsonPath - path into the json object. must be in the format of JSONPath. |
| 219 | + * @param replacement - a json node to replace the current value at the jsonPath |
| 220 | + */ |
| 221 | + public static JsonNode replaceAtJsonNodeLoud(final JsonNode json, final String jsonPath, final JsonNode replacement) { |
| 222 | + assertIsJsonPath(jsonPath); |
| 223 | + return JsonPath.parse(Jsons.clone(json)).set(jsonPath, replacement).json(); |
| 224 | + } |
| 225 | + |
| 226 | + /** |
| 227 | + * Traverses into a json object and replaces all values that match the input path with the provided |
| 228 | + * json object. Does nothing if no existing fields match the path. |
| 229 | + * |
| 230 | + * @param json - json object |
| 231 | + * @param jsonPath - path into the json object. must be in the format of JSONPath. |
| 232 | + * @param replacement - a json node to replace the current value at the jsonPath |
| 233 | + */ |
| 234 | + public static JsonNode replaceAtJsonNode(final JsonNode json, final String jsonPath, final JsonNode replacement) { |
| 235 | + try { |
| 236 | + return replaceAtJsonNodeLoud(json, jsonPath, replacement); |
| 237 | + } catch (final PathNotFoundException e) { |
| 238 | + LOGGER.debug("Path not found", e); |
| 239 | + return Jsons.clone(json); // defensive copy in failure case. |
| 240 | + } |
| 241 | + } |
| 242 | + |
| 243 | + /** |
| 244 | + * Traverses into a json object and replaces all values that match the input path with the output of |
| 245 | + * the provided function. Does nothing if no existing fields match the path. |
| 246 | + * |
| 247 | + * @param json - json object |
| 248 | + * @param jsonPath - path into the json object. must be in the format of JSONPath. |
| 249 | + * @param replacementFunction - a function that takes in a node that matches the path as well as the |
| 250 | + * path to the node itself. the return of this function will replace the current node. |
| 251 | + */ |
| 252 | + public static JsonNode replaceAt(final JsonNode json, final String jsonPath, final BiFunction<JsonNode, String, JsonNode> replacementFunction) { |
| 253 | + JsonNode clone = Jsons.clone(json); |
| 254 | + assertIsJsonPath(jsonPath); |
| 255 | + final List<String> foundPaths = getPaths(clone, jsonPath); |
| 256 | + for (final String foundPath : foundPaths) { |
| 257 | + final Optional<JsonNode> singleValue = getSingleValue(clone, foundPath); |
| 258 | + if (singleValue.isPresent()) { |
| 259 | + final JsonNode replacement = replacementFunction.apply(singleValue.get(), foundPath); |
| 260 | + clone = replaceAtJsonNode(clone, foundPath, replacement); |
| 261 | + } |
| 262 | + } |
| 263 | + return clone; |
| 264 | + } |
| 265 | + |
| 266 | + /** |
| 267 | + * |
| 268 | + * @param conf - JsonPath configuration. Primarily used to reuse code to allow fetching values or |
| 269 | + * paths from a json object |
| 270 | + * @param json - json object |
| 271 | + * @param jsonPath - path into the json object. must be in the format of JSONPath. |
| 272 | + * @return all values that match the input query (whether the values are paths or actual values in |
| 273 | + * the json object is determined by the conf) |
| 274 | + */ |
| 275 | + private static List<JsonNode> getInternal(final Configuration conf, final JsonNode json, final String jsonPath) { |
| 276 | + assertIsJsonPath(jsonPath); |
| 277 | + try { |
| 278 | + return MoreIterators.toList(JsonPath.using(conf).parse(json).read(jsonPath, ArrayNode.class).iterator()); |
| 279 | + } catch (final PathNotFoundException e) { |
| 280 | + return Collections.emptyList(); |
| 281 | + } |
| 282 | + } |
| 283 | + |
| 284 | +} |
0 commit comments