diff --git a/CHANGELOG.md b/CHANGELOG.md index edbf7c8ed065c..81e0033436a06 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Bump `com.azure:azure-storage-blob` from 12.23.0 to 12.28.1 ([#16501](https://github.com/opensearch-project/OpenSearch/pull/16501)) ### Changed +- Add Open Parameters to Flat_object Field Type ([#13853](https://github.com/opensearch-project/OpenSearch/pull/13853)) ### Deprecated diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/index/106_flat_object_with_parameter.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/index/106_flat_object_with_parameter.yml new file mode 100644 index 0000000000000..baed556ee0926 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/index/106_flat_object_with_parameter.yml @@ -0,0 +1,357 @@ +--- +# The test setup includes: +# - Create flat_object mapping for flat_object_normalizer_ignore_above, flat_object_index_false, flat_object_doc_values_false indices to test five parameters: +# - Index example documents +# - Refresh the index so it is ready for search tests +# - Search indices +setup: + - skip: + version: " - 2.99.99" + reason: "introduced in 3.0.0 " + + - do: + indices.create: + index: flat_object_normalizer_ignore_above + body: + mappings: + properties: + issue: + properties: + labels: + type: flat_object + normalizer: lowercase + depth_limit: 3 + ignore_above: 3 + doc_values: false + similarity: boolean + + - do: + indices.create: + index: flat_object_index_false + body: + mappings: + properties: + issue: + properties: + labels: + type: flat_object + index: false + + - do: + indices.create: + index: flat_object_doc_values_false + body: + mappings: + properties: + issue: + properties: + labels: + type: flat_object + doc_values: false + + - do: + index: + index: flat_object_normalizer_ignore_above + id: 1 + body: { + "issue": { + "labels": { + "Category": { + "type": 
"API", + "level": "bug", + "content": "cont" + }, + "priority": 5.00 + } + } + } + + - do: + index: + index: flat_object_normalizer_ignore_above + id: 2 + body: { + "issue": { + "labels": { + "Category": { + "level": [ "bug", "bug", "bug" ] + } + } + } + } + + - do: + index: + index: flat_object_index_false + id: 1 + body: { + "issue": { + "labels": { + "Category": { + "type": "API", + "level": "bug", + "content": "cont" + }, + "priority": 5.00 + } + } + } + + - do: + index: + index: flat_object_doc_values_false + id: 1 + body: { + "issue": { + "labels": { + "Category": { + "type": "API", + "level": "bug", + "content": "cont" + }, + "priority": 5.00 + } + } + } + + - do: + indices.refresh: + index: flat_object_* + +--- +# Delete Index when connection is teardown +teardown: + - skip: + version: " - 2.99.99" + reason: "introduced in 3.0.0 " + + - do: + indices.delete: + index: [flat_object_normalizer_ignore_above, flat_object_index_false, flat_object_doc_values_false] + +--- +# Verify that mappings under the catalog field did not expand +# and no dynamic fields were created. 
+"Mappings": + - skip: + version: " - 2.99.99" + reason: "flat_object is introduced in 3.0.0 in main branch" + + - do: + indices.get_mapping: + index: flat_object_normalizer_ignore_above + - is_true: flat_object_normalizer_ignore_above.mappings + - match: { flat_object_normalizer_ignore_above.mappings.properties.issue.properties.labels.type: flat_object } + # https://github.com/opensearch-project/OpenSearch/tree/main/rest-api-spec/src/main/resources/rest-api-spec/test#length + - length: { flat_object_normalizer_ignore_above.mappings.properties.issue.properties: 1 } + - length: { flat_object_normalizer_ignore_above.mappings.properties.issue.properties.labels: 6 } + +--- +"Supported queries": + - skip: + version: " - 2.99.99" + reason: "flat_object is introduced in 3.0.0 in main branch" + + # Verify Document Count + - do: + search: + index: flat_object_normalizer_ignore_above, + body: { + query: { + match_all: { } + } + } + + - length: { hits.hits: 2 } + + - do: + search: + index: flat_object_index_false, + body: { + query: { + match_all: { } + } + } + + - length: { hits.hits: 1 } + + - do: + search: + index: flat_object_doc_values_false, + body: { + query: { + match_all: { } + } + } + + - length: { hits.hits: 1 } + + # test normalizer=lowercase. 
+ - do: + search: + index: flat_object_normalizer_ignore_above, + body: { + _source: true, + query: { + term: { issue.labels: "api" } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.issue.labels.Category.type: "API" } + + - do: + search: + index: flat_object_normalizer_ignore_above, + body: { + _source: true, + query: { + term: { issue.labels.Category.type: "api" } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.issue.labels.Category.type: "API" } + + - do: + search: + index: flat_object_normalizer_ignore_above, + body: { + _source: true, + query: { + prefix: { issue.labels: "ap" } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.issue.labels.Category.type: "API" } + + - do: + search: + index: flat_object_normalizer_ignore_above, + body: { + _source: true, + query: { + prefix: { issue.labels.Category.type: "ap" } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.issue.labels.Category.type: "API" } + + # test ignore_above=3. + - do: + search: + index: flat_object_normalizer_ignore_above, + body: { + _source: true, + query: { + term: { issue.labels: "API" } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.issue.labels.Category.type: "API" } + + - do: + search: + index: flat_object_normalizer_ignore_above, + body: { + _source: true, + query: { + term: { issue.labels: "Approved" } + } + } + + - length: { hits.hits: 0 } + +# test similarity + # test ignore_above=3. 
+ - do: + search: + index: flat_object_normalizer_ignore_above, + body: { + _source: true, + query: { + term: { issue.labels.Category.level: "bug" } + } + } + + - length: { hits.hits: 2 } + - match: { hits.hits.0._score: 1 } + - match: { hits.hits.0._source.issue.labels.Category.level: "bug" } + - match: { hits.hits.1._score: 1 } + + +# test doc_values=false and index=false + - do: + search: + index: flat_object_*, + body: { + _source: true, + query: { + terms: { issue.labels: [ "API" ] } + } + } + + - length: { hits.hits: 3 } + - match: { hits.hits.0._index: "flat_object_doc_values_false" } + - match: { hits.hits.0._source.issue.labels.Category.type: "API" } + - match: { hits.hits.1._index: "flat_object_index_false" } + - match: { hits.hits.1._source.issue.labels.Category.type: "API" } + - match: { hits.hits.2._index: "flat_object_normalizer_ignore_above" } + - match: { hits.hits.2._source.issue.labels.Category.type: "API" } + + - do: + search: + index: flat_object_*, + body: { + _source: true, + query: { + terms: { issue.labels.Category.type: [ "API" ] } + } + } + + - length: { hits.hits: 3 } + - match: { hits.hits.0._index: "flat_object_doc_values_false" } + - match: { hits.hits.0._source.issue.labels.Category.type: "API" } + - match: { hits.hits.1._index: "flat_object_index_false" } + - match: { hits.hits.1._source.issue.labels.Category.type: "API" } + - match: { hits.hits.2._index: "flat_object_normalizer_ignore_above" } + - match: { hits.hits.2._source.issue.labels.Category.type: "API" } + + - do: + search: + index: flat_object_*, + body: { + _source: true, + query: { + wildcard: { issue.labels: "*P*" } + } + } + + - length: { hits.hits: 3 } + - match: { hits.hits.0._index: "flat_object_doc_values_false" } + - match: { hits.hits.0._source.issue.labels.Category.type: "API" } + - match: { hits.hits.1._index: "flat_object_index_false" } + - match: { hits.hits.1._source.issue.labels.Category.type: "API" } + - match: { hits.hits.2._index: 
"flat_object_normalizer_ignore_above" } + - match: { hits.hits.2._source.issue.labels.Category.type: "API" } + + - do: + search: + index: flat_object_*, + body: { + _source: true, + query: { + wildcard: { issue.labels.Category.type: "*P*" } + } + } + + - length: { hits.hits: 3 } + - match: { hits.hits.0._index: "flat_object_doc_values_false" } + - match: { hits.hits.0._source.issue.labels.Category.type: "API" } + - match: { hits.hits.1._index: "flat_object_index_false" } + - match: { hits.hits.1._source.issue.labels.Category.type: "API" } + - match: { hits.hits.2._index: "flat_object_normalizer_ignore_above" } + - match: { hits.hits.2._source.issue.labels.Category.type: "API" } diff --git a/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java b/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java index 21270b4241b15..b016ca07d4d93 100644 --- a/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java +++ b/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java @@ -20,6 +20,7 @@ import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.core.xcontent.XContentLocation; import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.index.mapper.MapperParsingException; import java.io.IOException; import java.math.BigInteger; @@ -54,17 +55,24 @@ public class JsonToStringXContentParser extends AbstractXContentParser { private final DeprecationHandler deprecationHandler; + private int depthLimit; + private int ignoreAbove; + public JsonToStringXContentParser( NamedXContentRegistry xContentRegistry, DeprecationHandler deprecationHandler, XContentParser parser, - String fieldTypeName + String fieldTypeName, + int depthLimit, + int ignoreAbove ) throws IOException { super(xContentRegistry, deprecationHandler); this.deprecationHandler = deprecationHandler; this.xContentRegistry = xContentRegistry; this.parser = parser; 
this.fieldTypeName = fieldTypeName; + this.depthLimit = depthLimit; + this.ignoreAbove = ignoreAbove; } public XContentParser parseObject() throws IOException { @@ -74,7 +82,7 @@ public XContentParser parseObject() throws IOException { builder.startObject(); LinkedList path = new LinkedList<>(Collections.singleton(fieldTypeName)); while (currentToken() != Token.END_OBJECT) { - parseToken(path); + parseToken(path, 1); } // deduplication the fieldName,valueList,valueAndPathList builder.field(this.fieldTypeName, new HashSet<>(keyList)); @@ -82,13 +90,18 @@ public XContentParser parseObject() throws IOException { builder.field(this.fieldTypeName + VALUE_AND_PATH_SUFFIX, new HashSet<>(valueAndPathList)); builder.endObject(); String jString = XContentHelper.convertToJson(BytesReference.bytes(builder), false, MediaTypeRegistry.JSON); return JsonXContent.jsonXContent.createParser(this.xContentRegistry, this.deprecationHandler, String.valueOf(jString)); } /** * @return true if the child object contains no_null value, false otherwise */ - private boolean parseToken(Deque path) throws IOException { + private boolean parseToken(Deque path, int depth) throws IOException { + if (depth > depthLimit) { + throw new MapperParsingException( + "the depth of flat_object field path " + path + " is bigger than maximum" + " depth [" + depthLimit + "]" + ); + } boolean isChildrenValueValid = false; boolean visitFieldName = false; if (this.parser.currentToken() == Token.FIELD_NAME) { @@ -104,21 +117,21 @@ private boolean parseToken(Deque path) throws IOException { } this.keyList.add(parts); // parts has no dot, so either it's the original fieldName or it's the last part this.parser.nextToken(); // advance to the value of fieldName - isChildrenValueValid = parseToken(path); // parse the value for fieldName (which will be an array, an object, - // or a primitive value) + isChildrenValueValid = parseToken(path, depth); // parse the value for fieldName (which will 
be an array, an object, + // or a primitive value) path.removeLast(); // Here is where we pop fieldName from the stack (since we're done with the value of fieldName) // Note that whichever other branch we just passed through has already ended with nextToken(), so we // don't need to call it. } else if (this.parser.currentToken() == Token.START_ARRAY) { parser.nextToken(); while (this.parser.currentToken() != Token.END_ARRAY) { - isChildrenValueValid |= parseToken(path); + isChildrenValueValid |= parseToken(path, depth + 1); } this.parser.nextToken(); } else if (this.parser.currentToken() == Token.START_OBJECT) { parser.nextToken(); while (this.parser.currentToken() != Token.END_OBJECT) { - isChildrenValueValid |= parseToken(path); + isChildrenValueValid |= parseToken(path, depth + 1); } this.parser.nextToken(); } else { @@ -150,11 +164,16 @@ private String parseValue() throws IOException { case VALUE_NUMBER: case VALUE_STRING: case VALUE_NULL: - return this.parser.textOrNull(); + String value = this.parser.textOrNull(); + if (value != null && value.length() <= ignoreAbove) { + return value; + } + break; // Handle other token types as needed default: throw new ParsingException(parser.getTokenLocation(), "Unexpected value token type [" + parser.currentToken() + "]"); } + return null; } @Override diff --git a/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java index 0ccdb40f9d33a..75b741d173b23 100644 --- a/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java @@ -21,21 +21,25 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.util.BytesRef; -import org.opensearch.Version; import org.opensearch.common.Nullable; import org.opensearch.common.collect.Iterators; +import org.opensearch.common.collect.Tuple; 
import org.opensearch.common.lucene.Lucene; import org.opensearch.common.unit.Fuzziness; import org.opensearch.common.xcontent.JsonToStringXContentParser; +import org.opensearch.common.xcontent.support.XContentMapValues; import org.opensearch.core.common.ParsingException; import org.opensearch.core.xcontent.DeprecationHandler; import org.opensearch.core.xcontent.NamedXContentRegistry; +import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.index.analysis.IndexAnalyzers; import org.opensearch.index.analysis.NamedAnalyzer; import org.opensearch.index.fielddata.IndexFieldData; import org.opensearch.index.fielddata.plain.SortedSetOrdinalsIndexFieldData; import org.opensearch.index.mapper.KeywordFieldMapper.KeywordFieldType; import org.opensearch.index.query.QueryShardContext; +import org.opensearch.index.similarity.SimilarityProvider; import org.opensearch.search.aggregations.support.CoreValuesSourceType; import org.opensearch.search.lookup.SearchLookup; @@ -46,6 +50,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.function.BiFunction; import java.util.function.Supplier; @@ -53,7 +58,9 @@ import static org.opensearch.common.xcontent.JsonToStringXContentParser.EQUAL_SYMBOL; import static org.opensearch.common.xcontent.JsonToStringXContentParser.VALUE_AND_PATH_SUFFIX; import static org.opensearch.common.xcontent.JsonToStringXContentParser.VALUE_SUFFIX; -import static org.opensearch.index.mapper.FlatObjectFieldMapper.FlatObjectFieldType.getKeywordFieldType; +import static org.opensearch.index.mapper.KeywordFieldMapper.Builder.getNormalizerAndSearchAnalyzer; +import static org.opensearch.index.mapper.TypeParsers.DOC_VALUES; +import static org.opensearch.index.mapper.TypeParsers.checkNull; /** * A field mapper for flat_objects. 
@@ -64,6 +71,14 @@ public final class FlatObjectFieldMapper extends DynamicKeyFieldMapper { public static final String CONTENT_TYPE = "flat_object"; + private final String normalizerName; + private final boolean isSearchable; + private final boolean hasDocValues; + private final int ignoreAbove; + private final SimilarityProvider similarity; + private final int depthLimit; + private final IndexAnalyzers indexAnalyzers; + /** * In flat_object field mapper, field type is similar to keyword field type * Cannot be tokenized, can OmitNorms, and can setIndexOption. @@ -83,11 +98,21 @@ public static class Defaults { @Override public MappedFieldType keyedFieldType(String key) { + Tuple analyzers = getNormalizerAndSearchAnalyzer( + this.name() + DOT_SYMBOL + key, + normalizerName != null ? normalizerName : "default", + false, + indexAnalyzers + ); return new FlatObjectFieldType( this.name() + DOT_SYMBOL + key, this.name(), (KeywordFieldType) valueFieldMapper.fieldType(), - (KeywordFieldType) valueAndPathFieldMapper.fieldType() + (KeywordFieldType) valueAndPathFieldMapper.fieldType(), + similarity, + analyzers.v1(), + analyzers.v2(), + ignoreAbove ); } @@ -107,10 +132,20 @@ public FlatObjectField(String field, BytesRef term, FieldType ft) { * @opensearch.internal */ public static class Builder extends FieldMapper.Builder { + private String normalizerName; + private int ignoreAbove = Integer.MAX_VALUE; + private SimilarityProvider similarity; + private int depthLimit = Integer.MAX_VALUE; + private final IndexAnalyzers indexAnalyzers; - public Builder(String name) { + public Builder(String name, IndexAnalyzers indexAnalyzers) { super(name, Defaults.FIELD_TYPE); builder = this; + this.indexAnalyzers = indexAnalyzers; + } + + public void setNormalizer(String normalizer) { + this.normalizerName = normalizer; } /** @@ -133,20 +168,49 @@ private ValueAndPathFieldMapper buildValueAndPathFieldMapper(FieldType fieldType @Override public FlatObjectFieldMapper build(BuilderContext 
context) { - boolean isSearchable = true; - boolean hasDocValue = true; - KeywordFieldType valueFieldType = getKeywordFieldType(buildFullName(context), VALUE_SUFFIX, isSearchable, hasDocValue); - KeywordFieldType valueAndPathFieldType = getKeywordFieldType( + Tuple analyzers = getNormalizerAndSearchAnalyzer( + buildFullName(context), + normalizerName != null ? normalizerName : "default", + false, + indexAnalyzers + ); + + KeywordFieldType valueFieldType = FlatObjectFieldType.getKeywordFieldType( + buildFullName(context), + VALUE_SUFFIX, + indexed, + hasDocValues, + similarity, + analyzers.v1(), + analyzers.v2() + ); + KeywordFieldType valueAndPathFieldType = FlatObjectFieldType.getKeywordFieldType( buildFullName(context), VALUE_AND_PATH_SUFFIX, - isSearchable, - hasDocValue + indexed, + hasDocValues, + similarity, + analyzers.v1(), + analyzers.v2() + ); + + FieldType fieldtype = new FieldType(Defaults.FIELD_TYPE); + fieldtype.setIndexOptions(TextParams.toIndexOptions(indexed, "docs")); + + FlatObjectFieldType fft = new FlatObjectFieldType( + buildFullName(context), + null, + valueFieldType, + valueAndPathFieldType, + similarity, + analyzers.v1(), + analyzers.v2(), + ignoreAbove ); - FlatObjectFieldType fft = new FlatObjectFieldType(buildFullName(context), null, valueFieldType, valueAndPathFieldType); return new FlatObjectFieldMapper( name, - Defaults.FIELD_TYPE, + fieldtype, fft, buildValueFieldMapper(Defaults.FIELD_TYPE, valueFieldType), buildValueAndPathFieldMapper(Defaults.FIELD_TYPE, valueAndPathFieldType), @@ -156,12 +220,18 @@ public FlatObjectFieldMapper build(BuilderContext context) { } } - public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n)); + public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.getIndexAnalyzers())); /** * Creates a new TypeParser for flatObjectFieldMapper that does not use ParameterizedFieldMapper */ public static class TypeParser implements Mapper.TypeParser { + + static final 
String INDEXED = "index"; + static final String NORMALIZER = "normalizer"; + static final String IGNORE_ABOVE = "ignore_above"; + static final String SIMILARITY = "similarity"; + static final String DEPTH_LIMIT = "depth_limit"; private final BiFunction builderFunction; public TypeParser(BiFunction builderFunction) { @@ -171,6 +241,44 @@ public TypeParser(BiFunction builderFunction) { @Override public Mapper.Builder parse(String name, Map node, ParserContext parserContext) throws MapperParsingException { Builder builder = builderFunction.apply(name, parserContext); + for (Iterator> iterator = node.entrySet().iterator(); iterator.hasNext();) { + Map.Entry entry = iterator.next(); + String propName = entry.getKey(); + Object propNode = entry.getValue(); + checkNull(propName, propNode); + switch (propName) { + case NORMALIZER: // allow lowercase, and custom normalizer + builder.setNormalizer(XContentMapValues.nodeStringValue(propNode)); + iterator.remove(); + break; + case INDEXED: // allow to set indexed to be false + builder.index(XContentMapValues.nodeBooleanValue(propNode)); + iterator.remove(); + break; + case DOC_VALUES: // allow to set docValues to be false + builder.hasDocValues = XContentMapValues.nodeBooleanValue(propNode); + iterator.remove(); + break; + case IGNORE_ABOVE:// values longer than this limit are skipped at index time; the document itself is not rejected. 
+ builder.ignoreAbove = XContentMapValues.nodeIntegerValue(propNode); + if (builder.ignoreAbove < 0) { + throw new IllegalArgumentException("[ignore_above] must be positive, got " + builder.ignoreAbove); + } + iterator.remove(); + break; + case SIMILARITY:// allow to set similarity setting + builder.similarity = TypeParsers.resolveSimilarity(parserContext, name, propNode); + iterator.remove(); + break; + case DEPTH_LIMIT:// allow to set maximum depth limitation to the JSON document + builder.depthLimit = XContentMapValues.nodeIntegerValue(propNode); + if (builder.depthLimit < 0) { + throw new IllegalArgumentException("[depth_limit] must be positive, got " + builder.depthLimit); + } + iterator.remove(); + break; + } + } return builder; } } @@ -182,7 +290,6 @@ public Mapper.Builder parse(String name, Map node, ParserCont public static final class FlatObjectFieldType extends StringFieldType { private final int ignoreAbove; - private final String nullValue; private final String mappedFieldTypeName; @@ -195,7 +302,10 @@ public FlatObjectFieldType( String mappedFieldTypeName, boolean isSearchable, boolean hasDocValues, - NamedAnalyzer analyzer, + int ignoreAbove, + SimilarityProvider similarity, + NamedAnalyzer normalizer, + NamedAnalyzer searchAnalyzer, Map meta ) { super( @@ -203,40 +313,77 @@ public FlatObjectFieldType( isSearchable, false, hasDocValues, - analyzer == null ? 
TextSearchInfo.SIMPLE_MATCH_ONLY : new TextSearchInfo(Defaults.FIELD_TYPE, null, analyzer, analyzer), + new TextSearchInfo(Defaults.FIELD_TYPE, similarity, normalizer, searchAnalyzer), meta ); - setIndexAnalyzer(Lucene.KEYWORD_ANALYZER); - this.ignoreAbove = Integer.MAX_VALUE; - this.nullValue = null; + + assert normalizer != null; + setIndexAnalyzer(normalizer); + this.ignoreAbove = ignoreAbove; this.mappedFieldTypeName = mappedFieldTypeName; - this.valueFieldType = getKeywordFieldType(name, VALUE_SUFFIX, isSearchable, hasDocValues); - this.valueAndPathFieldType = getKeywordFieldType(name, VALUE_AND_PATH_SUFFIX, isSearchable, hasDocValues); + this.valueFieldType = getKeywordFieldType( + name, + VALUE_SUFFIX, + isSearchable, + hasDocValues, + similarity, + normalizer, + searchAnalyzer + ); + this.valueAndPathFieldType = getKeywordFieldType( + name, + VALUE_AND_PATH_SUFFIX, + isSearchable, + hasDocValues, + similarity, + normalizer, + searchAnalyzer + ); } public FlatObjectFieldType( String name, String mappedFieldTypeName, KeywordFieldType valueFieldType, - KeywordFieldType valueAndPathFieldType + KeywordFieldType valueAndPathFieldType, + SimilarityProvider similarity, + NamedAnalyzer normalizer, + NamedAnalyzer searchAnalyzer, + int ignoreAbove ) { super( name, valueFieldType.isSearchable(), false, valueFieldType.hasDocValues(), - new TextSearchInfo(Defaults.FIELD_TYPE, null, Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER), + new TextSearchInfo(Defaults.FIELD_TYPE, similarity, normalizer, searchAnalyzer), Collections.emptyMap() ); - this.ignoreAbove = Integer.MAX_VALUE; - this.nullValue = null; + setIndexAnalyzer(normalizer); + this.ignoreAbove = ignoreAbove; this.mappedFieldTypeName = mappedFieldTypeName; this.valueFieldType = valueFieldType; this.valueAndPathFieldType = valueAndPathFieldType; } - static KeywordFieldType getKeywordFieldType(String fullName, String valueType, boolean isSearchable, boolean hasDocValue) { - return new KeywordFieldType(fullName + 
valueType, isSearchable, hasDocValue, Collections.emptyMap()) { + static KeywordFieldType getKeywordFieldType( + String fullName, + String valueType, + boolean isSearchable, + boolean hasDocValue, + SimilarityProvider similarity, + NamedAnalyzer normalizer, + NamedAnalyzer searchAnalyzer + ) { + TextSearchInfo textSearchInfo = new TextSearchInfo(Defaults.FIELD_TYPE, similarity, searchAnalyzer, searchAnalyzer); + return new KeywordFieldType( + fullName + valueType, + isSearchable, + hasDocValue, + normalizer, + textSearchInfo, + Collections.emptyMap() + ) { @Override protected String rewriteForDocValue(Object value) { assert value instanceof String; @@ -281,7 +428,7 @@ public ValueFetcher valueFetcher(QueryShardContext context, SearchLookup searchL throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] doesn't support formats."); } - return new SourceValueFetcher(name(), context, nullValue) { + return new SourceValueFetcher(name(), context, null) { @Override protected String parseSourceValue(Object value) { String flatObjectKeywordValue = value.toString(); @@ -416,35 +563,9 @@ private String inputToString(Object inputValue) { if (inputValue == null) { return null; } - if (inputValue instanceof Integer) { - String inputToString = Integer.toString((Integer) inputValue); - return inputToString; - } else if (inputValue instanceof Float) { - String inputToString = Float.toString((Float) inputValue); - return inputToString; - } else if (inputValue instanceof Boolean) { - String inputToString = Boolean.toString((Boolean) inputValue); - return inputToString; - } else if (inputValue instanceof Short) { - String inputToString = Short.toString((Short) inputValue); - return inputToString; - } else if (inputValue instanceof Long) { - String inputToString = Long.toString((Long) inputValue); - return inputToString; - } else if (inputValue instanceof Double) { - String inputToString = Double.toString((Double) inputValue); - return inputToString; - } 
else if (inputValue instanceof BytesRef) { - String inputToString = (((BytesRef) inputValue).utf8ToString()); - return inputToString; - } else if (inputValue instanceof String) { - String inputToString = (String) inputValue; - return inputToString; - } else if (inputValue instanceof Version) { - String inputToString = inputValue.toString(); - return inputToString; + if (inputValue instanceof BytesRef) { + return (((BytesRef) inputValue).utf8ToString()); } else { - // default to cast toString return inputValue.toString(); } } @@ -549,7 +670,13 @@ public Query wildcardQuery( this.fieldType = fieldType; this.valueFieldMapper = valueFieldMapper; this.valueAndPathFieldMapper = valueAndPathFieldMapper; - this.mappedFieldType = mappedFieldType; + this.normalizerName = builder.normalizerName; + this.isSearchable = builder.indexed; + this.hasDocValues = builder.hasDocValues; + this.ignoreAbove = builder.ignoreAbove; + this.similarity = builder.similarity; + this.depthLimit = builder.depthLimit; + this.indexAnalyzers = builder.indexAnalyzers; } @Override @@ -559,7 +686,25 @@ protected FlatObjectFieldMapper clone() { @Override protected void mergeOptions(FieldMapper other, List conflicts) { - + FlatObjectFieldMapper mappers = (FlatObjectFieldMapper) other; + if (!Objects.equals(this.normalizerName, mappers.normalizerName)) { + conflicts.add("mapper [" + name() + "] has different [normalizer]"); + } + if (!Objects.equals(this.isSearchable, mappers.isSearchable)) { + conflicts.add("mapper [" + name() + "] has different [index]"); + } + if (!Objects.equals(this.hasDocValues, mappers.hasDocValues)) { + conflicts.add("mapper [" + name() + "] has different [doc_values]"); + } + if (!Objects.equals(this.ignoreAbove, mappers.ignoreAbove)) { + conflicts.add("mapper [" + name() + "] has different [ignore_above]"); + } + if (!Objects.equals(this.similarity, mappers.similarity)) { + conflicts.add("mapper [" + name() + "] has different [similarity]"); + } + if 
(!Objects.equals(this.depthLimit, mappers.depthLimit)) { + conflicts.add("mapper [" + name() + "] has different [depth_limit]"); + } } @Override @@ -588,7 +733,9 @@ protected void parseCreateField(ParseContext context) throws IOException { NamedXContentRegistry.EMPTY, DeprecationHandler.IGNORE_DEPRECATIONS, ctxParser, - fieldType().name() + fieldType().name(), + depthLimit, + ignoreAbove ); /* JsonToStringParser is the main parser class to transform JSON into stringFields in a XContentParser @@ -607,7 +754,6 @@ protected void parseCreateField(ParseContext context) throws IOException { parseValueAddFields(context, value, fieldName); break; } - } } } @@ -643,6 +789,9 @@ private void parseValueAddFields(ParseContext context, String value, String fiel assert valueFieldMapper != null; assert valueAndPathFieldMapper != null; + if (value == null) { + return; + } NamedAnalyzer normalizer = fieldType().normalizer(); if (normalizer != null) { value = normalizeValue(normalizer, name(), value); @@ -651,9 +800,10 @@ private void parseValueAddFields(ParseContext context, String value, String fiel String[] valueTypeList = fieldName.split("\\._"); String valueType = "._" + valueTypeList[valueTypeList.length - 1]; + // convert to utf8 only once before feeding postings/dv/stored fields + final BytesRef binaryValue = new BytesRef(fieldType().name() + DOT_SYMBOL + value); + if (fieldType.indexOptions() != IndexOptions.NONE || fieldType.stored()) { - // convert to utf8 only once before feeding postings/dv/stored fields - final BytesRef binaryValue = new BytesRef(fieldType().name() + DOT_SYMBOL + value); if (fieldType().hasDocValues() == false) { createFieldNamesField(context); @@ -666,15 +816,15 @@ private void parseValueAddFields(ParseContext context, String value, String fiel } else if (valueType.equals(VALUE_AND_PATH_SUFFIX)) { valueAndPathFieldMapper.addField(context, value); } + } - if (fieldType().hasDocValues()) { - if (fieldName.equals(fieldType().name())) { - 
context.doc().add(new SortedSetDocValuesField(fieldType().name(), binaryValue)); - } else if (valueType.equals(VALUE_SUFFIX)) { - context.doc().add(new SortedSetDocValuesField(fieldType().name() + VALUE_SUFFIX, binaryValue)); - } else if (valueType.equals(VALUE_AND_PATH_SUFFIX)) { - context.doc().add(new SortedSetDocValuesField(fieldType().name() + VALUE_AND_PATH_SUFFIX, binaryValue)); - } + if (fieldType().hasDocValues()) { + if (fieldName.equals(fieldType().name())) { + context.doc().add(new SortedSetDocValuesField(fieldType().name(), binaryValue)); + } else if (valueType.equals(VALUE_SUFFIX)) { + context.doc().add(new SortedSetDocValuesField(fieldType().name() + VALUE_SUFFIX, binaryValue)); + } else if (valueType.equals(VALUE_AND_PATH_SUFFIX)) { + context.doc().add(new SortedSetDocValuesField(fieldType().name() + VALUE_AND_PATH_SUFFIX, binaryValue)); } } } @@ -702,6 +852,7 @@ private static String errorMessage(NamedAnalyzer normalizer, String value) { + " and input \"" + value + "\""; + } @Override @@ -709,6 +860,31 @@ protected String contentType() { return CONTENT_TYPE; } + @Override + protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, Params params) throws IOException { + super.doXContentBody(builder, includeDefaults, params); + if (includeDefaults || normalizerName != null) { + builder.field("normalizer", normalizerName); + } + if (includeDefaults || ignoreAbove != Integer.MAX_VALUE) { + builder.field("ignore_above", ignoreAbove); + } + if (includeDefaults || similarity != null) { + builder.field("similarity", similarity == null ? null : similarity.name()); // null unless configured + } + if (includeDefaults || depthLimit != Integer.MAX_VALUE) { + builder.field("depth_limit", depthLimit); + } + } + + int ignoreAbove() { + return ignoreAbove; + } + + int getDepthLimit() { + return depthLimit; + } + private static final class ValueAndPathFieldMapper extends FieldMapper { protected ValueAndPathFieldMapper(FieldType fieldType, KeywordFieldType mappedFieldType) { @@ -735,7 +911,7 @@ 
protected void parseCreateField(ParseContext context) { @Override protected void mergeOptions(FieldMapper other, List conflicts) { - + throw new UnsupportedOperationException(); } @Override @@ -775,7 +951,7 @@ protected void parseCreateField(ParseContext context) { @Override protected void mergeOptions(FieldMapper other, List conflicts) { - + throw new UnsupportedOperationException(); } @Override diff --git a/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java index 54a1aead5fcc7..8ab2cdc9c2141 100644 --- a/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java @@ -52,6 +52,7 @@ import org.apache.lucene.util.automaton.Operations; import org.opensearch.OpenSearchException; import org.opensearch.common.Nullable; +import org.opensearch.common.collect.Tuple; import org.opensearch.common.lucene.BytesRefs; import org.opensearch.common.lucene.Lucene; import org.opensearch.common.lucene.search.AutomatonQueries; @@ -218,25 +219,40 @@ protected List> getParameters() { ); } - protected KeywordFieldType buildFieldType(BuilderContext context, FieldType fieldType) { + public static Tuple getNormalizerAndSearchAnalyzer( + String fieldName, + String normalizerName, + boolean splitQueriesOnWhitespace, + IndexAnalyzers indexAnalyzers + ) { NamedAnalyzer normalizer = Lucene.KEYWORD_ANALYZER; NamedAnalyzer searchAnalyzer = Lucene.KEYWORD_ANALYZER; - String normalizerName = this.normalizer.getValue(); + assert normalizerName != null; if (Objects.equals(normalizerName, "default") == false) { assert indexAnalyzers != null; normalizer = indexAnalyzers.getNormalizer(normalizerName); if (normalizer == null) { - throw new MapperParsingException("normalizer [" + normalizerName + "] not found for field [" + name + "]"); + throw new MapperParsingException("normalizer [" + normalizerName + "] not found for 
field [" + fieldName + "]"); } - if (splitQueriesOnWhitespace.getValue()) { + if (splitQueriesOnWhitespace) { searchAnalyzer = indexAnalyzers.getWhitespaceNormalizer(normalizerName); } else { searchAnalyzer = normalizer; } - } else if (splitQueriesOnWhitespace.getValue()) { + } else if (splitQueriesOnWhitespace) { searchAnalyzer = Lucene.WHITESPACE_ANALYZER; } - return new KeywordFieldType(buildFullName(context), fieldType, normalizer, searchAnalyzer, this); + return new Tuple<>(normalizer, searchAnalyzer); + } + + protected KeywordFieldType buildFieldType(BuilderContext context, FieldType fieldType) { + Tuple analyzers = getNormalizerAndSearchAnalyzer( + name, + this.normalizer.getValue(), + this.splitQueriesOnWhitespace.getValue(), + indexAnalyzers + ); + return new KeywordFieldType(buildFullName(context), fieldType, analyzers.v1(), analyzers.v2(), this); } @Override @@ -285,8 +301,19 @@ public KeywordFieldType(String name, FieldType fieldType, NamedAnalyzer normaliz } public KeywordFieldType(String name, boolean isSearchable, boolean hasDocValues, Map meta) { - super(name, isSearchable, false, hasDocValues, TextSearchInfo.SIMPLE_MATCH_ONLY, meta); - setIndexAnalyzer(Lucene.KEYWORD_ANALYZER); + this(name, isSearchable, hasDocValues, Lucene.KEYWORD_ANALYZER, TextSearchInfo.SIMPLE_MATCH_ONLY, meta); + } + + public KeywordFieldType( + String name, + boolean isSearchable, + boolean hasDocValues, + NamedAnalyzer normalizer, + TextSearchInfo textSearchInfo, + Map meta + ) { + super(name, isSearchable, false, hasDocValues, textSearchInfo, meta); + setIndexAnalyzer(normalizer); this.ignoreAbove = Integer.MAX_VALUE; this.nullValue = null; } diff --git a/server/src/test/java/org/opensearch/common/xcontent/JsonToStringXContentParserTests.java b/server/src/test/java/org/opensearch/common/xcontent/JsonToStringXContentParserTests.java index 3c292181b4d8f..f9c3414c35af1 100644 --- a/server/src/test/java/org/opensearch/common/xcontent/JsonToStringXContentParserTests.java +++ 
b/server/src/test/java/org/opensearch/common/xcontent/JsonToStringXContentParserTests.java @@ -12,13 +12,15 @@ import org.opensearch.core.xcontent.DeprecationHandler; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.index.mapper.MapperParsingException; import org.opensearch.test.OpenSearchTestCase; +import org.hamcrest.Matchers; import java.io.IOException; public class JsonToStringXContentParserTests extends OpenSearchTestCase { - private String flattenJsonString(String fieldName, String in) throws IOException { + private String flattenJsonString(String fieldName, String in, int depthLimit, String nullValue, int ignoreAbove) throws IOException { try ( XContentParser parser = JsonXContent.jsonXContent.createParser( xContentRegistry(), @@ -30,7 +32,9 @@ private String flattenJsonString(String fieldName, String in) throws IOException xContentRegistry(), DeprecationHandler.THROW_UNSUPPORTED_OPERATION, parser, - fieldName + fieldName, + depthLimit, + ignoreAbove ); // Point to the first token (should be START_OBJECT) jsonToStringXContentParser.nextToken(); @@ -53,7 +57,7 @@ public void testNestedObjects() throws IOException { + "\"flat._value\":[\"1\",\"2.0\",\"three\"]," + "\"flat._valueAndPath\":[\"flat.second.inner=2.0\",\"flat.first=1\",\"flat.third=three\"]" + "}", - flattenJsonString("flat", jsonExample) + flattenJsonString("flat", jsonExample, 5, null, 100) ); } @@ -68,7 +72,7 @@ public void testChildHasDots() throws IOException { + "\"flat._value\":[\"1\",\"2.0\",\"three\"]," + "\"flat._valueAndPath\":[\"flat.second.inner=2.0\",\"flat.first=1\",\"flat.third=three\"]" + "}", - flattenJsonString("flat", jsonExample) + flattenJsonString("flat", jsonExample, 5, null, 100) ); } @@ -87,7 +91,7 @@ public void testNestChildObjectWithDots() throws IOException { + "\"flat._value\":[\"1\",\"2.0\",\"three\"]," + 
"\"flat._valueAndPath\":[\"flat.first=1\",\"flat.second.inner.really_inner=2.0\",\"flat.third=three\"]" + "}", - flattenJsonString("flat", jsonExample) + flattenJsonString("flat", jsonExample, 5, null, 100) ); } @@ -106,7 +110,7 @@ public void testNestChildObjectWithDotsAndFieldWithDots() throws IOException { + "\"flat._value\":[\"1\",\"2.0\",\"three\"]," + "\"flat._valueAndPath\":[\"flat.first=1\",\"flat.second.inner.totally.absolutely.inner=2.0\",\"flat.third=three\"]" + "}", - flattenJsonString("flat", jsonExample) + flattenJsonString("flat", jsonExample, 5, null, 100) ); } @@ -128,7 +132,7 @@ public void testArrayOfObjects() throws IOException { + "\"flat._valueAndPath\":[" + "\"flat.field.detail.foooooooooooo.name=baz\"" + "]}", - flattenJsonString("flat", jsonExample) + flattenJsonString("flat", jsonExample, 5, null, 100) ); } @@ -158,7 +162,58 @@ public void testArraysOfObjectsAndValues() throws IOException { + "\"flat.field.numbers=3\"," + "\"flat.field.numbers=2\"" + "]}", - flattenJsonString("flat", jsonExample) + flattenJsonString("flat", jsonExample, 5, null, 100) + ); + } + + public void testDepthLimit() throws IOException { + String jsonExample = "{" + + "\"first\" : \"1\"," + + "\"second.inner\" : {" + + " \"totally.absolutely.inner\" : \"2.0\"" + + "}," + + "\"third\": \"three\"" + + "}"; + MapperParsingException e = expectThrows(MapperParsingException.class, () -> flattenJsonString("flat", jsonExample, 1, null, 100)); + assertThat( + e.getRootCause().getMessage(), + Matchers.containsString("the depth of flat_object field path [flat, second.inner] is bigger than maximum depth [1]") + ); + assertEquals( + "{" + + "\"flat\":[\"third\",\"absolutely\",\"totally\",\"inner\",\"first\",\"second\"]," + + "\"flat._value\":[\"1\",\"2.0\",\"three\"]," + + "\"flat._valueAndPath\":[\"flat.first=1\",\"flat.second.inner.totally.absolutely.inner=2.0\",\"flat.third=three\"]" + + "}", + flattenJsonString("flat", jsonExample, 3, null, 100) + ); + } + + public void 
testIgnoreAbove() throws IOException { + String jsonExample = "{" + + "\"first\" : \"1\"," + + "\"second.inner\" : {" + + " \"totally.absolutely.inner\" : \"2.0\"" + + "}," + + "\"third\": \"three\"" + + "}"; + + assertEquals( + "{" + + "\"flat\":[\"third\",\"absolutely\",\"totally\",\"inner\",\"first\",\"second\"]," + + "\"flat._value\":[\"1\",\"2.0\",\"three\"]," + + "\"flat._valueAndPath\":[\"flat.first=1\",\"flat.second.inner.totally.absolutely.inner=2.0\",\"flat.third=three\"]" + + "}", + flattenJsonString("flat", jsonExample, 5, null, 5) + ); + + assertEquals( + "{" + + "\"flat\":[\"absolutely\",\"totally\",\"inner\",\"first\",\"second\"]," + + "\"flat._value\":[\"1\",\"2.0\"]," + + "\"flat._valueAndPath\":[\"flat.first=1\",\"flat.second.inner.totally.absolutely.inner=2.0\"]" + + "}", + flattenJsonString("flat", jsonExample, 5, null, 4) ); } } diff --git a/server/src/test/java/org/opensearch/index/fielddata/AbstractFieldDataTestCase.java b/server/src/test/java/org/opensearch/index/fielddata/AbstractFieldDataTestCase.java index 63c74b3cfa64f..7b249bb7342c8 100644 --- a/server/src/test/java/org/opensearch/index/fielddata/AbstractFieldDataTestCase.java +++ b/server/src/test/java/org/opensearch/index/fielddata/AbstractFieldDataTestCase.java @@ -144,7 +144,7 @@ public > IFD getForField(String type, String field } else if (type.equals("geo_point")) { fieldType = new GeoPointFieldMapper.Builder(fieldName).docValues(docValues).build(context).fieldType(); } else if (type.equals("flat_object")) { - fieldType = new FlatObjectFieldMapper.Builder(fieldName).docValues(docValues).build(context).fieldType(); + fieldType = new FlatObjectFieldMapper.Builder(fieldName, null).docValues(docValues).build(context).fieldType(); } else if (type.equals("binary")) { fieldType = new BinaryFieldMapper.Builder(fieldName, docValues).build(context).fieldType(); } else { diff --git a/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldMapperTests.java 
b/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldMapperTests.java index afd9e994ce3ae..01320eda15688 100644 --- a/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldMapperTests.java @@ -8,6 +8,11 @@ package org.opensearch.index.mapper; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.core.LowerCaseFilter; +import org.apache.lucene.analysis.core.WhitespaceTokenizer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; @@ -20,13 +25,30 @@ import org.opensearch.common.xcontent.json.JsonXContent; import org.opensearch.core.xcontent.ToXContent; import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.index.IndexSettings; +import org.opensearch.index.analysis.AnalyzerScope; +import org.opensearch.index.analysis.CharFilterFactory; +import org.opensearch.index.analysis.CustomAnalyzer; +import org.opensearch.index.analysis.IndexAnalyzers; +import org.opensearch.index.analysis.LowercaseNormalizer; +import org.opensearch.index.analysis.NamedAnalyzer; +import org.opensearch.index.analysis.TokenFilterFactory; +import org.opensearch.index.analysis.TokenizerFactory; import org.opensearch.index.query.QueryShardContext; +import org.hamcrest.Matchers; import java.io.IOException; +import java.util.Map; +import static java.util.Collections.singletonMap; import static org.opensearch.common.xcontent.JsonToStringXContentParser.VALUE_AND_PATH_SUFFIX; import static org.opensearch.common.xcontent.JsonToStringXContentParser.VALUE_SUFFIX; import static org.opensearch.index.mapper.FlatObjectFieldMapper.CONTENT_TYPE; +import static org.opensearch.index.mapper.FlatObjectFieldMapper.TypeParser.DEPTH_LIMIT; +import static 
org.opensearch.index.mapper.FlatObjectFieldMapper.TypeParser.IGNORE_ABOVE; +import static org.opensearch.index.mapper.FlatObjectFieldMapper.TypeParser.NORMALIZER; +import static org.opensearch.index.mapper.FlatObjectFieldMapper.TypeParser.SIMILARITY; +import static org.opensearch.index.mapper.TypeParsers.DOC_VALUES; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.core.IsEqual.equalTo; @@ -80,6 +102,7 @@ protected void assertExistsQuery(FlatObjectFieldMapper.FlatObjectFieldType field } } + @Override public void minimalMapping(XContentBuilder b) throws IOException { b.field("type", CONTENT_TYPE); } @@ -94,6 +117,7 @@ protected void writeFieldValue(XContentBuilder builder) throws IOException { builder.endObject(); } + @Override public void testMinimalToMaximal() throws IOException { XContentBuilder orig = JsonXContent.contentBuilder().startObject(); createMapperService(fieldMapping(this::minimalMapping)).documentMapper().mapping().toXContent(orig, ToXContent.EMPTY_PARAMS); @@ -128,6 +152,9 @@ public void testDefaults() throws Exception { assertFalse(fieldType.stored()); assertThat(fieldType.indexOptions(), equalTo(IndexOptions.DOCS)); assertEquals(DocValuesType.NONE, fieldType.docValuesType()); + fieldType = fields[1].fieldType(); + assertThat(fieldType.indexOptions(), Matchers.equalTo(IndexOptions.NONE)); + assertEquals(DocValuesType.SORTED_SET, fieldType.docValuesType()); // Test internal substring fields as well IndexableField[] fieldValues = doc.rootDoc().getFields("field" + VALUE_SUFFIX); @@ -141,6 +168,50 @@ public void testDefaults() throws Exception { assertEquals(new BytesRef("field.foo=bar"), fieldValueAndPaths[0].binaryValue()); } + public void testIgnoreAbove() throws IOException { + DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", "flat_object").field("ignore_above", 5))); + + String json = XContentFactory.jsonBuilder() + .startObject() + .startObject("field") + .field("foo", "bar") + .endObject() + 
.endObject() + .toString(); + ParsedDocument doc = mapper.parse(source(json)); + + IndexableField[] fields = doc.rootDoc().getFields("field"); + assertEquals(2, fields.length); + IndexableField[] fieldValues = doc.rootDoc().getFields("field" + VALUE_SUFFIX); + assertEquals(2, fieldValues.length); + + IndexableField[] fieldValueAndPaths = doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX); + assertEquals(2, fieldValueAndPaths.length); + + json = XContentFactory.jsonBuilder() + .startObject() + .startObject("field") + .field("foo", "opensearch") + .endObject() + .endObject() + .toString(); + doc = mapper.parse(source(json)); + fields = doc.rootDoc().getFields("field"); + assertEquals(0, fields.length); + fieldValues = doc.rootDoc().getFields("field" + VALUE_SUFFIX); + assertEquals(0, fieldValues.length); + + fieldValueAndPaths = doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX); + assertEquals(0, fieldValueAndPaths.length); + + // test negative depth_limit + MapperParsingException e = expectThrows( + MapperParsingException.class, + () -> createDocumentMapper(fieldMapping(b -> b.field("type", "flat_object").field("ignore_above", "-1"))) + ); + assertThat(e.getRootCause().getMessage(), Matchers.containsString("[ignore_above] must be positive, got -1")); + } + public void testNullValue() throws IOException { DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); ParsedDocument parsedDocument = mapper.parse(source(b -> b.nullField("field"))); @@ -397,9 +468,237 @@ public void testDeduplicationValue() throws IOException { assertEquals(new BytesRef("field.labels=3"), fieldValueAndPaths[4].binaryValue()); } + public void testIndexed() throws IOException { + String json = XContentFactory.jsonBuilder() + .startObject() + .startObject("field") + .field("foo", "bar") + .endObject() + .endObject() + .toString(); + // test index=false + DocumentMapper mapperWithDocValues = createDocumentMapper(fieldMapping(b -> b.field("type", 
"flat_object").field("index", false))); + ParsedDocument doc = mapperWithDocValues.parse(source(json)); + IndexableField[] fields = doc.rootDoc().getFields("field"); + assertEquals(1, fields.length); + assertTrue(fields[0] instanceof SortedSetDocValuesField); + assertEquals(new BytesRef("field.foo"), fields[0].binaryValue()); + + IndexableField[] fieldValues = doc.rootDoc().getFields("field" + VALUE_SUFFIX); + assertEquals(1, fieldValues.length); + assertTrue(fieldValues[0] instanceof SortedSetDocValuesField); + assertEquals(new BytesRef("field.bar"), fieldValues[0].binaryValue()); + + IndexableField[] fieldValueAndPaths = doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX); + assertEquals(1, fieldValueAndPaths.length); + assertTrue(fieldValueAndPaths[0] instanceof SortedSetDocValuesField); + assertEquals(new BytesRef("field.field.foo=bar"), fieldValueAndPaths[0].binaryValue()); + } + + public void testDocValues() throws IOException { + String json = XContentFactory.jsonBuilder() + .startObject() + .startObject("field") + .field("foo", "bar") + .endObject() + .endObject() + .toString(); + // test dov_values=false + { + DocumentMapper mapperWithDocValues = createDocumentMapper( + fieldMapping(b -> b.field("type", "flat_object").field("doc_values", false)) + ); + ParsedDocument doc = mapperWithDocValues.parse(source(json)); + IndexableField[] fields = doc.rootDoc().getFields("field"); + assertEquals(1, fields.length); + assertEquals(DocValuesType.NONE, fields[0].fieldType().docValuesType()); + + IndexableField[] fieldValues = doc.rootDoc().getFields("field" + VALUE_SUFFIX); + assertEquals(1, fieldValues.length); + assertTrue(fieldValues[0] instanceof KeywordFieldMapper.KeywordField); + assertEquals(new BytesRef("bar"), fieldValues[0].binaryValue()); + + IndexableField[] fieldValueAndPaths = doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX); + assertEquals(1, fieldValueAndPaths.length); + assertTrue(fieldValues[0] instanceof 
KeywordFieldMapper.KeywordField); + assertEquals(new BytesRef("bar"), fieldValues[0].binaryValue()); + } + + // test dov_values=true + { + DocumentMapper mapperWithDocValues = createDocumentMapper( + fieldMapping(b -> b.field("type", "flat_object").field("doc_values", true)) + ); + ParsedDocument doc = mapperWithDocValues.parse(source(json)); + IndexableField[] fields = doc.rootDoc().getFields("field"); + assertEquals(2, fields.length); + assertEquals(DocValuesType.SORTED_SET, fields[1].fieldType().docValuesType()); + assertEquals(new BytesRef("field.foo"), fields[0].binaryValue()); + assertEquals(new BytesRef("field.foo"), fields[1].binaryValue()); + + IndexableField[] fieldValues = doc.rootDoc().getFields("field" + VALUE_SUFFIX); + assertEquals(2, fieldValues.length); + assertTrue(fieldValues[0] instanceof KeywordFieldMapper.KeywordField); + assertEquals(new BytesRef("bar"), fieldValues[0].binaryValue()); + assertEquals(new BytesRef("field.bar"), fieldValues[1].binaryValue()); + + IndexableField[] fieldValueAndPaths = doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX); + assertEquals(2, fieldValueAndPaths.length); + assertTrue(fieldValueAndPaths[0] instanceof KeywordFieldMapper.KeywordField); + assertEquals(new BytesRef("field.foo=bar"), fieldValueAndPaths[0].binaryValue()); + assertEquals(new BytesRef("field.field.foo=bar"), fieldValueAndPaths[1].binaryValue()); + } + } + + public void testNormalizer() throws IOException { + DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", "flat_object").field("normalizer", "lowercase"))); + String json = XContentFactory.jsonBuilder() + .startObject() + .startObject("field") + .field("Foo", "Bar") + .endObject() + .endObject() + .toString(); + ParsedDocument doc = mapper.parse(source(json)); + IndexableField[] fields = doc.rootDoc().getFields("field"); + assertEquals(2, fields.length); + assertEquals(new BytesRef("field.foo"), fields[0].binaryValue()); + IndexableFieldType fieldType = 
fields[0].fieldType(); + assertThat(fieldType.indexOptions(), Matchers.equalTo(IndexOptions.DOCS)); + assertEquals(DocValuesType.NONE, fieldType.docValuesType()); + + IndexableField[] fieldValues = doc.rootDoc().getFields("field" + VALUE_SUFFIX); + assertEquals(2, fieldValues.length); + assertEquals(new BytesRef("bar"), fieldValues[0].binaryValue()); + + IndexableField[] fieldValueAndPaths = doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX); + assertEquals(2, fieldValueAndPaths.length); + assertTrue(fieldValueAndPaths[0] instanceof KeywordFieldMapper.KeywordField); + assertEquals(new BytesRef("field.foo=bar"), fieldValueAndPaths[0].binaryValue()); + } + + public void testDepthLimit() throws IOException { + final DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", "flat_object").field("depth_limit", "1"))); + String json = XContentFactory.jsonBuilder() + .startObject() + .startObject("field") + .field("Foo", "Bar") + .endObject() + .endObject() + .toString(); + ParsedDocument doc = mapper.parse(source(json)); + IndexableField[] fields = doc.rootDoc().getFields("field"); + assertEquals(2, fields.length); + assertEquals(new BytesRef("field.Foo"), fields[0].binaryValue()); + IndexableFieldType fieldType = fields[0].fieldType(); + assertThat(fieldType.indexOptions(), Matchers.equalTo(IndexOptions.DOCS)); + assertEquals(DocValuesType.NONE, fieldType.docValuesType()); + + IndexableField[] fieldValues = doc.rootDoc().getFields("field" + VALUE_SUFFIX); + assertEquals(2, fieldValues.length); + assertEquals(new BytesRef("Bar"), fieldValues[0].binaryValue()); + + IndexableField[] fieldValueAndPaths = doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX); + assertEquals(2, fieldValueAndPaths.length); + assertTrue(fieldValueAndPaths[0] instanceof KeywordFieldMapper.KeywordField); + assertEquals(new BytesRef("field.Foo=Bar"), fieldValueAndPaths[0].binaryValue()); + + // beyond depth_limit + String json1 = XContentFactory.jsonBuilder() + 
.startObject() + .startObject("field") + .startObject("field1") + .field("Foo", "Bar") + .endObject() + .endObject() + .endObject() + .toString(); + + MapperParsingException e = expectThrows(MapperParsingException.class, () -> mapper.parse(source(json1))); + assertThat( + e.getRootCause().getMessage(), + Matchers.containsString("the depth of flat_object field path [field, field1] is bigger than maximum depth [1]") + ); + + // test negative depth_limit + e = expectThrows( + MapperParsingException.class, + () -> createDocumentMapper(fieldMapping(b -> b.field("type", "flat_object").field("depth_limit", "-1"))) + ); + assertThat(e.getRootCause().getMessage(), Matchers.containsString("[depth_limit] must be positive, got -1")); + + } + + public void testUpdateNormalizer() throws IOException { + MapperService mapperService = createMapperService( + fieldMapping(b -> b.field("type", "flat_object").field("normalizer", "lowercase")) + ); + IllegalArgumentException e = expectThrows( + IllegalArgumentException.class, + () -> merge(mapperService, fieldMapping(b -> b.field("type", "flat_object").field("normalizer", "other_lowercase"))) + ); + assertEquals( + "Mapper for [field] conflicts with existing mapping:\n" + + "[mapper [field] has different [analyzer], mapper [field] has different [normalizer]]", + e.getMessage() + ); + } + + public void testConfigureSimilarity() throws IOException { + MapperService mapperService = createMapperService(fieldMapping(b -> b.field("type", "flat_object").field("similarity", "boolean"))); + MappedFieldType ft = mapperService.documentMapper().fieldTypes().get("field"); + assertEquals("boolean", ft.getTextSearchInfo().getSimilarity().name()); + + IllegalArgumentException e = expectThrows( + IllegalArgumentException.class, + () -> merge(mapperService, fieldMapping(b -> b.field("type", "flat_object").field("similarity", "BM25"))) + ); + assertThat(e.getMessage(), Matchers.containsString("mapper [field] has different [similarity]")); + } + @Override 
protected void registerParameters(ParameterChecker checker) throws IOException { - // In the future we will want to make sure parameter updates are covered. + checker.registerConflictCheck(DOC_VALUES, b -> b.field(DOC_VALUES, false)); + checker.registerConflictCheck(NORMALIZER, b -> b.field(NORMALIZER, "lowercase")); + checker.registerConflictCheck(DEPTH_LIMIT, b -> b.field(DEPTH_LIMIT, 34)); + checker.registerConflictCheck(IGNORE_ABOVE, b -> b.field(IGNORE_ABOVE, 256)); + checker.registerConflictCheck(SIMILARITY, b -> b.field(SIMILARITY, "boolean")); + } + + @Override + protected IndexAnalyzers createIndexAnalyzers(IndexSettings indexSettings) { + return new IndexAnalyzers( + singletonMap("default", new NamedAnalyzer("default", AnalyzerScope.INDEX, new StandardAnalyzer())), + Map.of( + "lowercase", + new NamedAnalyzer("lowercase", AnalyzerScope.INDEX, new LowercaseNormalizer()), + "other_lowercase", + new NamedAnalyzer("other_lowercase", AnalyzerScope.INDEX, new LowercaseNormalizer()) + ), + singletonMap( + "lowercase", + new NamedAnalyzer( + "lowercase", + AnalyzerScope.INDEX, + new CustomAnalyzer( + TokenizerFactory.newFactory("lowercase", WhitespaceTokenizer::new), + new CharFilterFactory[0], + new TokenFilterFactory[] { new TokenFilterFactory() { + + @Override + public String name() { + return "lowercase"; + } + + @Override + public TokenStream create(TokenStream tokenStream) { + return new LowerCaseFilter(tokenStream); + } + } } + ) + ) + ) + ); } } diff --git a/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldTypeTests.java index 38a6f13777f00..8a71f61f0ab4a 100644 --- a/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldTypeTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldTypeTests.java @@ -24,6 +24,7 @@ import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.util.BytesRef; import 
org.apache.lucene.util.automaton.Operations; +import org.opensearch.common.lucene.Lucene; import org.opensearch.common.unit.Fuzziness; import org.opensearch.index.analysis.AnalyzerScope; import org.opensearch.index.analysis.NamedAnalyzer; @@ -49,13 +50,15 @@ private static MappedFieldType getFlatParentFieldType( boolean isSearchable, boolean hasDocValues ) { - FlatObjectFieldMapper.Builder builder = new FlatObjectFieldMapper.Builder(fieldName); FlatObjectFieldMapper.FlatObjectFieldType flatObjectFieldType = new FlatObjectFieldMapper.FlatObjectFieldType( fieldName, mappedFieldTypeName, isSearchable, hasDocValues, + Integer.MAX_VALUE, null, + Lucene.KEYWORD_ANALYZER, + Lucene.KEYWORD_ANALYZER, Collections.emptyMap() ); FieldType fieldtype = new FieldType(FlatObjectFieldMapper.Defaults.FIELD_TYPE); @@ -100,7 +103,11 @@ public void testDirectSubfield() { "bar", flatParentFieldType.name(), flatParentFieldType.getValueFieldType(), - flatParentFieldType.getValueAndPathFieldType() + flatParentFieldType.getValueAndPathFieldType(), + null, + Lucene.KEYWORD_ANALYZER, + Lucene.KEYWORD_ANALYZER, + Integer.MAX_VALUE ); // when searching for "foo" in "field.bar", the directSubfield is field._valueAndPath field String searchFieldNameDocPath = ((FlatObjectFieldMapper.FlatObjectFieldType) dynamicMappedFieldType).directSubfield(); @@ -108,7 +115,17 @@ public void testDirectSubfield() { } { NamedAnalyzer analyzer = new NamedAnalyzer("default", AnalyzerScope.INDEX, null); - MappedFieldType ft = new FlatObjectFieldMapper.FlatObjectFieldType("field", null, true, true, analyzer, Collections.emptyMap()); + MappedFieldType ft = new FlatObjectFieldMapper.FlatObjectFieldType( + "field", + null, + true, + true, + Integer.MAX_VALUE, + null, + analyzer, + analyzer, + Collections.emptyMap() + ); assertEquals("field._value", ((FlatObjectFieldMapper.FlatObjectFieldType) ft).directSubfield()); } } @@ -129,7 +146,11 @@ public void testRewriteValue() { "field.bar", flatParentFieldType.name(), 
flatParentFieldType.getValueFieldType(), - flatParentFieldType.getValueAndPathFieldType() + flatParentFieldType.getValueAndPathFieldType(), + null, + Lucene.KEYWORD_ANALYZER, + Lucene.KEYWORD_ANALYZER, + Integer.MAX_VALUE ); // when searching for "foo" in "field.bar", the rewrite value is "field.bar=foo" @@ -157,7 +178,11 @@ public void testTermQuery() { "field.bar", flatParentFieldType.name(), flatParentFieldType.getValueFieldType(), - flatParentFieldType.getValueAndPathFieldType() + flatParentFieldType.getValueAndPathFieldType(), + null, + Lucene.KEYWORD_ANALYZER, + Lucene.KEYWORD_ANALYZER, + Integer.MAX_VALUE ); // when searching for "foo" in "field.bar", the term query is directed to search in field._valueAndPath field @@ -171,7 +196,10 @@ public void testTermQuery() { null, false, true, + Integer.MAX_VALUE, null, + Lucene.KEYWORD_ANALYZER, + Lucene.KEYWORD_ANALYZER, Collections.emptyMap() ); IllegalArgumentException e = expectThrows( @@ -197,7 +225,11 @@ public void testExistsQuery() { "field.bar", ft.name(), ft.getValueFieldType(), - ft.getValueAndPathFieldType() + ft.getValueAndPathFieldType(), + null, + Lucene.KEYWORD_ANALYZER, + Lucene.KEYWORD_ANALYZER, + Integer.MAX_VALUE ); assertEquals(new TermQuery(new Term("field", "field.bar")), dynamicMappedFieldType.existsQuery(null)); @@ -209,7 +241,10 @@ public void testExistsQuery() { null, true, false, + Integer.MAX_VALUE, null, + Lucene.KEYWORD_ANALYZER, + Lucene.KEYWORD_ANALYZER, Collections.emptyMap() ); assertEquals(new TermQuery(new Term(FieldNamesFieldMapper.NAME, "field")), ft.existsQuery(MOCK_QSC_ENABLE_INDEX_DOC_VALUES));