diff --git a/docs/changelog/107640.yaml b/docs/changelog/107640.yaml
new file mode 100644
index 0000000000000..9871943481f20
--- /dev/null
+++ b/docs/changelog/107640.yaml
@@ -0,0 +1,6 @@
+pr: 107640
+summary: "Unified Highlighter to support matched_fields"
+area: Highlighting
+type: enhancement
+issues:
+ - 5172
diff --git a/docs/reference/search/search-your-data/highlighting.asciidoc b/docs/reference/search/search-your-data/highlighting.asciidoc
index 55e737eb00197..7ee13d971b035 100644
--- a/docs/reference/search/search-your-data/highlighting.asciidoc
+++ b/docs/reference/search/search-your-data/highlighting.asciidoc
@@ -46,8 +46,9 @@ for each field.
The `unified` highlighter uses the Lucene Unified Highlighter. This
highlighter breaks the text into sentences and uses the BM25 algorithm to score
individual sentences as if they were documents in the corpus. It also supports
-accurate phrase and multi-term (fuzzy, prefix, regex) highlighting. This is the
-default highlighter.
+accurate phrase and multi-term (fuzzy, prefix, regex) highlighting. The `unified`
+highlighter can combine matches from multiple fields into one result (see
+`matched_fields`). This is the default highlighter.
[discrete]
[[plain-highlighter]]
@@ -199,10 +200,27 @@ include the search query as part of the `highlight_query`.
matched_fields:: Combine matches on multiple fields to highlight a single field.
This is most intuitive for multifields that analyze the same string in different
-ways. All `matched_fields` must have `term_vector` set to
-`with_positions_offsets`, but only the field to which
-the matches are combined is loaded so only that field benefits from having
-`store` set to `yes`. Only valid for the `fvh` highlighter.
+ways. Valid for the `unified` and `fvh` highlighters, but the behavior of this
+option is different for each highlighter.
+
+For the `unified` highlighter:
+
+- `matched_fields` array should **not** contain the original field that you want to highlight. The
+original field will be automatically added to the `matched_fields`, and there is no
+way to exclude its matches when highlighting.
+- `matched_fields` and the original field can be indexed with different strategies (with or
+without `offsets`, with or without `term_vectors`).
+- only the original field to which the matches are combined is loaded so only that field
+benefits from having `store` set to `yes`.
+
+For the `fvh` highlighter:
+
+- `matched_fields` array may or may not contain the original field
+depending on your needs. If you want to include the original field's matches in
+highlighting, add it to the `matched_fields` array.
+- all `matched_fields` must have `term_vector` set to `with_positions_offsets`.
+- only the original field to which the matches are combined is loaded so only that field
+benefits from having `store` set to `yes`.
no_match_size:: The amount of text you want to return from the beginning
of the field if there are no matching fragments to highlight. Defaults
@@ -498,100 +516,14 @@ GET /_search
[discrete]
=== Combine matches on multiple fields
-WARNING: This is only supported by the `fvh` highlighter
+WARNING: Supported by the `unified` and `fvh` highlighters.
-The Fast Vector Highlighter can combine matches on multiple fields to
+The Unified and Fast Vector Highlighters can combine matches on multiple fields to
highlight a single field. This is most intuitive for multifields that
-analyze the same string in different ways. All `matched_fields` must have
-`term_vector` set to `with_positions_offsets` but only the field to which
-the matches are combined is loaded so only that field would benefit from having
-`store` set to `yes`.
-
-In the following examples, `comment` is analyzed by the `english`
-analyzer and `comment.plain` is analyzed by the `standard` analyzer.
-
-[source,console]
---------------------------------------------------
-GET /_search
-{
- "query": {
- "query_string": {
- "query": "comment.plain:running scissors",
- "fields": [ "comment" ]
- }
- },
- "highlight": {
- "order": "score",
- "fields": {
- "comment": {
- "matched_fields": [ "comment", "comment.plain" ],
- "type": "fvh"
- }
- }
- }
-}
---------------------------------------------------
-// TEST[setup:my_index]
-
-The above matches both "run with scissors" and "running with scissors"
-and would highlight "running" and "scissors" but not "run". If both
-phrases appear in a large document then "running with scissors" is
-sorted above "run with scissors" in the fragments list because there
-are more matches in that fragment.
-
-[source,console]
---------------------------------------------------
-GET /_search
-{
- "query": {
- "query_string": {
- "query": "running scissors",
- "fields": ["comment", "comment.plain^10"]
- }
- },
- "highlight": {
- "order": "score",
- "fields": {
- "comment": {
- "matched_fields": ["comment", "comment.plain"],
- "type" : "fvh"
- }
- }
- }
-}
---------------------------------------------------
-// TEST[setup:my_index]
+analyze the same string in different ways.
-The above highlights "run" as well as "running" and "scissors" but
-still sorts "running with scissors" above "run with scissors" because
-the plain match ("running") is boosted.
+include::{es-ref-dir}/tab-widgets/highlighting-multi-fields-widget.asciidoc[]
-[source,console]
---------------------------------------------------
-GET /_search
-{
- "query": {
- "query_string": {
- "query": "running scissors",
- "fields": [ "comment", "comment.plain^10" ]
- }
- },
- "highlight": {
- "order": "score",
- "fields": {
- "comment": {
- "matched_fields": [ "comment.plain" ],
- "type": "fvh"
- }
- }
- }
-}
---------------------------------------------------
-// TEST[setup:my_index]
-
-The above query wouldn't highlight "run" or "scissor" but shows that
-it is just fine not to list the field to which the matches are combined
-(`comment`) in the matched fields.
[NOTE]
Technically it is also fine to add fields to `matched_fields` that
@@ -599,32 +531,6 @@ don't share the same underlying string as the field to which the matches
are combined. The results might not make much sense and if one of the
matches is off the end of the text then the whole query will fail.
-[NOTE]
-===================================================================
-There is a small amount of overhead involved with setting
-`matched_fields` to a non-empty array so always prefer
-[source,js]
---------------------------------------------------
- "highlight": {
- "fields": {
- "comment": {}
- }
- }
---------------------------------------------------
-// NOTCONSOLE
-to
-[source,js]
---------------------------------------------------
- "highlight": {
- "fields": {
- "comment": {
- "matched_fields": ["comment"],
- "type" : "fvh"
- }
- }
- }
---------------------------------------------------
-// NOTCONSOLE
===================================================================
diff --git a/docs/reference/tab-widgets/highlighting-multi-fields-widget.asciidoc b/docs/reference/tab-widgets/highlighting-multi-fields-widget.asciidoc
new file mode 100644
index 0000000000000..e307ea002f542
--- /dev/null
+++ b/docs/reference/tab-widgets/highlighting-multi-fields-widget.asciidoc
@@ -0,0 +1,40 @@
+++++
+
+
+
+ Unified
+
+
+ FVH
+
+
+
+++++
+
+include::highlighting-multi-fields.asciidoc[tag=unified]
+
+++++
+
+
+++++
+
+include::highlighting-multi-fields.asciidoc[tag=fvh]
+
+++++
+
+
+++++
diff --git a/docs/reference/tab-widgets/highlighting-multi-fields.asciidoc b/docs/reference/tab-widgets/highlighting-multi-fields.asciidoc
new file mode 100644
index 0000000000000..5af85f33c99fa
--- /dev/null
+++ b/docs/reference/tab-widgets/highlighting-multi-fields.asciidoc
@@ -0,0 +1,465 @@
+// tag::unified[]
+
+In the following examples, `comment` is analyzed by the `standard`
+analyzer and `comment.english` is analyzed by the `english` analyzer.
+
+[source,console]
+--------------------------------------------------
+PUT index1
+{
+ "mappings": {
+ "properties": {
+ "comment": {
+ "type": "text",
+ "analyzer": "standard",
+ "fields": {
+ "english": {
+ "type": "text",
+ "analyzer": "english"
+ }
+ }
+ }
+ }
+ }
+}
+--------------------------------------------------
+
+
+[source,console]
+--------------------------------------------------
+PUT index1/_bulk?refresh=true
+{"index": {"_id": "doc1" }}
+{"comment": "run with scissors"}
+{ "index" : {"_id": "doc2"} }
+{"comment": "running with scissors"}
+
+--------------------------------------------------
+// TEST[continued]
+
+
+[source,console]
+--------------------------------------------------
+GET index1/_search
+{
+ "query": {
+ "query_string": {
+ "query": "running with scissors",
+ "fields": ["comment", "comment.english"]
+ }
+ },
+ "highlight": {
+ "order": "score",
+ "fields": {
+ "comment": {}
+ }
+ }
+}
+--------------------------------------------------
+// TEST[continued]
+
+The above request matches both "run with scissors" and "running with scissors"
+and would highlight "running" and "scissors" but not "run". If both
+phrases appear in a large document then "running with scissors" is
+sorted above "run with scissors" in the fragments list because there
+are more matches in that fragment.
+
+[source,console-result]
+----
+{
+ ...
+ "hits" : {
+ "total" : {
+ "value" : 2,
+ "relation" : "eq"
+ },
+ "max_score": 1.0577903,
+ "hits" : [
+ {
+ "_index" : "index1",
+ "_id" : "doc2",
+ "_score" : 1.0577903,
+ "_source" : {
+ "comment" : "running with scissors"
+ },
+ "highlight" : {
+ "comment" : [
+ "running with scissors "
+ ]
+ }
+ },
+ {
+ "_index" : "index1",
+ "_id" : "doc1",
+ "_score" : 0.36464313,
+ "_source" : {
+ "comment" : "run with scissors"
+ },
+ "highlight" : {
+ "comment" : [
+ "run with scissors "
+ ]
+ }
+ }
+ ]
+ }
+}
+----
+// TESTRESPONSE[s/\.\.\./"took" : $body.took,"timed_out" : $body.timed_out,"_shards" : $body._shards,/]
+
+The below request highlights "run" as well as "running" and "scissors",
+because the `matched_fields` parameter instructs that for highlighting
+we need to combine matches from the `comment.english` field with
+the matches from the original `comment` field.
+
+[source,console]
+--------------------------------------------------
+GET index1/_search
+{
+ "query": {
+ "query_string": {
+ "query": "running with scissors",
+ "fields": ["comment", "comment.english"]
+ }
+ },
+ "highlight": {
+ "order": "score",
+ "fields": {
+ "comment": {
+ "matched_fields": ["comment.english"]
+ }
+ }
+ }
+}
+--------------------------------------------------
+// TEST[continued]
+
+[source,console-result]
+----
+{
+ ...
+ "hits" : {
+ "total" : {
+ "value" : 2,
+ "relation" : "eq"
+ },
+ "max_score": 1.0577903,
+ "hits" : [
+ {
+ "_index" : "index1",
+ "_id" : "doc2",
+ "_score" : 1.0577903,
+ "_source" : {
+ "comment" : "running with scissors"
+ },
+ "highlight" : {
+ "comment" : [
+ "running with scissors "
+ ]
+ }
+ },
+ {
+ "_index" : "index1",
+ "_id" : "doc1",
+ "_score" : 0.36464313,
+ "_source" : {
+ "comment" : "run with scissors"
+ },
+ "highlight" : {
+ "comment" : [
+ "run with scissors "
+ ]
+ }
+ }
+ ]
+ }
+}
+----
+// TESTRESPONSE[s/\.\.\./"took" : $body.took,"timed_out" : $body.timed_out,"_shards" : $body._shards,/]
+
+// end::unified[]
+
+
+
+
+
+// tag::fvh[]
+
+In the following examples, `comment` is analyzed by the `standard`
+analyzer and `comment.english` is analyzed by the `english` analyzer.
+
+[source,console]
+--------------------------------------------------
+PUT index2
+{
+ "mappings": {
+ "properties": {
+ "comment": {
+ "type": "text",
+ "analyzer": "standard",
+ "term_vector": "with_positions_offsets",
+ "fields": {
+ "english": {
+ "type": "text",
+ "analyzer": "english",
+ "term_vector": "with_positions_offsets"
+ }
+ }
+ }
+ }
+ }
+}
+--------------------------------------------------
+
+
+[source,console]
+--------------------------------------------------
+PUT index2/_bulk?refresh=true
+{"index": {"_id": "doc1" }}
+{"comment": "run with scissors"}
+{ "index" : {"_id": "doc2"} }
+{"comment": "running with scissors"}
+
+--------------------------------------------------
+// TEST[continued]
+
+
+[source,console]
+--------------------------------------------------
+GET index2/_search
+{
+ "query": {
+ "query_string": {
+ "query": "running with scissors",
+ "fields": ["comment", "comment.english"]
+ }
+ },
+ "highlight": {
+ "order": "score",
+ "fields": {
+ "comment": {
+ "type" : "fvh"
+ }
+ }
+ }
+}
+--------------------------------------------------
+// TEST[continued]
+
+The above request matches both "run with scissors" and "running with scissors"
+and would highlight "running" and "scissors" but not "run". If both
+phrases appear in a large document then "running with scissors" is
+sorted above "run with scissors" in the fragments list because there
+are more matches in that fragment.
+
+[source,console-result]
+----
+{
+ ...
+ "hits" : {
+ "total" : {
+ "value" : 2,
+ "relation" : "eq"
+ },
+ "max_score": 1.0577903,
+ "hits" : [
+ {
+ "_index" : "index2",
+ "_id" : "doc2",
+ "_score" : 1.0577903,
+ "_source" : {
+ "comment" : "running with scissors"
+ },
+ "highlight" : {
+ "comment" : [
+ "running with scissors "
+ ]
+ }
+ },
+ {
+ "_index" : "index2",
+ "_id" : "doc1",
+ "_score" : 0.36464313,
+ "_source" : {
+ "comment" : "run with scissors"
+ },
+ "highlight" : {
+ "comment" : [
+ "run with scissors "
+ ]
+ }
+ }
+ ]
+ }
+}
+----
+// TESTRESPONSE[s/\.\.\./"took" : $body.took,"timed_out" : $body.timed_out,"_shards" : $body._shards,/]
+
+The below request highlights "run" as well as "running" and "scissors",
+because the `matched_fields` parameter instructs that for highlighting
+we need to combine matches from the `comment` and `comment.english` fields.
+
+[source,console]
+--------------------------------------------------
+GET index2/_search
+{
+ "query": {
+ "query_string": {
+ "query": "running with scissors",
+ "fields": ["comment", "comment.english"]
+ }
+ },
+ "highlight": {
+ "order": "score",
+ "fields": {
+ "comment": {
+ "type" : "fvh",
+ "matched_fields": ["comment", "comment.english"]
+ }
+ }
+ }
+}
+--------------------------------------------------
+// TEST[continued]
+
+[source,console-result]
+----
+{
+ ...
+ "hits" : {
+ "total" : {
+ "value" : 2,
+ "relation" : "eq"
+ },
+ "max_score": 1.0577903,
+ "hits" : [
+ {
+ "_index" : "index2",
+ "_id" : "doc2",
+ "_score" : 1.0577903,
+ "_source" : {
+ "comment" : "running with scissors"
+ },
+ "highlight" : {
+ "comment" : [
+ "running with scissors "
+ ]
+ }
+ },
+ {
+ "_index" : "index2",
+ "_id" : "doc1",
+ "_score" : 0.36464313,
+ "_source" : {
+ "comment" : "run with scissors"
+ },
+ "highlight" : {
+ "comment" : [
+ "run with scissors "
+ ]
+ }
+ }
+ ]
+ }
+}
+----
+// TESTRESPONSE[s/\.\.\./"took" : $body.took,"timed_out" : $body.timed_out,"_shards" : $body._shards,/]
+
+The below request wouldn't highlight "run" or "scissor" but shows that
+it is just fine not to list the field to which the matches are combined
+(`comment.english`) in the matched fields.
+
+[source,console]
+--------------------------------------------------
+GET index2/_search
+{
+ "query": {
+ "query_string": {
+ "query": "running with scissors",
+ "fields": ["comment", "comment.english"]
+ }
+ },
+ "highlight": {
+ "order": "score",
+ "fields": {
+ "comment.english": {
+ "type" : "fvh",
+ "matched_fields": ["comment"]
+ }
+ }
+ }
+}
+--------------------------------------------------
+// TEST[continued]
+
+
+[source,console-result]
+----
+{
+ ...
+ "hits" : {
+ "total" : {
+ "value" : 2,
+ "relation" : "eq"
+ },
+ "max_score": 1.0577903,
+ "hits" : [
+ {
+ "_index" : "index2",
+ "_id" : "doc2",
+ "_score" : 1.0577903,
+ "_source" : {
+ "comment" : "running with scissors"
+ },
+ "highlight" : {
+ "comment.english" : [
+ "running with scissors "
+ ]
+ }
+ },
+ {
+ "_index" : "index2",
+ "_id" : "doc1",
+ "_score" : 0.36464313,
+ "_source" : {
+ "comment" : "run with scissors"
+ },
+ "highlight" : {
+ "comment.english" : [
+ "run with scissors "
+ ]
+ }
+ }
+ ]
+ }
+}
+----
+// TESTRESPONSE[s/\.\.\./"took" : $body.took,"timed_out" : $body.timed_out,"_shards" : $body._shards,/]
+
+[NOTE]
+===================================================================
+There is a small amount of overhead involved with setting
+`matched_fields` to a non-empty array so always prefer
+[source,js]
+--------------------------------------------------
+ "highlight": {
+ "fields": {
+ "comment": {}
+ }
+ }
+--------------------------------------------------
+// NOTCONSOLE
+to
+[source,js]
+--------------------------------------------------
+ "highlight": {
+ "fields": {
+ "comment": {
+ "matched_fields": ["comment"],
+ "type" : "fvh"
+ }
+ }
+ }
+--------------------------------------------------
+// NOTCONSOLE
+
+// end::fvh[]
diff --git a/qa/ccs-common-rest/src/yamlRestTest/java/org/elasticsearch/test/rest/yaml/CcsCommonYamlTestSuiteIT.java b/qa/ccs-common-rest/src/yamlRestTest/java/org/elasticsearch/test/rest/yaml/CcsCommonYamlTestSuiteIT.java
index a8cff14ff6220..49db5e3a1cd99 100644
--- a/qa/ccs-common-rest/src/yamlRestTest/java/org/elasticsearch/test/rest/yaml/CcsCommonYamlTestSuiteIT.java
+++ b/qa/ccs-common-rest/src/yamlRestTest/java/org/elasticsearch/test/rest/yaml/CcsCommonYamlTestSuiteIT.java
@@ -79,6 +79,7 @@ public class CcsCommonYamlTestSuiteIT extends ESClientYamlSuiteTestCase {
private static LocalClusterConfigProvider commonClusterConfig = cluster -> cluster.module("x-pack-async-search")
.module("aggregations")
+ .module("analysis-common")
.module("mapper-extras")
.module("vector-tile")
.module("x-pack-analytics")
diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.highlight/60_unified_matched_fields.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.highlight/60_unified_matched_fields.yml
new file mode 100644
index 0000000000000..a0abff2d6726f
--- /dev/null
+++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.highlight/60_unified_matched_fields.yml
@@ -0,0 +1,108 @@
+setup:
+ - requires:
+ cluster_features: 'unified_highlighter_matched_fields'
+ reason: 'test requires unified highlighter to support matched_fields'
+
+ - do:
+ indices.create:
+ index: index1
+ body:
+ settings:
+ index:
+ number_of_shards: 1
+ number_of_replicas: 0
+ analysis:
+ filter:
+ my_edge_ngram:
+ type: edge_ngram
+ min_gram: 2
+ max_gram: 20
+ analyzer:
+ my_analyzer:
+ tokenizer: whitespace
+ filter: [ my_edge_ngram ]
+ mappings:
+ properties:
+ title:
+ type: text
+ fields:
+ english:
+ type: text
+ analyzer: english
+ ngram:
+ type: text
+ analyzer: my_analyzer
+ body :
+ type: text
+
+
+ - do:
+ bulk:
+ refresh: true
+ index: index1
+ body:
+ - '{"index": {"_id": 1 }}'
+ - '{"title": "dancing with the stars", "body": "Dancing with the Stars is a popular TV show"}'
+ - '{"index": {"_id": 2 }}'
+ - '{"title": "dance with star", "body": "Dancing with the Stars is a popular TV show"}'
+
+---
+"Highlight based on single masked field":
+ - do:
+ search:
+ index: index1
+ body:
+ query:
+ query_string:
+ query: "\"dancing with the stars\""
+ fields: ["title^5", "title.english"]
+ phrase_slop: 2
+ highlight:
+ fields:
+ title:
+ matched_fields: ["title.english"]
+
+ - length: {hits.hits: 2}
+ - match: {hits.hits.0.highlight.title.0: "dancing with the stars "}
+ - match: {hits.hits.1.highlight.title.0: "dance with star "}
+
+---
+"Highlight based on multiple masked fields":
+ - do:
+ search:
+ index: index1
+ body:
+ query:
+ query_string:
+ query: "dan with star"
+ fields: ["title^5", "title.ngram", "title.english"]
+ highlight:
+ fields:
+ title:
+ matched_fields: ["title.ngram", "title.english"]
+
+ - length: {hits.hits: 2}
+ - match: {hits.hits.0.highlight.title.0: "dance with star " }
+ - match: {hits.hits.1.highlight.title.0: "dancing with the stars "}
+
+
+---
+"Highlight using matched_fields is not allowed when require_field_match is set to false":
+ - do:
+ catch: bad_request
+ search:
+ index: index1
+ body:
+ query:
+ query_string:
+ query: "dan with star"
+ fields: ["title^5", "title.ngram", "title.english"]
+ highlight:
+ require_field_match: false
+ fields:
+ title:
+ matched_fields: ["title.ngram", "title.english"]
+
+ - match: { status: 400 }
+ - match: { error.root_cause.0.type: "illegal_argument_exception" }
+ - match: { error.root_cause.0.reason: "Matched fields are not supported when [require_field_match] is set to [false]" }
diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java
index ab72dbd4db707..0a6fceea9a3f1 100644
--- a/server/src/internalClusterTest/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java
+++ b/server/src/internalClusterTest/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java
@@ -1037,14 +1037,19 @@ public void testFVHManyMatches() throws Exception {
}
public void testMatchedFieldsFvhRequireFieldMatch() throws Exception {
- checkMatchedFieldsCase(true);
+ checkMatchedFieldsCase(true, "fvh");
}
public void testMatchedFieldsFvhNoRequireFieldMatch() throws Exception {
- checkMatchedFieldsCase(false);
+ checkMatchedFieldsCase(false, "fvh");
}
- private void checkMatchedFieldsCase(boolean requireFieldMatch) throws Exception {
+ public void testMatchedFieldsUnified() throws Exception {
+ // unified highlighter requires that "require_field_match" is true when matched fields are used
+ checkMatchedFieldsCase(true, "unified");
+ }
+
+ private void checkMatchedFieldsCase(boolean requireFieldMatch, String type) throws Exception {
Settings.Builder settings = Settings.builder();
settings.put(indexSettings());
settings.put("index.analysis.analyzer.mock_english.tokenizer", "standard");
@@ -1104,7 +1109,7 @@ private void checkMatchedFieldsCase(boolean requireFieldMatch) throws Exception
Field fooField = new Field("foo").numOfFragments(1)
.order("score")
.fragmentSize(25)
- .highlighterType("fvh")
+ .highlighterType(type)
.requireFieldMatch(requireFieldMatch);
SearchRequestBuilder req = prepareSearch("test").highlighter(new HighlightBuilder().field(fooField));
@@ -1125,7 +1130,7 @@ private void checkMatchedFieldsCase(boolean requireFieldMatch) throws Exception
fooField = new Field("foo").numOfFragments(1)
.order("score")
.fragmentSize(25)
- .highlighterType("fvh")
+ .highlighterType(type)
.requireFieldMatch(requireFieldMatch);
fooField.matchedFields("foo", "foo.plain");
req = prepareSearch("test").highlighter(new HighlightBuilder().field(fooField));
@@ -1144,20 +1149,22 @@ private void checkMatchedFieldsCase(boolean requireFieldMatch) throws Exception
fooField = new Field("foo").numOfFragments(1)
.order("score")
.fragmentSize(25)
- .highlighterType("fvh")
+ .highlighterType(type)
.requireFieldMatch(requireFieldMatch);
fooField.matchedFields("foo.plain");
req = prepareSearch("test").highlighter(new HighlightBuilder().field(fooField));
+ // unified highlighter always keeps the original field in the list of matched fields
+ String expectedHighlight0 = type.equals("unified") ? "running with scissors " : "running with scissors";
assertResponse(
req.setQuery(queryStringQuery("foo.plain:running scissors").field("foo")),
- response -> assertHighlight(response, 0, "foo", 0, equalTo("running with scissors"))
+ response -> assertHighlight(response, 0, "foo", 0, equalTo(expectedHighlight0))
);
// Now make sure boosted fields don't blow up when matched fields is both the subfield and stored field.
fooField = new Field("foo").numOfFragments(1)
.order("score")
.fragmentSize(25)
- .highlighterType("fvh")
+ .highlighterType(type)
.requireFieldMatch(requireFieldMatch);
fooField.matchedFields("foo", "foo.plain");
req = prepareSearch("test").highlighter(new HighlightBuilder().field(fooField));
@@ -1184,16 +1191,19 @@ private void checkMatchedFieldsCase(boolean requireFieldMatch) throws Exception
response -> assertHighlight(response, 0, "foo", 0, equalTo("running with scissors "))
);
+ // Unified and FVH highlighters break text into fragments differently
+ String expectedHighlight1 = type.equals("unified") ? "junk junk junk cats junk" : "junk junk cats junk junk";
+
// But we use the best found score when sorting fragments
assertResponse(
req.setQuery(queryStringQuery("cats foo.plain:cats^5").field("foo")),
- response -> assertHighlight(response, 0, "foo", 0, equalTo("junk junk cats junk junk"))
+ response -> assertHighlight(response, 0, "foo", 0, equalTo(expectedHighlight1))
);
// which can also be written by searching on the subfield
assertResponse(
req.setQuery(queryStringQuery("cats").field("foo").field("foo.plain", 5)),
- response -> assertHighlight(response, 0, "foo", 0, equalTo("junk junk cats junk junk"))
+ response -> assertHighlight(response, 0, "foo", 0, equalTo(expectedHighlight1))
);
// Speaking of two fields, you can have two fields, only one of which has matchedFields enabled
@@ -1201,23 +1211,23 @@ private void checkMatchedFieldsCase(boolean requireFieldMatch) throws Exception
Field barField = new Field("bar").numOfFragments(1)
.order("score")
.fragmentSize(25)
- .highlighterType("fvh")
+ .highlighterType(type)
.requireFieldMatch(requireFieldMatch);
assertResponse(req.setQuery(twoFieldsQuery).highlighter(new HighlightBuilder().field(fooField).field(barField)), response -> {
- assertHighlight(response, 0, "foo", 0, equalTo("junk junk cats junk junk"));
+ assertHighlight(response, 0, "foo", 0, equalTo(expectedHighlight1));
assertHighlight(response, 0, "bar", 0, equalTo("cat cat junk junk junk junk"));
});
// And you can enable matchedField highlighting on both
barField.matchedFields("bar", "bar.plain");
assertResponse(req.setQuery(twoFieldsQuery).highlighter(new HighlightBuilder().field(fooField).field(barField)), response -> {
- assertHighlight(response, 0, "foo", 0, equalTo("junk junk cats junk junk"));
- assertHighlight(response, 0, "bar", 0, equalTo("junk junk cats junk junk"));
+ assertHighlight(response, 0, "foo", 0, equalTo(expectedHighlight1));
+ assertHighlight(response, 0, "bar", 0, equalTo(expectedHighlight1));
});
// Setting a matchedField that isn't searched/doesn't exist is simply ignored.
barField.matchedFields("bar", "candy");
assertResponse(req.setQuery(twoFieldsQuery).highlighter(new HighlightBuilder().field(fooField).field(barField)), response -> {
- assertHighlight(response, 0, "foo", 0, equalTo("junk junk cats junk junk"));
+ assertHighlight(response, 0, "foo", 0, equalTo(expectedHighlight1));
assertHighlight(response, 0, "bar", 0, equalTo("cat cat junk junk junk junk"));
});
@@ -1233,12 +1243,15 @@ private void checkMatchedFieldsCase(boolean requireFieldMatch) throws Exception
);
// If the stored field is found but the matched field isn't then you don't get a result either.
- fooField.matchedFields("bar.plain");
- assertResponse(
- req.setQuery(queryStringQuery("running scissors").field("foo").field("foo.plain").field("bar").field("bar.plain"))
- .highlighter(new HighlightBuilder().field(fooField).field(barField)),
- response -> assertThat(response.getHits().getAt(0).getHighlightFields(), not(hasKey("foo")))
- );
+ // only applicable to fvh highlighter, as unified highlighter always keeps the original field in the list of matched fields
+ if (type.equals("fvh")) {
+ fooField.matchedFields("bar.plain");
+ assertResponse(
+ req.setQuery(queryStringQuery("running scissors").field("foo").field("foo.plain").field("bar").field("bar.plain"))
+ .highlighter(new HighlightBuilder().field(fooField).field(barField)),
+ response -> assertThat(response.getHits().getAt(0).getHighlightFields(), not(hasKey("foo")))
+ );
+ }
// But if you add the stored field to the list of matched fields then you'll get a result again
fooField.matchedFields("foo", "bar.plain");
@@ -1261,11 +1274,22 @@ private void checkMatchedFieldsCase(boolean requireFieldMatch) throws Exception
}
);
- assertFailures(
- req.setQuery(queryStringQuery("result").field("foo").field("foo.plain").field("bar").field("bar.plain")),
- RestStatus.INTERNAL_SERVER_ERROR,
- containsString("IndexOutOfBoundsException")
- );
+ if (type.equals("unified")) {
+ assertResponse(
+ req.setQuery(queryStringQuery("result").field("foo").field("foo.plain").field("bar").field("bar.plain"))
+ .highlighter(new HighlightBuilder().field(fooField).field(barField)),
+ response -> {
+ assertHighlight(response, 0, "bar", 0, equalTo("result "));
+ }
+ );
+ } else {
+ assertFailures(
+ req.setQuery(queryStringQuery("result").field("foo").field("foo.plain").field("bar").field("bar.plain"))
+ .highlighter(new HighlightBuilder().field(fooField).field(barField)),
+ RestStatus.INTERNAL_SERVER_ERROR,
+ containsString("IndexOutOfBoundsException")
+ );
+ }
}
public void testFastVectorHighlighterManyDocs() throws Exception {
diff --git a/server/src/main/java/org/elasticsearch/rest/RestFeatures.java b/server/src/main/java/org/elasticsearch/rest/RestFeatures.java
index 73b788d63b2ab..93cbd6376cbde 100644
--- a/server/src/main/java/org/elasticsearch/rest/RestFeatures.java
+++ b/server/src/main/java/org/elasticsearch/rest/RestFeatures.java
@@ -14,8 +14,16 @@
import org.elasticsearch.rest.action.admin.cluster.RestClusterGetSettingsAction;
import java.util.Map;
+import java.util.Set;
+
+import static org.elasticsearch.search.fetch.subphase.highlight.DefaultHighlighter.UNIFIED_HIGHLIGHTER_MATCHED_FIELDS;
public class RestFeatures implements FeatureSpecification {
+ @Override
+ public Set getFeatures() {
+ return Set.of(UNIFIED_HIGHLIGHTER_MATCHED_FIELDS);
+ }
+
@Override
public Map getHistoricalFeatures() {
return Map.of(RestClusterGetSettingsAction.SUPPORTS_GET_SETTINGS_ACTION, Version.V_8_3_0);
diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java
index da1be48e6b2c0..8f9bca2bbea93 100644
--- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java
+++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java
@@ -21,6 +21,7 @@
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.text.Text;
+import org.elasticsearch.features.NodeFeature;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.IdFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
@@ -36,15 +37,20 @@
import java.io.IOException;
import java.text.BreakIterator;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
+import java.util.Set;
import java.util.function.Predicate;
import static org.elasticsearch.lucene.search.uhighlight.CustomUnifiedHighlighter.MULTIVAL_SEP_CHAR;
public class DefaultHighlighter implements Highlighter {
+
+ public static final NodeFeature UNIFIED_HIGHLIGHTER_MATCHED_FIELDS = new NodeFeature("unified_highlighter_matched_fields");
+
@Override
public boolean canHighlight(MappedFieldType fieldType) {
return true;
@@ -142,8 +148,18 @@ CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) {
}
Builder builder = UnifiedHighlighter.builder(searcher, analyzer);
builder.withBreakIterator(() -> breakIterator);
- builder.withFieldMatcher(fieldMatcher(fieldContext));
builder.withFormatter(passageFormatter);
+
+ Set matchedFields = fieldContext.field.fieldOptions().matchedFields();
+ if (matchedFields != null && matchedFields.isEmpty() == false) {
+ // Masked fields require that the default field matcher is used
+ if (fieldContext.field.fieldOptions().requireFieldMatch() == false) {
+ throw new IllegalArgumentException("Matched fields are not supported when [require_field_match] is set to [false]");
+ }
+ builder.withMaskedFieldsFunc((fieldName) -> fieldName.equals(fieldContext.fieldName) ? matchedFields : Collections.emptySet());
+ } else {
+ builder.withFieldMatcher(fieldMatcher(fieldContext));
+ }
return new CustomUnifiedHighlighter(
builder,
offsetSource,