From 2337eb05a03ea6943e59079ff7c95313a5522239 Mon Sep 17 00:00:00 2001
From: Mayya Sharipova <mayya.sharipova@elastic.co>
Date: Thu, 9 May 2024 10:35:29 -0400
Subject: [PATCH] Unified Highlighter to support matched_fields (#107640)

Add support to the Unified highlighter to combine matches on multiple fields
to highlight a single field: "matched_fields".

Based on Lucene PR: https://github.com/apache/lucene/pull/13268

Lucene PR is based on the concept of masked fields where masked fields
are different from the original highlighted field. This PR in
Elasticsearch uses the already existing highlighter parameter
"matched_fields".
---
 docs/changelog/107640.yaml                    |   6 +
 .../search-your-data/highlighting.asciidoc    | 150 ++----
 .../highlighting-multi-fields-widget.asciidoc |  40 ++
 .../highlighting-multi-fields.asciidoc        | 465 ++++++++++++++++++
 .../rest/yaml/CcsCommonYamlTestSuiteIT.java   |   1 +
 .../60_unified_matched_fields.yml             | 108 ++++
 .../highlight/HighlighterSearchIT.java        |  76 ++-
 .../org/elasticsearch/rest/RestFeatures.java  |   8 +
 .../highlight/DefaultHighlighter.java         |  18 +-
 9 files changed, 723 insertions(+), 149 deletions(-)
 create mode 100644 docs/changelog/107640.yaml
 create mode 100644 docs/reference/tab-widgets/highlighting-multi-fields-widget.asciidoc
 create mode 100644 docs/reference/tab-widgets/highlighting-multi-fields.asciidoc
 create mode 100644 rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.highlight/60_unified_matched_fields.yml

diff --git a/docs/changelog/107640.yaml b/docs/changelog/107640.yaml
new file mode 100644
index 0000000000000..9871943481f20
--- /dev/null
+++ b/docs/changelog/107640.yaml
@@ -0,0 +1,6 @@
+pr: 107640
+summary: "Unified Highlighter to support matched_fields"
+area: Highlighting
+type: enhancement
+issues:
+  - 5172
diff --git a/docs/reference/search/search-your-data/highlighting.asciidoc b/docs/reference/search/search-your-data/highlighting.asciidoc
index 55e737eb00197..7ee13d971b035 100644
--- a/docs/reference/search/search-your-data/highlighting.asciidoc
+++ b/docs/reference/search/search-your-data/highlighting.asciidoc
@@ -46,8 +46,9 @@ for each field.
 The `unified` highlighter uses the Lucene Unified Highlighter. This
 highlighter breaks the text into sentences and uses the BM25 algorithm to score
 individual sentences as if they were documents in the corpus. It also supports
-accurate phrase and multi-term (fuzzy, prefix, regex) highlighting. This is the
-default highlighter.
+accurate phrase and multi-term (fuzzy, prefix, regex) highlighting. The `unified`
+highlighter can combine matches from multiple fields into one result (see
+`matched_fields`). This is the default highlighter.
 
 [discrete]
 [[plain-highlighter]]
@@ -199,10 +200,27 @@ include the search query as part of the `highlight_query`.
 
 matched_fields:: Combine matches on multiple fields to highlight a single field.
 This is most intuitive for multifields that analyze the same string in different
-ways. All `matched_fields` must have `term_vector` set to
-`with_positions_offsets`, but only the field to which
-the matches are combined is loaded so only that field benefits from having
-`store` set to `yes`. Only valid for the `fvh` highlighter.
+ways. Valid for the `unified` and `fvh` highlighters, but the behavior of this
+option is different for each highlighter.
+
+For the `unified` highlighter:
+
+- `matched_fields` array should **not** contain the original field that you want to highlight. The
+original field will be automatically added to the `matched_fields`, and there is no
+way to exclude its matches when highlighting.
+- `matched_fields` and the original field can be indexed with different strategies (with or
+without `offsets`, with or without `term_vectors`).
+- only the original field to which the matches are combined is loaded so only that field
+benefits from having `store` set to `yes`.
+
+For the `fvh` highlighter:
+
+- `matched_fields` array may or may not contain the original field
+depending on your needs. If you want to include the original field's matches in
+highlighting, add it to the `matched_fields` array.
+- all `matched_fields` must have `term_vector` set to `with_positions_offsets`
+- only the original field to which the matches are combined is loaded so only that field
+benefits from having `store` set to `yes`.
 
 no_match_size:: The amount of text you want to return from the beginning
 of the field if there are no matching fragments to highlight. Defaults
@@ -498,100 +516,14 @@ GET /_search
 [discrete]
 === Combine matches on multiple fields
 
-WARNING: This is only supported by the `fvh` highlighter
+WARNING: Supported by the `unified` and `fvh` highlighters.
 
-The Fast Vector Highlighter can combine matches on multiple fields to
+The Unified and Fast Vector Highlighters can combine matches on multiple fields to
 highlight a single field. This is most intuitive for multifields that
-analyze the same string in different ways. All `matched_fields` must have
-`term_vector` set to `with_positions_offsets` but only the field to which
-the matches are combined is loaded so only that field would benefit from having
-`store` set to `yes`.
-
-In the following examples, `comment` is analyzed by the `english`
-analyzer and `comment.plain` is analyzed by the `standard` analyzer.
-
-[source,console]
---------------------------------------------------
-GET /_search
-{
-  "query": {
-    "query_string": {
-      "query": "comment.plain:running scissors",
-      "fields": [ "comment" ]
-    }
-  },
-  "highlight": {
-    "order": "score",
-    "fields": {
-      "comment": {
-        "matched_fields": [ "comment", "comment.plain" ],
-        "type": "fvh"
-      }
-    }
-  }
-}
---------------------------------------------------
-// TEST[setup:my_index]
-
-The above matches both "run with scissors" and "running with scissors"
-and would highlight "running" and "scissors" but not "run". If both
-phrases appear in a large document then "running with scissors" is
-sorted above "run with scissors" in the fragments list because there
-are more matches in that fragment.
-
-[source,console]
---------------------------------------------------
-GET /_search
-{
-  "query": {
-    "query_string": {
-      "query": "running scissors",
-      "fields": ["comment", "comment.plain^10"]
-    }
-  },
-  "highlight": {
-    "order": "score",
-    "fields": {
-      "comment": {
-        "matched_fields": ["comment", "comment.plain"],
-        "type" : "fvh"
-      }
-    }
-  }
-}
---------------------------------------------------
-// TEST[setup:my_index]
+analyze the same string in different ways.
 
-The above highlights "run" as well as "running" and "scissors" but
-still sorts "running with scissors" above "run with scissors" because
-the plain match ("running") is boosted.
+include::{es-ref-dir}/tab-widgets/highlighting-multi-fields-widget.asciidoc[]
 
-[source,console]
---------------------------------------------------
-GET /_search
-{
-  "query": {
-    "query_string": {
-      "query": "running scissors",
-      "fields": [ "comment", "comment.plain^10" ]
-    }
-  },
-  "highlight": {
-    "order": "score",
-    "fields": {
-      "comment": {
-        "matched_fields": [ "comment.plain" ],
-        "type": "fvh"
-      }
-    }
-  }
-}
---------------------------------------------------
-// TEST[setup:my_index]
-
-The above query wouldn't highlight "run" or "scissor" but shows that
-it is just fine not to list the field to which the matches are combined
-(`comment`) in the matched fields.
 
 [NOTE]
 Technically it is also fine to add fields to `matched_fields` that
@@ -599,32 +531,6 @@ don't share the same underlying string as the field to which the matches
 are combined. The results might not make much sense and if one of the
 matches is off the end of the text then the whole query will fail.
 
-[NOTE]
-===================================================================
-There is a small amount of overhead involved with setting
-`matched_fields` to a non-empty array so always prefer
-[source,js]
---------------------------------------------------
-    "highlight": {
-        "fields": {
-            "comment": {}
-        }
-    }
---------------------------------------------------
-// NOTCONSOLE
-to
-[source,js]
---------------------------------------------------
-    "highlight": {
-        "fields": {
-            "comment": {
-                "matched_fields": ["comment"],
-                "type" : "fvh"
-            }
-        }
-    }
---------------------------------------------------
-// NOTCONSOLE
 ===================================================================
 
 
diff --git a/docs/reference/tab-widgets/highlighting-multi-fields-widget.asciidoc b/docs/reference/tab-widgets/highlighting-multi-fields-widget.asciidoc
new file mode 100644
index 0000000000000..e307ea002f542
--- /dev/null
+++ b/docs/reference/tab-widgets/highlighting-multi-fields-widget.asciidoc
@@ -0,0 +1,40 @@
+++++
+<div class="tabs" data-tab-group="highlighter">
+  <div role="tablist" aria-label="Highlighting based on multi fields">
+    <button role="tab"
+            aria-selected="true"
+            aria-controls="unified-tab"
+            id="unified-highlighter">
+      Unified
+    </button>
+    <button role="tab"
+            aria-selected="false"
+            aria-controls="fvh-tab"
+            id="fvh-highlighter"
+            tabindex="-1">
+      FVH
+    </button>
+  </div>
+  <div tabindex="0"
+       role="tabpanel"
+       id="unified-tab"
+       aria-labelledby="unified-highlighter">
+++++
+
+include::highlighting-multi-fields.asciidoc[tag=unified]
+
+++++
+  </div>
+  <div tabindex="0"
+       role="tabpanel"
+       id="fvh-tab"
+       aria-labelledby="fvh-highlighter"
+       hidden="">
+++++
+
+include::highlighting-multi-fields.asciidoc[tag=fvh]
+
+++++
+  </div>
+</div>
+++++
diff --git a/docs/reference/tab-widgets/highlighting-multi-fields.asciidoc b/docs/reference/tab-widgets/highlighting-multi-fields.asciidoc
new file mode 100644
index 0000000000000..5af85f33c99fa
--- /dev/null
+++ b/docs/reference/tab-widgets/highlighting-multi-fields.asciidoc
@@ -0,0 +1,465 @@
+// tag::unified[]
+
+In the following examples, `comment` is analyzed by the `standard`
+analyzer and `comment.english` is analyzed by the `english` analyzer.
+
+[source,console]
+--------------------------------------------------
+PUT index1
+{
+  "mappings": {
+    "properties": {
+      "comment": {
+        "type": "text",
+        "analyzer": "standard",
+        "fields": {
+          "english": {
+            "type": "text",
+            "analyzer": "english"
+          }
+        }
+      }
+    }
+  }
+}
+--------------------------------------------------
+
+
+[source,console]
+--------------------------------------------------
+PUT index1/_bulk?refresh=true
+{"index": {"_id": "doc1" }}
+{"comment": "run with scissors"}
+{ "index" : {"_id": "doc2"} }
+{"comment": "running with scissors"}
+
+--------------------------------------------------
+// TEST[continued]
+
+
+[source,console]
+--------------------------------------------------
+GET index1/_search
+{
+  "query": {
+    "query_string": {
+      "query": "running with scissors",
+      "fields": ["comment", "comment.english"]
+    }
+  },
+  "highlight": {
+    "order": "score",
+    "fields": {
+      "comment": {}
+    }
+  }
+}
+--------------------------------------------------
+// TEST[continued]
+
+The above request matches both "run with scissors" and "running with scissors"
+and would highlight "running" and "scissors" but not "run". If both
+phrases appear in a large document then "running with scissors" is
+sorted above "run with scissors" in the fragments list because there
+are more matches in that fragment.
+
+[source,console-result]
+----
+{
+  ...
+  "hits" : {
+    "total" : {
+      "value" : 2,
+      "relation" : "eq"
+    },
+    "max_score": 1.0577903,
+    "hits" : [
+      {
+        "_index" : "index1",
+        "_id" : "doc2",
+        "_score" : 1.0577903,
+        "_source" : {
+          "comment" : "running with scissors"
+        },
+        "highlight" : {
+          "comment" : [
+            "<em>running</em> <em>with</em> <em>scissors</em>"
+          ]
+        }
+      },
+      {
+        "_index" : "index1",
+        "_id" : "doc1",
+        "_score" : 0.36464313,
+        "_source" : {
+          "comment" : "run with scissors"
+        },
+        "highlight" : {
+          "comment" : [
+            "run <em>with</em> <em>scissors</em>"
+          ]
+        }
+      }
+    ]
+  }
+}
+----
+// TESTRESPONSE[s/\.\.\./"took" : $body.took,"timed_out" : $body.timed_out,"_shards" : $body._shards,/]
+
+The below request highlights "run" as well as "running" and "scissors",
+because the `matched_fields` parameter instructs that for highlighting
+we need to combine matches from the `comment.english` field with
+the matches from the original `comment` field.
+
+[source,console]
+--------------------------------------------------
+GET index1/_search
+{
+  "query": {
+    "query_string": {
+      "query": "running with scissors",
+      "fields": ["comment", "comment.english"]
+    }
+  },
+  "highlight": {
+    "order": "score",
+    "fields": {
+      "comment": {
+        "matched_fields": ["comment.english"]
+      }
+    }
+  }
+}
+--------------------------------------------------
+// TEST[continued]
+
+[source,console-result]
+----
+{
+  ...
+  "hits" : {
+    "total" : {
+      "value" : 2,
+      "relation" : "eq"
+    },
+    "max_score": 1.0577903,
+    "hits" : [
+      {
+        "_index" : "index1",
+        "_id" : "doc2",
+        "_score" : 1.0577903,
+        "_source" : {
+          "comment" : "running with scissors"
+        },
+        "highlight" : {
+          "comment" : [
+            "<em>running</em> <em>with</em> <em>scissors</em>"
+          ]
+        }
+      },
+      {
+        "_index" : "index1",
+        "_id" : "doc1",
+        "_score" : 0.36464313,
+        "_source" : {
+          "comment" : "run with scissors"
+        },
+        "highlight" : {
+          "comment" : [
+            "<em>run</em> <em>with</em> <em>scissors</em>"
+          ]
+        }
+      }
+    ]
+  }
+}
+----
+// TESTRESPONSE[s/\.\.\./"took" : $body.took,"timed_out" : $body.timed_out,"_shards" : $body._shards,/]
+
+// end::unified[]
+
+
+
+
+
+// tag::fvh[]
+
+In the following examples, `comment` is analyzed by the `standard`
+analyzer and `comment.english` is analyzed by the `english` analyzer.
+
+[source,console]
+--------------------------------------------------
+PUT index2
+{
+  "mappings": {
+    "properties": {
+      "comment": {
+        "type": "text",
+        "analyzer": "standard",
+        "term_vector": "with_positions_offsets",
+        "fields": {
+          "english": {
+            "type": "text",
+            "analyzer": "english",
+            "term_vector": "with_positions_offsets"
+          }
+        }
+      }
+    }
+  }
+}
+--------------------------------------------------
+
+
+[source,console]
+--------------------------------------------------
+PUT index2/_bulk?refresh=true
+{"index": {"_id": "doc1" }}
+{"comment": "run with scissors"}
+{ "index" : {"_id": "doc2"} }
+{"comment": "running with scissors"}
+
+--------------------------------------------------
+// TEST[continued]
+
+
+[source,console]
+--------------------------------------------------
+GET index2/_search
+{
+  "query": {
+    "query_string": {
+      "query": "running with scissors",
+      "fields": ["comment", "comment.english"]
+    }
+  },
+  "highlight": {
+    "order": "score",
+    "fields": {
+      "comment": {
+        "type" : "fvh"
+      }
+    }
+  }
+}
+--------------------------------------------------
+// TEST[continued]
+
+The above request matches both "run with scissors" and "running with scissors"
+and would highlight "running" and "scissors" but not "run". If both
+phrases appear in a large document then "running with scissors" is
+sorted above "run with scissors" in the fragments list because there
+are more matches in that fragment.
+
+[source,console-result]
+----
+{
+  ...
+  "hits" : {
+    "total" : {
+      "value" : 2,
+      "relation" : "eq"
+    },
+    "max_score": 1.0577903,
+    "hits" : [
+      {
+        "_index" : "index2",
+        "_id" : "doc2",
+        "_score" : 1.0577903,
+        "_source" : {
+          "comment" : "running with scissors"
+        },
+        "highlight" : {
+          "comment" : [
+            "<em>running</em> <em>with</em> <em>scissors</em>"
+          ]
+        }
+      },
+      {
+        "_index" : "index2",
+        "_id" : "doc1",
+        "_score" : 0.36464313,
+        "_source" : {
+          "comment" : "run with scissors"
+        },
+        "highlight" : {
+          "comment" : [
+            "run <em>with</em> <em>scissors</em>"
+          ]
+        }
+      }
+    ]
+  }
+}
+----
+// TESTRESPONSE[s/\.\.\./"took" : $body.took,"timed_out" : $body.timed_out,"_shards" : $body._shards,/]
+
+The below request highlights "run" as well as "running" and "scissors",
+because the `matched_fields` parameter instructs that for highlighting
+we need to combine matches from the `comment` and `comment.english` fields.
+
+[source,console]
+--------------------------------------------------
+GET index2/_search
+{
+  "query": {
+    "query_string": {
+      "query": "running with scissors",
+      "fields": ["comment", "comment.english"]
+    }
+  },
+  "highlight": {
+    "order": "score",
+    "fields": {
+      "comment": {
+        "type" : "fvh",
+        "matched_fields": ["comment", "comment.english"]
+      }
+    }
+  }
+}
+--------------------------------------------------
+// TEST[continued]
+
+[source,console-result]
+----
+{
+  ...
+  "hits" : {
+    "total" : {
+      "value" : 2,
+      "relation" : "eq"
+    },
+    "max_score": 1.0577903,
+    "hits" : [
+      {
+        "_index" : "index2",
+        "_id" : "doc2",
+        "_score" : 1.0577903,
+        "_source" : {
+          "comment" : "running with scissors"
+        },
+        "highlight" : {
+          "comment" : [
+            "<em>running</em> <em>with</em> <em>scissors</em>"
+          ]
+        }
+      },
+      {
+        "_index" : "index2",
+        "_id" : "doc1",
+        "_score" : 0.36464313,
+        "_source" : {
+          "comment" : "run with scissors"
+        },
+        "highlight" : {
+          "comment" : [
+            "<em>run</em> <em>with</em> <em>scissors</em>"
+          ]
+        }
+      }
+    ]
+  }
+}
+----
+// TESTRESPONSE[s/\.\.\./"took" : $body.took,"timed_out" : $body.timed_out,"_shards" : $body._shards,/]
+
+The below request wouldn't highlight "run", which matches only through the
+stemmed `comment.english` field, but shows that it is just fine not to list the
+field to which the matches are combined (`comment.english`) in the matched fields.
+
+[source,console]
+--------------------------------------------------
+GET index2/_search
+{
+  "query": {
+    "query_string": {
+      "query": "running with scissors",
+      "fields": ["comment", "comment.english"]
+    }
+  },
+  "highlight": {
+    "order": "score",
+    "fields": {
+      "comment.english": {
+        "type" : "fvh",
+        "matched_fields": ["comment"]
+      }
+    }
+  }
+}
+--------------------------------------------------
+// TEST[continued]
+
+
+[source,console-result]
+----
+{
+  ...
+  "hits" : {
+    "total" : {
+      "value" : 2,
+      "relation" : "eq"
+    },
+    "max_score": 1.0577903,
+    "hits" : [
+      {
+        "_index" : "index2",
+        "_id" : "doc2",
+        "_score" : 1.0577903,
+        "_source" : {
+          "comment" : "running with scissors"
+        },
+        "highlight" : {
+          "comment.english" : [
+            "<em>running</em> <em>with</em> <em>scissors</em>"
+          ]
+        }
+      },
+      {
+        "_index" : "index2",
+        "_id" : "doc1",
+        "_score" : 0.36464313,
+        "_source" : {
+          "comment" : "run with scissors"
+        },
+        "highlight" : {
+          "comment.english" : [
+            "run <em>with</em> <em>scissors</em>"
+          ]
+        }
+      }
+    ]
+  }
+}
+----
+// TESTRESPONSE[s/\.\.\./"took" : $body.took,"timed_out" : $body.timed_out,"_shards" : $body._shards,/]
+
+[NOTE]
+===================================================================
+There is a small amount of overhead involved with setting
+`matched_fields` to a non-empty array so always prefer
+[source,js]
+--------------------------------------------------
+    "highlight": {
+        "fields": {
+            "comment": {}
+        }
+    }
+--------------------------------------------------
+// NOTCONSOLE
+to
+[source,js]
+--------------------------------------------------
+    "highlight": {
+        "fields": {
+            "comment": {
+                "matched_fields": ["comment"],
+                "type" : "fvh"
+            }
+        }
+    }
+--------------------------------------------------
+// NOTCONSOLE
+
+// end::fvh[]
diff --git a/qa/ccs-common-rest/src/yamlRestTest/java/org/elasticsearch/test/rest/yaml/CcsCommonYamlTestSuiteIT.java b/qa/ccs-common-rest/src/yamlRestTest/java/org/elasticsearch/test/rest/yaml/CcsCommonYamlTestSuiteIT.java
index a8cff14ff6220..49db5e3a1cd99 100644
--- a/qa/ccs-common-rest/src/yamlRestTest/java/org/elasticsearch/test/rest/yaml/CcsCommonYamlTestSuiteIT.java
+++ b/qa/ccs-common-rest/src/yamlRestTest/java/org/elasticsearch/test/rest/yaml/CcsCommonYamlTestSuiteIT.java
@@ -79,6 +79,7 @@ public class CcsCommonYamlTestSuiteIT extends ESClientYamlSuiteTestCase {
 
     private static LocalClusterConfigProvider commonClusterConfig = cluster -> cluster.module("x-pack-async-search")
         .module("aggregations")
+        .module("analysis-common")
         .module("mapper-extras")
         .module("vector-tile")
         .module("x-pack-analytics")
diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.highlight/60_unified_matched_fields.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.highlight/60_unified_matched_fields.yml
new file mode 100644
index 0000000000000..a0abff2d6726f
--- /dev/null
+++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.highlight/60_unified_matched_fields.yml
@@ -0,0 +1,108 @@
+setup:
+  - requires:
+      cluster_features: 'unified_highlighter_matched_fields'
+      reason: 'test requires unified highlighter to support matched_fields'
+
+  - do:
+      indices.create:
+          index: index1
+          body:
+            settings:
+              index:
+                number_of_shards: 1
+                number_of_replicas: 0
+              analysis:
+                filter:
+                  my_edge_ngram:
+                    type: edge_ngram
+                    min_gram: 2
+                    max_gram: 20
+                analyzer:
+                  my_analyzer:
+                    tokenizer: whitespace
+                    filter: [ my_edge_ngram ]
+            mappings:
+              properties:
+                title:
+                  type: text
+                  fields:
+                    english:
+                      type: text
+                      analyzer: english
+                    ngram:
+                      type: text
+                      analyzer: my_analyzer
+                body :
+                  type: text
+
+
+  - do:
+      bulk:
+        refresh: true
+        index: index1
+        body:
+          - '{"index": {"_id": 1 }}'
+          - '{"title": "dancing with the stars", "body": "Dancing with the Stars is a popular TV show"}'
+          - '{"index": {"_id": 2 }}'
+          - '{"title": "dance with star", "body": "Dancing with the Stars is a popular TV show"}'
+
+---
+"Highlight based on single masked field":
+  - do:
+      search:
+        index: index1
+        body:
+          query:
+            query_string:
+              query: "\"dancing with the stars\""
+              fields: ["title^5", "title.english"]
+              phrase_slop: 2
+          highlight:
+            fields:
+              title:
+                matched_fields: ["title.english"]
+
+  - length: {hits.hits: 2}
+  - match: {hits.hits.0.highlight.title.0: "<em>dancing with the stars</em>"}
+  - match: {hits.hits.1.highlight.title.0: "<em>dance with star</em>"}
+
+---
+"Highlight based on multiple masked fields":
+  - do:
+      search:
+        index: index1
+        body:
+          query:
+            query_string:
+              query: "dan with star"
+              fields: ["title^5", "title.ngram", "title.english"]
+          highlight:
+            fields:
+              title:
+                matched_fields: ["title.ngram", "title.english"]
+
+  - length: {hits.hits: 2}
+  - match: {hits.hits.0.highlight.title.0: "<em>dance</em> <em>with</em> <em>star</em>" }
+  - match: {hits.hits.1.highlight.title.0: "<em>dancing</em> <em>with</em> the <em>stars</em>"}
+
+
+---
+"Highlight using matched_fields is not allowed when require_field_match is set to false":
+  - do:
+      catch: bad_request
+      search:
+        index: index1
+        body:
+          query:
+            query_string:
+              query: "dan with star"
+              fields: ["title^5", "title.ngram", "title.english"]
+          highlight:
+            require_field_match: false
+            fields:
+              title:
+                matched_fields: ["title.ngram", "title.english"]
+
+  - match: { status: 400 }
+  - match: { error.root_cause.0.type: "illegal_argument_exception" }
+  - match: { error.root_cause.0.reason: "Matched fields are not supported when [require_field_match] is set to [false]" }
diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java
index ab72dbd4db707..0a6fceea9a3f1 100644
--- a/server/src/internalClusterTest/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java
+++ b/server/src/internalClusterTest/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java
@@ -1037,14 +1037,19 @@ public void testFVHManyMatches() throws Exception {
     }
 
     public void testMatchedFieldsFvhRequireFieldMatch() throws Exception {
-        checkMatchedFieldsCase(true);
+        checkMatchedFieldsCase(true, "fvh");
     }
 
     public void testMatchedFieldsFvhNoRequireFieldMatch() throws Exception {
-        checkMatchedFieldsCase(false);
+        checkMatchedFieldsCase(false, "fvh");
     }
 
-    private void checkMatchedFieldsCase(boolean requireFieldMatch) throws Exception {
+    public void testMatchedFieldsUnified() throws Exception {
+        // unified highlighter requires that "require_field_match" is true when matched fields are used
+        checkMatchedFieldsCase(true, "unified");
+    }
+
+    private void checkMatchedFieldsCase(boolean requireFieldMatch, String type) throws Exception {
         Settings.Builder settings = Settings.builder();
         settings.put(indexSettings());
         settings.put("index.analysis.analyzer.mock_english.tokenizer", "standard");
@@ -1104,7 +1109,7 @@ private void checkMatchedFieldsCase(boolean requireFieldMatch) throws Exception
         Field fooField = new Field("foo").numOfFragments(1)
             .order("score")
             .fragmentSize(25)
-            .highlighterType("fvh")
+            .highlighterType(type)
             .requireFieldMatch(requireFieldMatch);
         SearchRequestBuilder req = prepareSearch("test").highlighter(new HighlightBuilder().field(fooField));
 
@@ -1125,7 +1130,7 @@ private void checkMatchedFieldsCase(boolean requireFieldMatch) throws Exception
         fooField = new Field("foo").numOfFragments(1)
             .order("score")
             .fragmentSize(25)
-            .highlighterType("fvh")
+            .highlighterType(type)
             .requireFieldMatch(requireFieldMatch);
         fooField.matchedFields("foo", "foo.plain");
         req = prepareSearch("test").highlighter(new HighlightBuilder().field(fooField));
@@ -1144,20 +1149,22 @@ private void checkMatchedFieldsCase(boolean requireFieldMatch) throws Exception
         fooField = new Field("foo").numOfFragments(1)
             .order("score")
             .fragmentSize(25)
-            .highlighterType("fvh")
+            .highlighterType(type)
             .requireFieldMatch(requireFieldMatch);
         fooField.matchedFields("foo.plain");
         req = prepareSearch("test").highlighter(new HighlightBuilder().field(fooField));
+        // unified highlighter always keeps the original field in the list of matched fields
+        String expectedHighlight0 = type.equals("unified") ? "<em>running</em> with <em>scissors</em>" : "<em>running</em> with scissors";
         assertResponse(
             req.setQuery(queryStringQuery("foo.plain:running scissors").field("foo")),
-            response -> assertHighlight(response, 0, "foo", 0, equalTo("<em>running</em> with scissors"))
+            response -> assertHighlight(response, 0, "foo", 0, equalTo(expectedHighlight0))
         );
 
         // Now make sure boosted fields don't blow up when matched fields is both the subfield and stored field.
         fooField = new Field("foo").numOfFragments(1)
             .order("score")
             .fragmentSize(25)
-            .highlighterType("fvh")
+            .highlighterType(type)
             .requireFieldMatch(requireFieldMatch);
         fooField.matchedFields("foo", "foo.plain");
         req = prepareSearch("test").highlighter(new HighlightBuilder().field(fooField));
@@ -1184,16 +1191,19 @@ private void checkMatchedFieldsCase(boolean requireFieldMatch) throws Exception
             response -> assertHighlight(response, 0, "foo", 0, equalTo("<em>running</em> with <em>scissors</em>"))
         );
 
+        // Unified and FVH highlighters break text into fragments differently
+        String expectedHighlight1 = type.equals("unified") ? "junk junk junk <em>cats</em> junk" : "junk junk <em>cats</em> junk junk";
+
         // But we use the best found score when sorting fragments
         assertResponse(
             req.setQuery(queryStringQuery("cats foo.plain:cats^5").field("foo")),
-            response -> assertHighlight(response, 0, "foo", 0, equalTo("junk junk <em>cats</em> junk junk"))
+            response -> assertHighlight(response, 0, "foo", 0, equalTo(expectedHighlight1))
         );
 
         // which can also be written by searching on the subfield
         assertResponse(
             req.setQuery(queryStringQuery("cats").field("foo").field("foo.plain", 5)),
-            response -> assertHighlight(response, 0, "foo", 0, equalTo("junk junk <em>cats</em> junk junk"))
+            response -> assertHighlight(response, 0, "foo", 0, equalTo(expectedHighlight1))
         );
 
         // Speaking of two fields, you can have two fields, only one of which has matchedFields enabled
@@ -1201,23 +1211,23 @@ private void checkMatchedFieldsCase(boolean requireFieldMatch) throws Exception
         Field barField = new Field("bar").numOfFragments(1)
             .order("score")
             .fragmentSize(25)
-            .highlighterType("fvh")
+            .highlighterType(type)
             .requireFieldMatch(requireFieldMatch);
         assertResponse(req.setQuery(twoFieldsQuery).highlighter(new HighlightBuilder().field(fooField).field(barField)), response -> {
-            assertHighlight(response, 0, "foo", 0, equalTo("junk junk <em>cats</em> junk junk"));
+            assertHighlight(response, 0, "foo", 0, equalTo(expectedHighlight1));
             assertHighlight(response, 0, "bar", 0, equalTo("<em>cat</em> <em>cat</em> junk junk junk junk"));
         });
         // And you can enable matchedField highlighting on both
         barField.matchedFields("bar", "bar.plain");
         assertResponse(req.setQuery(twoFieldsQuery).highlighter(new HighlightBuilder().field(fooField).field(barField)), response -> {
-            assertHighlight(response, 0, "foo", 0, equalTo("junk junk <em>cats</em> junk junk"));
-            assertHighlight(response, 0, "bar", 0, equalTo("junk junk <em>cats</em> junk junk"));
+            assertHighlight(response, 0, "foo", 0, equalTo(expectedHighlight1));
+            assertHighlight(response, 0, "bar", 0, equalTo(expectedHighlight1));
         });
 
         // Setting a matchedField that isn't searched/doesn't exist is simply ignored.
         barField.matchedFields("bar", "candy");
         assertResponse(req.setQuery(twoFieldsQuery).highlighter(new HighlightBuilder().field(fooField).field(barField)), response -> {
-            assertHighlight(response, 0, "foo", 0, equalTo("junk junk <em>cats</em> junk junk"));
+            assertHighlight(response, 0, "foo", 0, equalTo(expectedHighlight1));
             assertHighlight(response, 0, "bar", 0, equalTo("<em>cat</em> <em>cat</em> junk junk junk junk"));
         });
 
@@ -1233,12 +1243,15 @@ private void checkMatchedFieldsCase(boolean requireFieldMatch) throws Exception
         );
 
         // If the stored field is found but the matched field isn't then you don't get a result either.
-        fooField.matchedFields("bar.plain");
-        assertResponse(
-            req.setQuery(queryStringQuery("running scissors").field("foo").field("foo.plain").field("bar").field("bar.plain"))
-                .highlighter(new HighlightBuilder().field(fooField).field(barField)),
-            response -> assertThat(response.getHits().getAt(0).getHighlightFields(), not(hasKey("foo")))
-        );
+        // Only applicable to the fvh highlighter: the unified highlighter always keeps the original field in the list of matched fields
+        if (type.equals("fvh")) {
+            fooField.matchedFields("bar.plain");
+            assertResponse(
+                req.setQuery(queryStringQuery("running scissors").field("foo").field("foo.plain").field("bar").field("bar.plain"))
+                    .highlighter(new HighlightBuilder().field(fooField).field(barField)),
+                response -> assertThat(response.getHits().getAt(0).getHighlightFields(), not(hasKey("foo")))
+            );
+        }
 
         // But if you add the stored field to the list of matched fields then you'll get a result again
         fooField.matchedFields("foo", "bar.plain");
@@ -1261,11 +1274,22 @@ private void checkMatchedFieldsCase(boolean requireFieldMatch) throws Exception
             }
         );
 
-        assertFailures(
-            req.setQuery(queryStringQuery("result").field("foo").field("foo.plain").field("bar").field("bar.plain")),
-            RestStatus.INTERNAL_SERVER_ERROR,
-            containsString("IndexOutOfBoundsException")
-        );
+        if (type.equals("unified")) {
+            assertResponse(
+                req.setQuery(queryStringQuery("result").field("foo").field("foo.plain").field("bar").field("bar.plain"))
+                    .highlighter(new HighlightBuilder().field(fooField).field(barField)),
+                response -> {
+                    assertHighlight(response, 0, "bar", 0, equalTo("<em>result</em>"));
+                }
+            );
+        } else {
+            assertFailures(
+                req.setQuery(queryStringQuery("result").field("foo").field("foo.plain").field("bar").field("bar.plain"))
+                    .highlighter(new HighlightBuilder().field(fooField).field(barField)),
+                RestStatus.INTERNAL_SERVER_ERROR,
+                containsString("IndexOutOfBoundsException")
+            );
+        }
     }
 
     public void testFastVectorHighlighterManyDocs() throws Exception {
diff --git a/server/src/main/java/org/elasticsearch/rest/RestFeatures.java b/server/src/main/java/org/elasticsearch/rest/RestFeatures.java
index 73b788d63b2ab..93cbd6376cbde 100644
--- a/server/src/main/java/org/elasticsearch/rest/RestFeatures.java
+++ b/server/src/main/java/org/elasticsearch/rest/RestFeatures.java
@@ -14,8 +14,16 @@
 import org.elasticsearch.rest.action.admin.cluster.RestClusterGetSettingsAction;
 
 import java.util.Map;
+import java.util.Set;
+
+import static org.elasticsearch.search.fetch.subphase.highlight.DefaultHighlighter.UNIFIED_HIGHLIGHTER_MATCHED_FIELDS;
 
 public class RestFeatures implements FeatureSpecification {
+    @Override
+    public Set<NodeFeature> getFeatures() {
+        return Set.of(UNIFIED_HIGHLIGHTER_MATCHED_FIELDS);
+    }
+
     @Override
     public Map<NodeFeature, Version> getHistoricalFeatures() {
         return Map.of(RestClusterGetSettingsAction.SUPPORTS_GET_SETTINGS_ACTION, Version.V_8_3_0);
diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java
index da1be48e6b2c0..8f9bca2bbea93 100644
--- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java
+++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java
@@ -21,6 +21,7 @@
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.common.text.Text;
+import org.elasticsearch.features.NodeFeature;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.mapper.IdFieldMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
@@ -36,15 +37,20 @@
 import java.io.IOException;
 import java.text.BreakIterator;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
+import java.util.Set;
 import java.util.function.Predicate;
 
 import static org.elasticsearch.lucene.search.uhighlight.CustomUnifiedHighlighter.MULTIVAL_SEP_CHAR;
 
 public class DefaultHighlighter implements Highlighter {
+
+    public static final NodeFeature UNIFIED_HIGHLIGHTER_MATCHED_FIELDS = new NodeFeature("unified_highlighter_matched_fields");
+
     @Override
     public boolean canHighlight(MappedFieldType fieldType) {
         return true;
@@ -142,8 +148,18 @@ CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) {
         }
         Builder builder = UnifiedHighlighter.builder(searcher, analyzer);
         builder.withBreakIterator(() -> breakIterator);
-        builder.withFieldMatcher(fieldMatcher(fieldContext));
         builder.withFormatter(passageFormatter);
+
+        Set<String> matchedFields = fieldContext.field.fieldOptions().matchedFields();
+        if (matchedFields != null && matchedFields.isEmpty() == false) {
+            // Masked fields require that the default field matcher is used
+            if (fieldContext.field.fieldOptions().requireFieldMatch() == false) {
+                throw new IllegalArgumentException("Matched fields are not supported when [require_field_match] is set to [false]");
+            }
+            builder.withMaskedFieldsFunc((fieldName) -> fieldName.equals(fieldContext.fieldName) ? matchedFields : Collections.emptySet());
+        } else {
+            builder.withFieldMatcher(fieldMatcher(fieldContext));
+        }
         return new CustomUnifiedHighlighter(
             builder,
             offsetSource,