From 0f00284ff9b3e72983c4c6438968550fd8975d5a Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Thu, 4 Apr 2024 14:36:33 -0400 Subject: [PATCH] UnifiedHighlighter highlight on multiple fields Add ability to highlight on multiple matched fields for UnifiedHighlighter. FastVectorHighlighter for a long time has had an option to highlight a single field based on matches from several fields. But UnifiedHighlighter was missing this option. This adds this ability. --- .../tasks/SearchTravRetHighlightTask.java | 3 +- .../uhighlight/MultiFieldsOffsetStrategy.java | 62 +++++ .../search/uhighlight/UnifiedHighlighter.java | 71 ++++-- .../uhighlight/TestUnifiedHighlighter.java | 213 ++++++++++++++---- .../uhighlight/TestUnifiedHighlighterMTQ.java | 12 +- .../TestUnifiedHighlighterRanking.java | 8 +- .../TestUnifiedHighlighterStrictPhrases.java | 6 +- .../TestUnifiedHighlighterTermIntervals.java | 39 ++-- .../TestUnifiedHighlighterTermVec.java | 2 +- .../TestUnifiedHighlighterExtensibility.java | 14 +- 10 files changed, 330 insertions(+), 100 deletions(-) create mode 100644 lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiFieldsOffsetStrategy.java diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetHighlightTask.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetHighlightTask.java index dd6c647fae18..8b468096bff1 100644 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetHighlightTask.java +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetHighlightTask.java @@ -310,7 +310,8 @@ protected OffsetSource getOffsetSource(String field) { @Override public void withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception { reset(searcher); - Map result = highlighter.highlightFields(fields, q, hits, maxPassages); + Map result = + highlighter.highlightFields(fields, q, hits, maxPassages, null); 
preventOptimizeAway = result.size(); } } diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiFieldsOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiFieldsOffsetStrategy.java new file mode 100644 index 000000000000..94331f7770e3 --- /dev/null +++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiFieldsOffsetStrategy.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.search.uhighlight; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import org.apache.lucene.index.LeafReader; + +/** + * FieldOffsetStrategy that combines offsets from multiple fields. Used to highlight a single field + * based on matches from multiple fields. 
+ * + * @lucene.internal + */ +public class MultiFieldsOffsetStrategy extends FieldOffsetStrategy { + private final List fieldsOffsetStrategies; + + public MultiFieldsOffsetStrategy(List fieldsOffsetStrategies) { + super(null); + this.fieldsOffsetStrategies = fieldsOffsetStrategies; + } + + @Override + public String getField() { + throw new IllegalStateException("MultiFieldsOffsetStrategy does not have a single field."); + } + + @Override + public UnifiedHighlighter.OffsetSource getOffsetSource() { + // TODO: what should be returned here as offset source? + return fieldsOffsetStrategies.getFirst().getOffsetSource(); + } + + @Override + public OffsetsEnum getOffsetsEnum(LeafReader reader, int docId, String content) + throws IOException { + List fieldsOffsetsEnums = new ArrayList<>(fieldsOffsetStrategies.size()); + for (FieldOffsetStrategy fieldOffsetStrategy : fieldsOffsetStrategies) { + OffsetsEnum offsetsEnum = fieldOffsetStrategy.getOffsetsEnum(reader, docId, content); + if (offsetsEnum != OffsetsEnum.EMPTY) { + fieldsOffsetsEnums.add(offsetsEnum); + } + } + return new OffsetsEnum.MultiOffsetsEnum(fieldsOffsetsEnums); + } +} diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java index dfedf7974fd4..f72c64fed61e 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java @@ -701,7 +701,7 @@ protected FieldInfo getFieldInfo(String field) { * IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS} */ public String[] highlight(String field, Query query, TopDocs topDocs) throws IOException { - return highlight(field, query, topDocs, 1); + return highlight(field, query, topDocs, 1, null); } /** @@ -712,6 +712,7 @@ public String[] highlight(String field, Query query, TopDocs topDocs) throws IOE * @param 
topDocs TopDocs containing the summary result documents to highlight. * @param maxPassages The maximum number of top-N ranked passages used to form the highlighted * snippets. + * @param matchedFields fields whose matches are combined to highlight the given field * @return Array of formatted snippets corresponding to the documents in topDocs. If * no highlights were found for a document, the first {@code maxPassages} sentences from the * field will be returned. @@ -719,10 +720,13 @@ public String[] highlight(String field, Query query, TopDocs topDocs) throws IOE * @throws IllegalArgumentException if field was indexed without {@link * IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS} */ - public String[] highlight(String field, Query query, TopDocs topDocs, int maxPassages) + public String[] highlight( + String field, Query query, TopDocs topDocs, int maxPassages, Set matchedFields) throws IOException { + List<Set<String>> matchedFieldsIn = matchedFields == null ? null : List.of(matchedFields); Map res = - highlightFields(new String[] {field}, query, topDocs, new int[] {maxPassages}); + highlightFields( + new String[] {field}, query, topDocs, new int[] {maxPassages}, matchedFieldsIn); return res.get(field); } @@ -753,7 +757,7 @@ public Map highlightFields(String[] fields, Query query, TopDo throws IOException { int[] maxPassages = new int[fields.length]; Arrays.fill(maxPassages, 1); - return highlightFields(fields, query, topDocs, maxPassages); + return highlightFields(fields, query, topDocs, maxPassages, null); } /** @@ -774,6 +778,7 @@ public Map highlightFields(String[] fields, Query query, TopDo * @param topDocs TopDocs containing the summary result documents to highlight. * @param maxPassages The maximum number of top-N ranked passages per-field used to form the * highlighted snippets.
+ * @param matchedFields fields whose matches are combined to highlight, per-field * @return Map keyed on field name, containing the array of formatted snippets corresponding to * the documents in topDocs. If no highlights were found for a document, the * first {@code maxPassages} sentences from the field will be returned. @@ -782,14 +787,19 @@ public Map highlightFields(String[] fields, Query query, TopDo * IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS} */ public Map highlightFields( - String[] fields, Query query, TopDocs topDocs, int[] maxPassages) throws IOException { + String[] fields, + Query query, + TopDocs topDocs, + int[] maxPassages, + List<Set<String>> matchedFields) + throws IOException { final ScoreDoc[] scoreDocs = topDocs.scoreDocs; int[] docids = new int[scoreDocs.length]; for (int i = 0; i < docids.length; i++) { docids[i] = scoreDocs[i].doc; } - return highlightFields(fields, query, docids, maxPassages); + return highlightFields(fields, query, docids, maxPassages, matchedFields); } /** @@ -800,6 +810,7 @@ public Map highlightFields( * @param docidsIn containing the document IDs to highlight. * @param maxPassagesIn The maximum number of top-N ranked passages per-field used to form the * highlighted snippets. + * @param matchedFieldsIn fields whose matches are combined to highlight, per-field * @return Map keyed on field name, containing the array of formatted snippets corresponding to * the documents in docidsIn. If no highlights were found for a document, the * first {@code maxPassages} from the field will be returned.
@@ -808,10 +819,16 @@ public Map highlightFields( * IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS} */ public Map highlightFields( - String[] fieldsIn, Query query, int[] docidsIn, int[] maxPassagesIn) throws IOException { + String[] fieldsIn, + Query query, + int[] docidsIn, + int[] maxPassagesIn, + List<Set<String>> matchedFieldsIn) + throws IOException { Map snippets = new HashMap<>(); for (Map.Entry ent : - highlightFieldsAsObjects(fieldsIn, query, docidsIn, maxPassagesIn).entrySet()) { + highlightFieldsAsObjects(fieldsIn, query, docidsIn, maxPassagesIn, matchedFieldsIn) + .entrySet()) { Object[] snippetObjects = ent.getValue(); String[] snippetStrings = new String[snippetObjects.length]; snippets.put(ent.getKey(), snippetStrings); @@ -836,6 +853,7 @@ public Map highlightFields( * @param docIdsIn containing the document IDs to highlight. * @param maxPassagesIn The maximum number of top-N ranked passages per-field used to form the * highlighted snippets. + * @param matchedFieldsIn fields whose matches are combined to highlight, per-field * @return Map keyed on field name, containing the array of formatted snippets corresponding to * the documents in docIdsIn. If no highlights were found for a document, the * first {@code maxPassages} from the field will be returned.
@@ -844,7 +862,12 @@ public Map highlightFields( * IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS} */ protected Map highlightFieldsAsObjects( - String[] fieldsIn, Query query, int[] docIdsIn, int[] maxPassagesIn) throws IOException { + String[] fieldsIn, + Query query, + int[] docIdsIn, + int[] maxPassagesIn, + List<Set<String>> matchedFieldsIn) + throws IOException { if (fieldsIn.length < 1) { throw new IllegalArgumentException("fieldsIn must not be empty"); } @@ -877,7 +900,12 @@ protected Map highlightFieldsAsObjects( int numPostings = 0; for (int f = 0; f < fields.length; f++) { FieldHighlighter fieldHighlighter = - getFieldHighlighter(fields[f], query, queryTerms, maxPassages[f]); + getFieldHighlighter( + fields[f], + query, + queryTerms, + maxPassages[f], + matchedFieldsIn == null ? null : matchedFieldsIn.get(f)); fieldHighlighters[f] = fieldHighlighter; switch (fieldHighlighter.getOffsetSource()) { @@ -1059,17 +1087,30 @@ public Object highlightWithoutSearcher(String field, Query query, String content } Objects.requireNonNull(content, "content is required"); Set queryTerms = extractTerms(query); - return getFieldHighlighter(field, query, queryTerms, maxPassages) + return getFieldHighlighter(field, query, queryTerms, maxPassages, null) .highlightFieldForDoc(null, -1, content); } protected FieldHighlighter getFieldHighlighter( - String field, Query query, Set allTerms, int maxPassages) { - UHComponents components = getHighlightComponents(field, query, allTerms); - OffsetSource offsetSource = getOptimizedOffsetSource(components); + String field, Query query, Set allTerms, int maxPassages, Set matchedFields) { + + FieldOffsetStrategy fieldOffsetStrategy; + if (matchedFields == null) { + UHComponents components = getHighlightComponents(field, query, allTerms); + OffsetSource offsetSource = getOptimizedOffsetSource(components); + fieldOffsetStrategy = getOffsetStrategy(offsetSource, components); + } else { + List fieldsOffsetStrategies = new
ArrayList<>(matchedFields.size()); + for (String matchedField : matchedFields) { + UHComponents components = getHighlightComponents(matchedField, query, allTerms); + OffsetSource offsetSource = getOptimizedOffsetSource(components); + fieldsOffsetStrategies.add(getOffsetStrategy(offsetSource, components)); + } + fieldOffsetStrategy = new MultiFieldsOffsetStrategy(fieldsOffsetStrategies); + } return newFieldHighlighter( field, - getOffsetStrategy(offsetSource, components), + fieldOffsetStrategy, new SplittingBreakIterator(getBreakIterator(field), UnifiedHighlighter.MULTIVAL_SEP_CHAR), getScorer(field), maxPassages, diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java index 091af3595f18..3c5c4835f2a2 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java @@ -30,13 +30,19 @@ import java.util.Map; import java.util.Objects; import java.util.Set; +import java.util.TreeMap; import java.util.function.Predicate; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.DelegatingAnalyzerWrapper; +import org.apache.lucene.analysis.core.WhitespaceAnalyzer; +import org.apache.lucene.analysis.en.EnglishAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; +import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; @@ -54,8 +60,13 @@ import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.Weight; import 
org.apache.lucene.search.uhighlight.UnifiedHighlighter.HighlightFlag; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.analysis.MockAnalyzer; import org.apache.lucene.tests.index.RandomIndexWriter; +import org.apache.lucene.util.QueryBuilder; import org.apache.lucene.util.automaton.Automata; +import org.apache.lucene.util.automaton.CharacterRunAutomaton; +import org.apache.lucene.util.automaton.RegExp; public class TestUnifiedHighlighter extends UnifiedHighlighterTestBase { @ParametersFactory @@ -126,6 +137,7 @@ static UnifiedHighlighter overrideFieldMatcherForTests( static UnifiedHighlighter.Builder overriddenBuilderForTests( UnifiedHighlighter.Builder uhBuilder, EnumSet mandatoryFlags) { + return new UnifiedHighlighter.Builder( uhBuilder.getIndexSearcher(), uhBuilder.getIndexAnalyzer()) { Set flags; @@ -321,7 +333,7 @@ public void testMaxLengthWithMultivalue() throws Exception { Query query = new TermQuery(new Term("body", "field")); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(1, topDocs.totalHits.value); - String[] snippets = highlighter.highlight("body", query, topDocs, 10); + String[] snippets = highlighter.highlight("body", query, topDocs, 10, null); assertEquals(1, snippets.length); String highlightedValue = "This is a multivalued field. Sentencetwo field."; assertEquals(highlightedValue + "... " + highlightedValue, snippets[0]); @@ -423,7 +435,7 @@ public void testMultiplePassages() throws Exception { Query query = new TermQuery(new Term("body", "test")); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(2, topDocs.totalHits.value); - String[] snippets = highlighter.highlight("body", query, topDocs, 2); + String[] snippets = highlighter.highlight("body", query, topDocs, 2, null); assertEquals(2, snippets.length); assertEquals( "This is a test. Just a test highlighting from postings. 
", snippets[0]); @@ -464,7 +476,7 @@ public void testBuddhism() throws Exception { UnifiedHighlighter.Builder uhBuilder = new UnifiedHighlighter.Builder(searcher, indexAnalyzer).withHighlightPhrasesStrictly(false); UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder); - String[] snippets = highlighter.highlight("body", query, topDocs, 2); + String[] snippets = highlighter.highlight("body", query, topDocs, 2, null); assertEquals(1, snippets.length); if (highlighter .getFlags("body") @@ -520,7 +532,7 @@ public void testCuriousGeorge() throws Exception { UnifiedHighlighter.Builder uhBuilder = new UnifiedHighlighter.Builder(searcher, indexAnalyzer).withHighlightPhrasesStrictly(false); UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder); - String[] snippets = highlighter.highlight("body", query, topDocs, 2); + String[] snippets = highlighter.highlight("body", query, topDocs, 2, null); assertEquals(1, snippets.length); assertFalse(snippets[0].contains("CuriousCurious")); ir.close(); @@ -553,7 +565,7 @@ public void testCambridgeMA() throws Exception { new UnifiedHighlighter.Builder(searcher, indexAnalyzer) .withMaxLength(Integer.MAX_VALUE - 1); UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder); - String[] snippets = highlighter.highlight("body", query, topDocs, 2); + String[] snippets = highlighter.highlight("body", query, topDocs, 2, null); assertEquals(1, snippets.length); assertTrue(snippets[0].contains("Square")); assertTrue(snippets[0].contains("Porter")); @@ -579,7 +591,7 @@ public void testPassageRanking() throws Exception { Query query = new TermQuery(new Term("body", "test")); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(1, topDocs.totalHits.value); - String[] snippets = highlighter.highlight("body", query, topDocs, 2); + String[] snippets = highlighter.highlight("body", query, topDocs, 2, null); assertEquals(1, snippets.length); assertEquals( "This is a test. ... 
Feel free to test test test test test test test.", @@ -618,7 +630,7 @@ public void testBooleanMustNot() throws Exception { new UnifiedHighlighter.Builder(searcher, indexAnalyzer) .withMaxLength(Integer.MAX_VALUE - 1); UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder); - String[] snippets = highlighter.highlight("body", query, topDocs, 2); + String[] snippets = highlighter.highlight("body", query, topDocs, 2, null); assertEquals(1, snippets.length); assertFalse(snippets[0].contains("both")); ir.close(); @@ -647,7 +659,7 @@ public void testHighlightAllText() throws Exception { Query query = new TermQuery(new Term("body", "test")); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(1, topDocs.totalHits.value); - String[] snippets = highlighter.highlight("body", query, topDocs, 2); + String[] snippets = highlighter.highlight("body", query, topDocs, 2, null); assertEquals(1, snippets.length); assertEquals( "This is a test. Just highlighting from postings. This is also a much sillier test. Feel free to test test test test test test test.", @@ -683,7 +695,7 @@ public void testSpecificDocIDs() throws Exception { docIDs[1] = hits[1].doc; String[] snippets = highlighter - .highlightFields(new String[] {"body"}, query, docIDs, new int[] {1}) + .highlightFields(new String[] {"body"}, query, docIDs, new int[] {1}, null) .get("body"); assertEquals(2, snippets.length); assertEquals("Just a test highlighting from postings. ", snippets[0]); @@ -734,7 +746,7 @@ protected BreakIterator getBreakIterator(String field) { Query query = new TermQuery(new Term("body", "test")); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(1, topDocs.totalHits.value); - String[] snippets = highlighter.highlight("body", query, topDocs, 2); + String[] snippets = highlighter.highlight("body", query, topDocs, 2, null); assertEquals(1, snippets.length); assertEquals( "This is a test. Just highlighting from postings. 
This is also a much sillier test. Feel free to test test test test test test test.", @@ -766,7 +778,7 @@ public void testEmptyHighlights() throws Exception { int[] docIDs = new int[] {0}; String[] snippets = highlighter - .highlightFields(new String[] {"body"}, query, docIDs, new int[] {2}) + .highlightFields(new String[] {"body"}, query, docIDs, new int[] {2}, null) .get("body"); assertEquals(1, snippets.length); assertEquals("test this is. another sentence this test has. ", snippets[0]); @@ -799,7 +811,7 @@ public void testNothingAnalyzes() throws Exception { docIDs[0] = docID; String[] snippets = highlighter - .highlightFields(new String[] {"body"}, query, docIDs, new int[] {2}) + .highlightFields(new String[] {"body"}, query, docIDs, new int[] {2}, null) .get("body"); assertEquals(1, snippets.length); assertEquals(" ", snippets[0]); @@ -834,7 +846,7 @@ public void testCustomEmptyHighlights() throws Exception { int[] docIDs = new int[] {0}; String[] snippets = highlighter - .highlightFields(new String[] {"body"}, query, docIDs, new int[] {2}) + .highlightFields(new String[] {"body"}, query, docIDs, new int[] {2}, null) .get("body"); assertEquals(1, snippets.length); assertNull(snippets[0]); @@ -868,7 +880,7 @@ public void testEmptyHighlightsWhole() throws Exception { int[] docIDs = new int[] {0}; String[] snippets = highlighter - .highlightFields(new String[] {"body"}, query, docIDs, new int[] {2}) + .highlightFields(new String[] {"body"}, query, docIDs, new int[] {2}, null) .get("body"); assertEquals(1, snippets.length); assertEquals( @@ -900,7 +912,7 @@ public void testFieldIsMissing() throws Exception { int[] docIDs = new int[] {0}; String[] snippets = highlighter - .highlightFields(new String[] {"bogus"}, query, docIDs, new int[] {2}) + .highlightFields(new String[] {"bogus"}, query, docIDs, new int[] {2}, null) .get("bogus"); assertEquals(1, snippets.length); assertNull(snippets[0]); @@ -932,7 +944,7 @@ public void testFieldIsJustSpace() throws Exception { 
docIDs[0] = docID; String[] snippets = highlighter - .highlightFields(new String[] {"body"}, query, docIDs, new int[] {2}) + .highlightFields(new String[] {"body"}, query, docIDs, new int[] {2}, null) .get("body"); assertEquals(1, snippets.length); assertEquals(" ", snippets[0]); @@ -964,7 +976,7 @@ public void testFieldIsEmptyString() throws Exception { docIDs[0] = docID; String[] snippets = highlighter - .highlightFields(new String[] {"body"}, query, docIDs, new int[] {2}) + .highlightFields(new String[] {"body"}, query, docIDs, new int[] {2}, null) .get("body"); assertEquals(1, snippets.length); assertNull(snippets[0]); @@ -1046,7 +1058,7 @@ public void testMultipleSnippetSizes() throws Exception { .build(); Map snippets = highlighter.highlightFields( - new String[] {"title", "body"}, query, new int[] {0}, new int[] {1, 2}); + new String[] {"title", "body"}, query, new int[] {0}, new int[] {1, 2}, null); String titleHighlight = snippets.get("title")[0]; String bodyHighlight = snippets.get("body")[0]; assertEquals("This is a test. 
", titleHighlight); @@ -1123,7 +1135,8 @@ public String[] format(Passage[] passages, String content) { int[] docIDs = new int[1]; docIDs[0] = topDocs.scoreDocs[0].doc; Map snippets = - highlighter.highlightFieldsAsObjects(new String[] {"body"}, query, docIDs, new int[] {1}); + highlighter.highlightFieldsAsObjects( + new String[] {"body"}, query, docIDs, new int[] {1}, null); Object[] bodySnippets = snippets.get("body"); assertEquals(1, bodySnippets.length); assertTrue( @@ -1185,17 +1198,17 @@ public void testFieldMatcherTermQuery() throws Exception { { TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(1, topDocs.totalHits.value); - String[] snippets = highlighterNoFieldMatch.highlight("title", query, topDocs, 10); + String[] snippets = highlighterNoFieldMatch.highlight("title", query, topDocs, 10, null); assertEquals(1, snippets.length); assertEquals("This is the title field.", snippets[0]); - snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10); + snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10, null); assertEquals(1, snippets.length); assertEquals("This is the title field.", snippets[0]); highlighterFieldMatch = overrideFieldMatcherForTests(highlighterFieldMatch, "text"::equals, "text"); - snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10); + snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10, null); assertEquals(1, snippets.length); assertEquals("This is the title field.", snippets[0]); highlighterFieldMatch = overrideFieldMatcherForTests(highlighterFieldMatch, null, "text"); @@ -1205,13 +1218,13 @@ public void testFieldMatcherTermQuery() throws Exception { { TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(1, topDocs.totalHits.value); - String[] snippets = highlighterNoFieldMatch.highlight("text", query, topDocs, 10); + String[] snippets = highlighterNoFieldMatch.highlight("text", query, topDocs, 10, null); assertEquals(1, 
snippets.length); assertEquals( "This is the text field. You can put some text if you want.", snippets[0]); - snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10); + snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10, null); assertEquals(1, snippets.length); assertEquals( "This is the text field. You can put some text if you want.", @@ -1219,7 +1232,7 @@ public void testFieldMatcherTermQuery() throws Exception { highlighterFieldMatch = overrideFieldMatcherForTests(highlighterFieldMatch, "title"::equals, "title"); - snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10); + snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10, null); assertEquals(1, snippets.length); assertEquals("This is the text field. ", snippets[0]); highlighterFieldMatch = overrideFieldMatcherForTests(highlighterFieldMatch, null, "title"); @@ -1229,17 +1242,17 @@ public void testFieldMatcherTermQuery() throws Exception { { TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(1, topDocs.totalHits.value); - String[] snippets = highlighterNoFieldMatch.highlight("category", query, topDocs, 10); + String[] snippets = highlighterNoFieldMatch.highlight("category", query, topDocs, 10, null); assertEquals(1, snippets.length); assertEquals("This is the category field.", snippets[0]); - snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10); + snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10, null); assertEquals(1, snippets.length); assertEquals("This is the category field.", snippets[0]); highlighterFieldMatch = overrideFieldMatcherForTests(highlighterFieldMatch, "title"::equals, "title"); - snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10); + snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10, null); assertEquals(1, snippets.length); assertEquals("This is the category field.", snippets[0]); highlighterFieldMatch = 
overrideFieldMatcherForTests(highlighterFieldMatch, null, "title"); @@ -1247,6 +1260,110 @@ public void testFieldMatcherTermQuery() throws Exception { ir.close(); } + public void testMatchedFields() throws IOException { + final Map fieldAnalyzers = new TreeMap<>(); + fieldAnalyzers.put("field", new WhitespaceAnalyzer()); + fieldAnalyzers.put("field_english", new EnglishAnalyzer()); // English stemming and stopwords + fieldAnalyzers.put( // Each letter is a token + "field_characters", + new MockAnalyzer(random(), new CharacterRunAutomaton(new RegExp(".").toAutomaton()), true)); + fieldAnalyzers.put( // Every three letters is a token + "field_tripples", + new MockAnalyzer( + random(), new CharacterRunAutomaton(new RegExp("...").toAutomaton()), true)); + Analyzer analyzer = + new DelegatingAnalyzerWrapper(Analyzer.PER_FIELD_REUSE_STRATEGY) { + @Override + public Analyzer getWrappedAnalyzer(String fieldName) { + return fieldAnalyzers.get(fieldName); + } + }; + FieldType fieldTypeMatched = new FieldType(fieldType); + fieldTypeMatched.setStored(false); // matched fields don't need to be stored + fieldTypeMatched.freeze(); + + try (Directory dir = newDirectory()) { + try (IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(analyzer))) { + Document doc = new Document(); + doc.add(new Field("field", "dance with star", fieldType)); + doc.add(new Field("field_english", "dance with star", fieldTypeMatched)); + doc.add(new Field("field_characters", "dance with star", fieldTypeMatched)); + doc.add(new Field("field_tripples", "dance with star", fieldTypeMatched)); + writer.addDocument(doc); + } + + try (IndexReader reader = DirectoryReader.open(dir)) { + IndexSearcher searcher = newSearcher(reader); + UnifiedHighlighter highlighter = + randomUnifiedHighlighter( + searcher, analyzer, EnumSet.of(HighlightFlag.PHRASES), random().nextBoolean()); + boolean ifWeightMatches = + highlighter.getFlags("field").contains(HighlightFlag.WEIGHT_MATCHES); + + // field is highlighted 
based on the matches from the "field_english" + matchedFieldsTestCase( + analyzer, + searcher, + highlighter, + Set.of("field", "field_english"), + "dancing with the stars", + ifWeightMatches ? "dance with star" : "dance with star"); + + // field is highlighted based on the matches from the "field_characters" + matchedFieldsTestCase( + analyzer, + searcher, + highlighter, + Set.of("field", "field_characters"), + "danc", + ifWeightMatches + ? "dance with star" + : "dance with star"); + + // field is highlighted based on the matches from the "field_tripples" + matchedFieldsTestCase( + analyzer, + searcher, + highlighter, + Set.of("field", "field_tripples"), + "danc", + "dance with star"); + + // field is highlighted based on the matches from the "field_characters" and + // "field_tripples" + matchedFieldsTestCase( + analyzer, + searcher, + highlighter, + Set.of("field", "field_tripples", "field_characters"), + "danc", + ifWeightMatches ? "dance with star" : "dance with star"); + } + } + } + + private static void matchedFieldsTestCase( + Analyzer analyzer, + IndexSearcher searcher, + UnifiedHighlighter highlighter, + Set matchedFields, + String queryText, + String expectedSnippet) + throws IOException { + QueryBuilder queryBuilder = new QueryBuilder(analyzer); + BooleanQuery.Builder boolQueryBuilder = new BooleanQuery.Builder(); + for (String matchedField : matchedFields) { + Query fieldPhraseQuery = queryBuilder.createPhraseQuery(matchedField, queryText, 2); + boolQueryBuilder.add(fieldPhraseQuery, BooleanClause.Occur.SHOULD); + } + Query query = boolQueryBuilder.build(); + TopDocs topDocs = searcher.search(query, 10); + assertEquals(1, topDocs.totalHits.value); + String[] snippets = highlighter.highlight("field", query, topDocs, 10, matchedFields); + assertEquals(1, snippets.length); + assertEquals(expectedSnippet, snippets[0]); + } + public void testFieldMatcherMultiTermQuery() throws Exception { IndexReader ir = indexSomeFields(); IndexSearcher searcher = 
newSearcher(ir); @@ -1275,19 +1392,19 @@ public void testFieldMatcherMultiTermQuery() throws Exception { { TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(1, topDocs.totalHits.value); - String[] snippets = highlighterNoFieldMatch.highlight("title", query, topDocs, 10); + String[] snippets = highlighterNoFieldMatch.highlight("title", query, topDocs, 10, null); assertEquals(1, snippets.length); assertEquals("This is the title field.", snippets[0]); - snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10); + snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10, null); assertEquals(1, snippets.length); assertEquals("This is the title field.", snippets[0]); highlighterFieldMatch = overrideFieldMatcherForTests(highlighterFieldMatch, "text"::equals, "text"); - snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10); + snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10, null); assertEquals(1, snippets.length); assertEquals("This is the title field.", snippets[0]); highlighterFieldMatch = overrideFieldMatcherForTests(highlighterFieldMatch, null, "text"); } @@ -1295,13 +1412,13 @@ public void testFieldMatcherMultiTermQuery() throws Exception { { TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(1, topDocs.totalHits.value); - String[] snippets = highlighterNoFieldMatch.highlight("text", query, topDocs, 10); + String[] snippets = highlighterNoFieldMatch.highlight("text", query, topDocs, 10, null); assertEquals(1, snippets.length); assertEquals( "This is the text field. You can put some text if you want.", snippets[0]); - snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10); + snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10, null); assertEquals(1, snippets.length); assertEquals( "This is the text field.
You can put some text if you want.", @@ -1309,7 +1426,7 @@ public void testFieldMatcherMultiTermQuery() throws Exception { highlighterFieldMatch = overrideFieldMatcherForTests(highlighterFieldMatch, "title"::equals, "title"); - snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10); + snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10, null); assertEquals(1, snippets.length); assertEquals("This is the text field. ", snippets[0]); highlighterFieldMatch = overrideFieldMatcherForTests(highlighterFieldMatch, null, "title"); @@ -1319,17 +1436,17 @@ public void testFieldMatcherMultiTermQuery() throws Exception { { TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(1, topDocs.totalHits.value); - String[] snippets = highlighterNoFieldMatch.highlight("category", query, topDocs, 10); + String[] snippets = highlighterNoFieldMatch.highlight("category", query, topDocs, 10, null); assertEquals(1, snippets.length); assertEquals("This is the category field.", snippets[0]); - snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10); + snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10, null); assertEquals(1, snippets.length); assertEquals("This is the category field.", snippets[0]); highlighterFieldMatch = overrideFieldMatcherForTests(highlighterFieldMatch, "title"::equals, "title"); - snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10); + snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10, null); assertEquals(1, snippets.length); assertEquals("This is the category field.", snippets[0]); highlighterFieldMatch = overrideFieldMatcherForTests(highlighterFieldMatch, null, "title"); @@ -1344,7 +1461,7 @@ public void testMatchesSlopBug() throws IOException { Query query = new PhraseQuery(2, "title", "this", "is", "the", "field"); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(1, topDocs.totalHits.value); - String[] 
snippets = highlighter.highlight("title", query, topDocs, 10); + String[] snippets = highlighter.highlight("title", query, topDocs, 10, null); assertEquals(1, snippets.length); if (highlighter.getFlags("title").contains(HighlightFlag.WEIGHT_MATCHES)) { assertEquals("This is the title field.", snippets[0]); @@ -1387,7 +1504,7 @@ public void testFieldMatcherPhraseQuery() throws Exception { { TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(1, topDocs.totalHits.value); - String[] snippets = highlighterNoFieldMatch.highlight("title", query, topDocs, 10); + String[] snippets = highlighterNoFieldMatch.highlight("title", query, topDocs, 10, null); assertEquals(1, snippets.length); if (highlighterNoFieldMatch.getFlags("title").contains(HighlightFlag.WEIGHT_MATCHES)) { assertEquals("This is the title field.", snippets[0]); @@ -1395,7 +1512,7 @@ public void testFieldMatcherPhraseQuery() throws Exception { assertEquals("This is the title field.", snippets[0]); } - snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10); + snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10, null); assertEquals(1, snippets.length); if (highlighterFieldMatch.getFlags("title").contains(HighlightFlag.WEIGHT_MATCHES)) { assertEquals("This is the title field.", snippets[0]); @@ -1405,7 +1522,7 @@ public void testFieldMatcherPhraseQuery() throws Exception { highlighterFieldMatch = overrideFieldMatcherForTests(highlighterFieldMatch, "text"::equals, "text"); - snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10); + snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10, null); assertEquals(1, snippets.length); if (highlighterFieldMatch.getFlags("title").contains(HighlightFlag.WEIGHT_MATCHES)) { assertEquals("This is the title field.", snippets[0]); @@ -1419,7 +1536,7 @@ public void testFieldMatcherPhraseQuery() throws Exception { { TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); 
assertEquals(1, topDocs.totalHits.value); - String[] snippets = highlighterNoFieldMatch.highlight("text", query, topDocs, 10); + String[] snippets = highlighterNoFieldMatch.highlight("text", query, topDocs, 10, null); assertEquals(1, snippets.length); if (highlighterNoFieldMatch.getFlags("text").contains(HighlightFlag.WEIGHT_MATCHES)) { assertEquals( @@ -1431,7 +1548,7 @@ public void testFieldMatcherPhraseQuery() throws Exception { snippets[0]); } - snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10); + snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10, null); assertEquals(1, snippets.length); if (highlighterFieldMatch.getFlags("text").contains(HighlightFlag.WEIGHT_MATCHES)) { assertEquals( @@ -1448,7 +1565,7 @@ public void testFieldMatcherPhraseQuery() throws Exception { highlighterFieldMatch = overrideFieldMatcherForTests(highlighterFieldMatch, "title"::equals, "title"); - snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10); + snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10, null); assertEquals(1, snippets.length); assertEquals("This is the text field. 
You can put some text if you want.", snippets[0]); highlighterFieldMatch = overrideFieldMatcherForTests(highlighterFieldMatch, null, "title"); @@ -1458,7 +1575,7 @@ public void testFieldMatcherPhraseQuery() throws Exception { { TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(1, topDocs.totalHits.value); - String[] snippets = highlighterNoFieldMatch.highlight("category", query, topDocs, 10); + String[] snippets = highlighterNoFieldMatch.highlight("category", query, topDocs, 10, null); assertEquals(1, snippets.length); if (highlighterNoFieldMatch.getFlags("category").contains(HighlightFlag.WEIGHT_MATCHES)) { assertEquals("This is the category field.", snippets[0]); @@ -1466,7 +1583,7 @@ public void testFieldMatcherPhraseQuery() throws Exception { assertEquals("This is the category field.", snippets[0]); } - snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10); + snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10, null); assertEquals(1, snippets.length); if (highlighterFieldMatch.getFlags("category").contains(HighlightFlag.WEIGHT_MATCHES)) { assertEquals("This is the category field.", snippets[0]); @@ -1476,7 +1593,7 @@ public void testFieldMatcherPhraseQuery() throws Exception { highlighterFieldMatch = overrideFieldMatcherForTests(highlighterFieldMatch, "text"::equals, "text"); - snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10); + snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10, null); assertEquals(1, snippets.length); if (highlighterFieldMatch.getFlags("category").contains(HighlightFlag.WEIGHT_MATCHES)) { assertEquals("This is the category field.", snippets[0]); diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java index 76193a8e62a0..76e38e15a979 100644 --- 
a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java @@ -802,7 +802,7 @@ public void testWithMaxLen() throws IOException { .build(); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); String[] snippets = - highlighter.highlight("body", query, topDocs, 2); // ask for 2 but we'll only get 1 + highlighter.highlight("body", query, topDocs, 2, null); // ask for 2 but we'll only get 1 assertArrayEquals(new String[] {"Alpha Bravo foo foo foo. "}, snippets); ir.close(); @@ -838,7 +838,7 @@ public void testWithMaxLenAndMultipleWildcardMatches() throws IOException { .build(); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); String[] snippets = - highlighter.highlight("body", query, topDocs, 2); // ask for 2 but we'll only get 1 + highlighter.highlight("body", query, topDocs, 2, null); // ask for 2 but we'll only get 1 assertArrayEquals( new String[] {"Alpha Bravo Bravado foo foo foo."}, snippets); @@ -898,7 +898,7 @@ public boolean incrementToken() throws IOException { BooleanQuery query = queryBuilder.build(); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); try { - highlighter.highlight("body", query, topDocs, 2); + highlighter.highlight("body", query, topDocs, 2, null); // don't even care what the results are; just want to test exception behavior if (fieldType == reanalysisType) { fail("Expecting EXPECTED IOException"); @@ -944,7 +944,7 @@ public void testNothingAnalyzes() throws Exception { docIDs[0] = docID; String[] snippets = highlighter - .highlightFields(new String[] {"body"}, query, docIDs, new int[] {2}) + .highlightFields(new String[] {"body"}, query, docIDs, new int[] {2}, null) .get("body"); assertEquals(1, snippets.length); assertEquals(" ", snippets[0]); @@ -1012,7 +1012,7 @@ public void testPositionSensitiveWithWildcardDoesNotHighlight() throws Exception String[] snippets = 
highlighter - .highlightFields(new String[] {"body"}, query, docIds, new int[] {2}) + .highlightFields(new String[] {"body"}, query, docIds, new int[] {2}, null) .get("body"); assertEquals(1, snippets.length); assertEquals("iterate insect ipswitch illinois indirect", snippets[0]); @@ -1049,7 +1049,7 @@ public void testCustomSpanQueryHighlighting() throws Exception { String[] snippets = highlighter - .highlightFields(new String[] {"body"}, query, docIds, new int[] {2}) + .highlightFields(new String[] {"body"}, query, docIds, new int[] {2}, null) .get("body"); assertEquals(1, snippets.length); assertEquals( diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterRanking.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterRanking.java index 83ce335e5a91..5d2a87829672 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterRanking.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterRanking.java @@ -133,8 +133,8 @@ private void checkQuery(IndexSearcher is, Query query, int doc, int maxTopN) thr new TermQuery(new Term("id", Integer.toString(doc))), BooleanClause.Occur.MUST); BooleanQuery bq = queryBuilder.build(); TopDocs td = is.search(bq, 1); - p1.highlight("body", bq, td, n); - p2.highlight("body", bq, td, n + 1); + p1.highlight("body", bq, td, n, null); + p2.highlight("body", bq, td, n + 1, null); assertTrue(f2.seen.containsAll(f1.seen)); } } @@ -290,7 +290,7 @@ protected PassageScorer getScorer(String field) { Query query = new TermQuery(new Term("body", "test")); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(1, topDocs.totalHits.value); - String[] snippets = highlighter.highlight("body", query, topDocs, 1); + String[] snippets = highlighter.highlight("body", query, topDocs, 1, null); assertEquals(1, snippets.length); assertTrue(snippets[0].startsWith("This test is a 
better test")); @@ -348,7 +348,7 @@ protected PassageScorer getScorer(String field) { .build(); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(1, topDocs.totalHits.value); - String[] snippets = highlighter.highlight("body", query, topDocs, 1); + String[] snippets = highlighter.highlight("body", query, topDocs, 1, null); assertEquals(1, snippets.length); assertTrue(snippets[0].startsWith("On the other hand")); diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterStrictPhrases.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterStrictPhrases.java index c299c63e57d5..cc1be6ccbe3b 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterStrictPhrases.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterStrictPhrases.java @@ -432,7 +432,7 @@ public void testMultiValued() throws IOException { .build(); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); - String[] snippets = highlighter.highlight("body", query, topDocs, 2); + String[] snippets = highlighter.highlight("body", query, topDocs, 2, null); if (highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES)) { assertArrayEquals(new String[] {"one bravo three... four bravo six"}, snippets); @@ -455,7 +455,7 @@ public void testMultiValued() throws IOException { .build(); topDocs = searcher.search(query, 10); - snippets = highlighter.highlight("body", query, topDocs, 2); + snippets = highlighter.highlight("body", query, topDocs, 2, null); assertEquals("one bravo three... 
four bravo six", snippets[0]); // and add just enough slop to cross the values: @@ -469,7 +469,7 @@ public void testMultiValued() throws IOException { .build(); topDocs = searcher.search(query, 10); assertEquals(1, topDocs.totalHits.value); - snippets = highlighter.highlight("body", query, topDocs, 2); + snippets = highlighter.highlight("body", query, topDocs, 2, null); if (highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES)) { assertEquals("one bravo three... four bravo six", snippets[0]); } else { diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterTermIntervals.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterTermIntervals.java index 4ae9eba826f5..1b41b3dd347b 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterTermIntervals.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterTermIntervals.java @@ -245,7 +245,7 @@ public void testMaxLengthWithMultivalue() throws Exception { Query query = new IntervalQuery("body", Intervals.term("field")); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(1, topDocs.totalHits.value); - String[] snippets = highlighter.highlight("body", query, topDocs, 10); + String[] snippets = highlighter.highlight("body", query, topDocs, 10, null); assertEquals(1, snippets.length); String highlightedValue = "This is a multivalued field. Sentencetwo field."; assertEquals(highlightedValue + "... 
" + highlightedValue, snippets[0]); @@ -305,7 +305,7 @@ public void testMultiplePassages() throws Exception { Query query = new IntervalQuery("body", Intervals.term("test")); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(2, topDocs.totalHits.value); - String[] snippets = highlighter.highlight("body", query, topDocs, 2); + String[] snippets = highlighter.highlight("body", query, topDocs, 2, null); assertEquals(2, snippets.length); assertEquals( "This is a test. Just a test highlighting from postings. ", snippets[0]); @@ -341,7 +341,7 @@ public void testBuddhism() throws Exception { UnifiedHighlighter.Builder uhBuilder = new UnifiedHighlighter.Builder(searcher, indexAnalyzer).withHighlightPhrasesStrictly(false); UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder); - String[] snippets = highlighter.highlight("body", query, topDocs, 2); + String[] snippets = highlighter.highlight("body", query, topDocs, 2, null); assertEquals(1, snippets.length); // highlighter.getFlags("body").containsAll(EnumSet.of(HighlightFlag.WEIGHT_MATCHES, // HighlightFlag.PHRASES))) { @@ -371,7 +371,7 @@ public void testCuriousGeorge() throws Exception { UnifiedHighlighter.Builder uhBuilder = new UnifiedHighlighter.Builder(searcher, indexAnalyzer).withHighlightPhrasesStrictly(false); UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder); - String[] snippets = highlighter.highlight("body", query, topDocs, 2); + String[] snippets = highlighter.highlight("body", query, topDocs, 2, null); assertEquals(1, snippets.length); assertFalse(snippets[0].contains("CuriousCurious")); int matches = 0; @@ -412,7 +412,7 @@ public void testCambridgeMA() throws Exception { new UnifiedHighlighter.Builder(searcher, indexAnalyzer) .withMaxLength(Integer.MAX_VALUE - 1); UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder); - String[] snippets = highlighter.highlight("body", query, topDocs, 2); + String[] snippets = 
highlighter.highlight("body", query, topDocs, 2, null); assertEquals(1, snippets.length); assertTrue(snippets[0].contains("Square")); assertTrue(snippets[0].contains("Porter")); @@ -440,7 +440,7 @@ public void testPassageRanking() throws Exception { Query query = new IntervalQuery("body", Intervals.term("test")); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(1, topDocs.totalHits.value); - String[] snippets = highlighter.highlight("body", query, topDocs, 2); + String[] snippets = highlighter.highlight("body", query, topDocs, 2, null); assertEquals(1, snippets.length); assertEquals( "This is a test. ... Feel free to test test test test test test test.", @@ -470,7 +470,7 @@ public void testBooleanMustNot() throws Exception { new UnifiedHighlighter.Builder(searcher, indexAnalyzer) .withMaxLength(Integer.MAX_VALUE - 1); UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder); - String[] snippets = highlighter.highlight("body", query, topDocs, 2); + String[] snippets = highlighter.highlight("body", query, topDocs, 2, null); assertEquals(1, snippets.length); assertFalse(snippets[0].contains("both")); assertTrue(snippets[0].contains("terms")); @@ -500,7 +500,7 @@ public void testHighlightAllText() throws Exception { Query query = new IntervalQuery("body", Intervals.term("test")); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(1, topDocs.totalHits.value); - String[] snippets = highlighter.highlight("body", query, topDocs, 2); + String[] snippets = highlighter.highlight("body", query, topDocs, 2, null); assertEquals(1, snippets.length); assertEquals( "This is a test. Just highlighting from postings. This is also a much sillier test. 
Feel free to test test test test test test test.", @@ -535,7 +535,7 @@ public void testSpecificDocIDs() throws Exception { docIDs[1] = hits[1].doc; String[] snippets = highlighter - .highlightFields(new String[] {"body"}, query, docIDs, new int[] {1}) + .highlightFields(new String[] {"body"}, query, docIDs, new int[] {1}, null) .get("body"); assertEquals(2, snippets.length); assertEquals("Just a test highlighting from postings. ", snippets[0]); @@ -580,7 +580,7 @@ protected BreakIterator getBreakIterator(String field) { Query query = new IntervalQuery("body", Intervals.term("test")); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(1, topDocs.totalHits.value); - String[] snippets = highlighter.highlight("body", query, topDocs, 2); + String[] snippets = highlighter.highlight("body", query, topDocs, 2, null); assertEquals(1, snippets.length); assertEquals( "This is a test. Just highlighting from postings. This is also a much sillier test. Feel free to test test test test test test test.", @@ -611,7 +611,7 @@ public void testEmptyHighlights() throws Exception { int[] docIDs = new int[] {0}; String[] snippets = highlighter - .highlightFields(new String[] {"body"}, query, docIDs, new int[] {2}) + .highlightFields(new String[] {"body"}, query, docIDs, new int[] {2}, null) .get("body"); assertEquals(1, snippets.length); assertEquals("test this is. another sentence this test has. 
", snippets[0]); @@ -642,7 +642,7 @@ public void testNothingAnalyzes() throws Exception { docIDs[0] = docID; String[] snippets = highlighter - .highlightFields(new String[] {"body"}, query, docIDs, new int[] {2}) + .highlightFields(new String[] {"body"}, query, docIDs, new int[] {2}, null) .get("body"); assertEquals(1, snippets.length); assertEquals(" ", snippets[0]); @@ -676,7 +676,7 @@ public void testCustomEmptyHighlights() throws Exception { int[] docIDs = new int[] {0}; String[] snippets = highlighter - .highlightFields(new String[] {"body"}, query, docIDs, new int[] {2}) + .highlightFields(new String[] {"body"}, query, docIDs, new int[] {2}, null) .get("body"); assertEquals(1, snippets.length); assertNull(snippets[0]); @@ -709,7 +709,7 @@ public void testEmptyHighlightsWhole() throws Exception { int[] docIDs = new int[] {0}; String[] snippets = highlighter - .highlightFields(new String[] {"body"}, query, docIDs, new int[] {2}) + .highlightFields(new String[] {"body"}, query, docIDs, new int[] {2}, null) .get("body"); assertEquals(1, snippets.length); assertEquals( @@ -740,7 +740,7 @@ public void testFieldIsMissing() throws Exception { int[] docIDs = new int[] {0}; String[] snippets = highlighter - .highlightFields(new String[] {"bogus"}, query, docIDs, new int[] {2}) + .highlightFields(new String[] {"bogus"}, query, docIDs, new int[] {2}, null) .get("bogus"); assertEquals(1, snippets.length); assertNull(snippets[0]); @@ -771,7 +771,7 @@ public void testFieldIsJustSpace() throws Exception { docIDs[0] = docID; String[] snippets = highlighter - .highlightFields(new String[] {"body"}, query, docIDs, new int[] {2}) + .highlightFields(new String[] {"body"}, query, docIDs, new int[] {2}, null) .get("body"); assertEquals(1, snippets.length); assertEquals(" ", snippets[0]); @@ -802,7 +802,7 @@ public void testFieldIsEmptyString() throws Exception { docIDs[0] = docID; String[] snippets = highlighter - .highlightFields(new String[] {"body"}, query, docIDs, new int[] 
{2}) + .highlightFields(new String[] {"body"}, query, docIDs, new int[] {2}, null) .get("body"); assertEquals(1, snippets.length); assertNull(snippets[0]); @@ -924,7 +924,8 @@ public String[] format(Passage[] passages, String content) { int[] docIDs = new int[1]; docIDs[0] = topDocs.scoreDocs[0].doc; Map snippets = - highlighter.highlightFieldsAsObjects(new String[] {"body"}, query, docIDs, new int[] {1}); + highlighter.highlightFieldsAsObjects( + new String[] {"body"}, query, docIDs, new int[] {1}, null); Object[] bodySnippets = snippets.get("body"); assertEquals(1, bodySnippets.length); assertTrue( @@ -976,7 +977,7 @@ public void testMatchesSlopBug() throws IOException { Intervals.term("field")))); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(1, topDocs.totalHits.value); - String[] snippets = highlighter.highlight("title", query, topDocs, 10); + String[] snippets = highlighter.highlight("title", query, topDocs, 10, null); assertEquals(1, snippets.length); // All flags are enabled. 
assertEquals( diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterTermVec.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterTermVec.java index 35a638e2aa12..d73e5fb67373 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterTermVec.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterTermVec.java @@ -184,7 +184,7 @@ protected Set getFlags(String field) { TermQuery query = new TermQuery(new Term("body", "vectors")); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); try { - highlighter.highlight("body", query, topDocs, 1); // should throw + highlighter.highlight("body", query, topDocs, 1, null); // should throw } finally { ir.close(); } diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java index d6ce6464d4cb..9f0536dbad98 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java @@ -104,9 +104,13 @@ public void testUnifiedHighlighterExtensibility() { @Override protected Map highlightFieldsAsObjects( - String[] fieldsIn, Query query, int[] docIdsIn, int[] maxPassagesIn) + String[] fieldsIn, + Query query, + int[] docIdsIn, + int[] maxPassagesIn, + List> matchedFieldsIn) throws IOException { - return super.highlightFieldsAsObjects(fieldsIn, query, docIdsIn, maxPassagesIn); + return super.highlightFieldsAsObjects(fieldsIn, query, docIdsIn, maxPassagesIn, null); } @Override @@ -163,7 +167,11 @@ protected List loadFieldValues( @Override protected FieldHighlighter getFieldHighlighter( - String field, Query 
query, Set allTerms, int maxPassages) { + String field, + Query query, + Set allTerms, + int maxPassages, + Set matchedFields) { // THIS IS A COPY of the superclass impl; but use CustomFieldHighlighter UHComponents components = getHighlightComponents(field, query, allTerms); OffsetSource offsetSource = getOptimizedOffsetSource(components);