Introduce IndexSearcher#searchLeaf(LeafReaderContext, Weight, Collector) method (apache#13603)

There are a couple of places in the codebase where we extend `IndexSearcher` to customize per-leaf behaviour, and in order to do that, we need to override the entire search method that loops through the leaves. A good example is `ScorerIndexSearcher`. Adding a `searchLeaf` method that provides the per-leaf behaviour makes those cases a little easier to deal with.
javanna · Jul 30, 2024 · d650dab · d650dab
1 parent 7edd646
commit d650dab
Show file tree

Hide file tree

Showing 5 changed files with 59 additions and 41 deletions.
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
@@ -14,6 +14,10 @@ API Changes
 
 * GITHUB#13612: Hunspell: add Suggester#proceedPastRep to avoid losing relevant suggestions. (Peter Gromov)
 
+* GITHUB#13603: Introduced `IndexSearcher#searchLeaf(LeafReaderContext, Weight, Collector)` protected method to
+  facilitate customizing per-leaf behavior of search without having to override
+  `search(List<LeafReaderContext>, Weight, Collector)`, which requires overriding the entire loop across the leaves. (Luca Cavanna)
+
 New Features
 ---------------------
 

diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
@@ -712,7 +712,8 @@ private <C extends Collector, T> T search(
   /**
    * Lower-level search API.
    *
-   * <p>{@link LeafCollector#collect(int)} is called for every document. <br>
+   * <p>{@link #searchLeaf(LeafReaderContext, Weight, Collector)} is called for every leaf
+   * partition. <br>
    *
    * <p>NOTE: this method executes the searches on all given leaves exclusively. To search across
    * all the searchers leaves use {@link #leafContexts}.
@@ -732,38 +733,54 @@ protected void search(List<LeafReaderContext> leaves, Weight weight, Collector c
     // threaded...? the Collector could be sync'd?
     // always use single thread:
     for (LeafReaderContext ctx : leaves) { // search each subreader
-      final LeafCollector leafCollector;
+      searchLeaf(ctx, weight, collector);
+    }
+  }
+
+  /**
+   * Lower-level search API that executes the search against a single leaf.
+   *
+   * <p>{@link LeafCollector#collect(int)} is called for every document. <br>
+   *
+   * @param ctx the leaf to execute the search against
+   * @param weight to match documents
+   * @param collector to receive hits
+   * @throws TooManyClauses If a query would exceed {@link IndexSearcher#getMaxClauseCount()}
+   *     clauses.
+   */
+  protected void searchLeaf(LeafReaderContext ctx, Weight weight, Collector collector)  throws IOException {
+    final LeafCollector leafCollector;
+    try {
+      leafCollector = collector.getLeafCollector(ctx);
+    } catch (
+            @SuppressWarnings("unused")
+            CollectionTerminatedException e) {
+      // there is no doc of interest in this reader context
+      // continue with the following leaf
+      return;
+    }
+    BulkScorer scorer = weight.bulkScorer(ctx);
+    if (scorer != null) {
+      if (queryTimeout != null) {
+        scorer = new TimeLimitingBulkScorer(scorer, queryTimeout);
+      }
       try {
-        leafCollector = collector.getLeafCollector(ctx);
+        scorer.score(leafCollector, ctx.reader().getLiveDocs());
       } catch (
-          @SuppressWarnings("unused")
-          CollectionTerminatedException e) {
-        // there is no doc of interest in this reader context
+              @SuppressWarnings("unused")
+              CollectionTerminatedException e) {
+        // collection was terminated prematurely
         // continue with the following leaf
-        continue;
-      }
-      BulkScorer scorer = weight.bulkScorer(ctx);
-      if (scorer != null) {
-        if (queryTimeout != null) {
-          scorer = new TimeLimitingBulkScorer(scorer, queryTimeout);
-        }
-        try {
-          scorer.score(leafCollector, ctx.reader().getLiveDocs());
-        } catch (
-            @SuppressWarnings("unused")
-            CollectionTerminatedException e) {
-          // collection was terminated prematurely
-          // continue with the following leaf
-        } catch (
-            @SuppressWarnings("unused")
-            TimeLimitingBulkScorer.TimeExceededException e) {
-          partialResult = true;
-        }
+      } catch (
+              @SuppressWarnings("unused")
+              TimeLimitingBulkScorer.TimeExceededException e) {
+        partialResult = true;
       }
-      // Note: this is called if collection ran successfully, including the above special cases of
-      // CollectionTerminatedException and TimeExceededException, but no other exception.
-      leafCollector.finish();
     }
+    // Note: this is called if collection ran successfully, including the above special cases of
+    // CollectionTerminatedException and TimeExceededException, but no other exception.
+    leafCollector.finish();
+
   }
 
   /**

diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTopDocsMerge.java b/lucene/core/src/test/org/apache/lucene/search/TestTopDocsMerge.java
@@ -43,26 +43,26 @@
 public class TestTopDocsMerge extends LuceneTestCase {
 
   private static class ShardSearcher extends IndexSearcher {
-    private final List<LeafReaderContext> ctx;
+    private final LeafReaderContext ctx;
 
     public ShardSearcher(LeafReaderContext ctx, IndexReaderContext parent) {
       super(parent);
-      this.ctx = Collections.singletonList(ctx);
+      this.ctx = ctx;
     }
 
     public void search(Weight weight, Collector collector) throws IOException {
-      search(ctx, weight, collector);
+      searchLeaf(ctx, weight, collector);
     }
 
     public TopDocs search(Weight weight, int topN) throws IOException {
       TopScoreDocCollector collector = TopScoreDocCollector.create(topN, Integer.MAX_VALUE);
-      search(ctx, weight, collector);
+      searchLeaf(ctx, weight, collector);
       return collector.topDocs();
     }
 
     @Override
     public String toString() {
-      return "ShardSearcher(" + ctx.get(0) + ")";
+      return "ShardSearcher(" + ctx + ")";
     }
   }
 

diff --git a/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java b/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java
@@ -1571,20 +1571,20 @@ private void assertEquals(
   }
 
   private static class ShardSearcher extends IndexSearcher {
-    private final List<LeafReaderContext> ctx;
+    private final LeafReaderContext ctx;
 
     public ShardSearcher(LeafReaderContext ctx, IndexReaderContext parent) {
       super(parent);
-      this.ctx = Collections.singletonList(ctx);
+      this.ctx = ctx;
     }
 
     public void search(Weight weight, Collector collector) throws IOException {
-      search(ctx, weight, collector);
+      searchLeaf(ctx, weight, collector);
     }
 
     @Override
     public String toString() {
-      return "ShardSearcher(" + ctx.get(0).reader() + ")";
+      return "ShardSearcher(" + ctx.reader() + ")";
     }
   }
 }
diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/search/ScorerIndexSearcher.java b/lucene/test-framework/src/java/org/apache/lucene/tests/search/ScorerIndexSearcher.java
@@ -53,9 +53,7 @@ public ScorerIndexSearcher(IndexReader r) {
   }
 
   @Override
-  protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector)
-      throws IOException {
-    for (LeafReaderContext ctx : leaves) { // search each subreader
+  protected void searchLeaf(LeafReaderContext ctx, Weight weight, Collector collector) throws IOException {
       // we force the use of Scorer (not BulkScorer) to make sure
       // that the scorer passed to LeafCollector.setScorer supports
       // Scorer.getChildren
@@ -74,5 +72,4 @@ protected void search(List<LeafReaderContext> leaves, Weight weight, Collector c
         }
       }
     }
-  }
 }