Skip to content

Commit

Permalink
Introduce IndexSearcher#searchLeaf(LeafReaderContext, Weight, Collector) method (apache#13603)
Browse files Browse the repository at this point in the history

There are a couple of places in the codebase where we extend `IndexSearcher` to customize
per-leaf behaviour, and in order to do that, we need to override the entire search method
that loops through the leaves. A good example is `ScorerIndexSearcher`.

Adding a `searchLeaf` method that provides the per-leaf behaviour makes those cases a little
easier to deal with.
  • Loading branch information
javanna committed Jul 30, 2024
1 parent 7edd646 commit d650dab
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 41 deletions.
4 changes: 4 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ API Changes

* GITHUB#13612: Hunspell: add Suggester#proceedPastRep to avoid losing relevant suggestions. (Peter Gromov)

* GITHUB#13603: Introduced `IndexSearcher#searchLeaf(LeafReaderContext, Weight, Collector)` protected method to
facilitate customizing per-leaf behavior of search without having to override
`search(List<LeafReaderContext>, Weight, Collector)`, which requires overriding the entire loop across the leaves (Luca Cavanna)

New Features
---------------------

Expand Down
73 changes: 45 additions & 28 deletions lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
Original file line number Diff line number Diff line change
Expand Up @@ -712,7 +712,8 @@ private <C extends Collector, T> T search(
/**
* Lower-level search API.
*
* <p>{@link LeafCollector#collect(int)} is called for every document. <br>
* <p>{@link #searchLeaf(LeafReaderContext, Weight, Collector)} is called for every leaf
* partition. <br>
*
* <p>NOTE: this method executes the searches on all given leaves exclusively. To search across
* all the searcher's leaves use {@link #leafContexts}.
Expand All @@ -732,38 +733,54 @@ protected void search(List<LeafReaderContext> leaves, Weight weight, Collector c
// threaded...? the Collector could be sync'd?
// always use single thread:
for (LeafReaderContext ctx : leaves) { // search each subreader
final LeafCollector leafCollector;
searchLeaf(ctx, weight, collector);
}
}

/**
* Lower-level search API.
*
* <p>{@link LeafCollector#collect(int)} is called for every document. <br>
*
* @param ctx the leaf to execute the search against
* @param weight to match documents
* @param collector to receive hits
* @throws TooManyClauses If a query would exceed {@link IndexSearcher#getMaxClauseCount()}
* clauses.
*/
protected void searchLeaf(LeafReaderContext ctx, Weight weight, Collector collector) throws IOException {
final LeafCollector leafCollector;
try {
leafCollector = collector.getLeafCollector(ctx);
} catch (
@SuppressWarnings("unused")
CollectionTerminatedException e) {
// there is no doc of interest in this reader context
// continue with the following leaf
return;
}
BulkScorer scorer = weight.bulkScorer(ctx);
if (scorer != null) {
if (queryTimeout != null) {
scorer = new TimeLimitingBulkScorer(scorer, queryTimeout);
}
try {
leafCollector = collector.getLeafCollector(ctx);
scorer.score(leafCollector, ctx.reader().getLiveDocs());
} catch (
@SuppressWarnings("unused")
CollectionTerminatedException e) {
// there is no doc of interest in this reader context
@SuppressWarnings("unused")
CollectionTerminatedException e) {
// collection was terminated prematurely
// continue with the following leaf
continue;
}
BulkScorer scorer = weight.bulkScorer(ctx);
if (scorer != null) {
if (queryTimeout != null) {
scorer = new TimeLimitingBulkScorer(scorer, queryTimeout);
}
try {
scorer.score(leafCollector, ctx.reader().getLiveDocs());
} catch (
@SuppressWarnings("unused")
CollectionTerminatedException e) {
// collection was terminated prematurely
// continue with the following leaf
} catch (
@SuppressWarnings("unused")
TimeLimitingBulkScorer.TimeExceededException e) {
partialResult = true;
}
} catch (
@SuppressWarnings("unused")
TimeLimitingBulkScorer.TimeExceededException e) {
partialResult = true;
}
// Note: this is called if collection ran successfully, including the above special cases of
// CollectionTerminatedException and TimeExceededException, but no other exception.
leafCollector.finish();
}
// Note: this is called if collection ran successfully, including the above special cases of
// CollectionTerminatedException and TimeExceededException, but no other exception.
leafCollector.finish();

}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,26 +43,26 @@
public class TestTopDocsMerge extends LuceneTestCase {

private static class ShardSearcher extends IndexSearcher {
private final List<LeafReaderContext> ctx;
private final LeafReaderContext ctx;

public ShardSearcher(LeafReaderContext ctx, IndexReaderContext parent) {
super(parent);
this.ctx = Collections.singletonList(ctx);
this.ctx = ctx;
}

public void search(Weight weight, Collector collector) throws IOException {
search(ctx, weight, collector);
searchLeaf(ctx, weight, collector);
}

public TopDocs search(Weight weight, int topN) throws IOException {
TopScoreDocCollector collector = TopScoreDocCollector.create(topN, Integer.MAX_VALUE);
search(ctx, weight, collector);
searchLeaf(ctx, weight, collector);
return collector.topDocs();
}

@Override
public String toString() {
return "ShardSearcher(" + ctx.get(0) + ")";
return "ShardSearcher(" + ctx + ")";
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1571,20 +1571,20 @@ private void assertEquals(
}

private static class ShardSearcher extends IndexSearcher {
private final List<LeafReaderContext> ctx;
private final LeafReaderContext ctx;

public ShardSearcher(LeafReaderContext ctx, IndexReaderContext parent) {
super(parent);
this.ctx = Collections.singletonList(ctx);
this.ctx = ctx;
}

public void search(Weight weight, Collector collector) throws IOException {
search(ctx, weight, collector);
searchLeaf(ctx, weight, collector);
}

@Override
public String toString() {
return "ShardSearcher(" + ctx.get(0).reader() + ")";
return "ShardSearcher(" + ctx.reader() + ")";
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,7 @@ public ScorerIndexSearcher(IndexReader r) {
}

@Override
protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector)
throws IOException {
for (LeafReaderContext ctx : leaves) { // search each subreader
protected void searchLeaf(LeafReaderContext ctx, Weight weight, Collector collector) throws IOException {
// we force the use of Scorer (not BulkScorer) to make sure
// that the scorer passed to LeafCollector.setScorer supports
// Scorer.getChildren
Expand All @@ -74,5 +72,4 @@ protected void search(List<LeafReaderContext> leaves, Weight weight, Collector c
}
}
}
}
}

0 comments on commit d650dab

Please sign in to comment.