Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add codec compare in shouldUpgrade method of OpenSearchMergePolicy #17491

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG-3.0.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Use Lucene `BM25Similarity` as default since the `LegacyBM25Similarity` is marked as deprecated ([#17306](https://github.com/opensearch-project/OpenSearch/pull/17306))
- Wildcard field index only 3gram of the input data [#17349](https://github.com/opensearch-project/OpenSearch/pull/17349)
- Use BC libraries to parse PEM files, increase key length, allow general use of known cryptographic binary extensions, remove unused BC dependencies ([#3420](https://github.com/opensearch-project/OpenSearch/pull/14912))
- Add segments codec check in UpgradeRequest to decide should upgrade segment or not ([#17491](https://github.com/opensearch-project/OpenSearch/pull/17491))

### Deprecated

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2371,7 +2371,7 @@ private IndexWriterConfig getIndexWriterConfig() {
}

iwc.setCheckPendingFlushUpdate(config().getIndexSettings().isCheckPendingFlushEnabled());
iwc.setMergePolicy(new OpenSearchMergePolicy(mergePolicy));
iwc.setMergePolicy(new OpenSearchMergePolicy(mergePolicy, engineConfig.getCodec()));
iwc.setSimilarity(engineConfig.getSimilarity());
iwc.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().getMbFrac());
iwc.setCodec(engineConfig.getCodec());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.StoredFieldsWriter;
import org.apache.lucene.index.FilterMergePolicy;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MergePolicy;
Expand Down Expand Up @@ -71,9 +73,12 @@ public final class OpenSearchMergePolicy extends FilterMergePolicy {

private static final int MAX_CONCURRENT_UPGRADE_MERGES = 5;

private final Codec actualCodec;

/** @param delegate the merge policy to wrap */
public OpenSearchMergePolicy(MergePolicy delegate) {
public OpenSearchMergePolicy(MergePolicy delegate, Codec codec) {
super(delegate);
this.actualCodec = codec;
}

/** return the wrapped merge policy */
Expand All @@ -92,11 +97,34 @@ private boolean shouldUpgrade(SegmentCommitInfo info) {
// Always upgrade segment if Lucene's major version is too old
return true;
}

if (upgradeOnlyAncientSegments == false && cur.minor > old.minor) {
// If it's only a minor version difference, and we are not upgrading only ancient segments,
// also upgrade:
return true;
}

if (!info.info.getCodec().getName().equals(actualCodec.getName())) {
// If difference in Codec we should upgrade segment
return true;
}

if (info.info.getAttributes() != null) {
// Only way to compare Codec.mode(BEST_SPEED or BEST_COMPRESSION) is try to create fields writer
// and catch an IllegalStateException if they are different, and ignore others exceptions
try {
StoredFieldsWriter writer = actualCodec.storedFieldsFormat().fieldsWriter(info.info.dir, info.info, null);
writer.close();
} catch (IllegalStateException e) {
// fieldsWriter throw an exception if previous mode not equal to current
logger.debug("Received expected IllegalStateException", e);
return true;
} catch (IOException e) {
// Most of the time it will be java.nio.file.FileAlreadyExistsException, which means codec mode same as should be,
// otherwise it is something wrong and will be processed somewhere else
}
}

// Version matches, or segment is not ancient and we are only upgrading ancient segments:
return false;
}
Expand All @@ -115,7 +143,7 @@ public MergeSpecification findForcedMerges(

if (shouldUpgrade(info)) {

// TODO: Use IndexUpgradeMergePolicy instead. We should be comparing codecs,
// TODO: Use IndexUpgradeMergePolicy instead
// for now we just assume every minor upgrade has a new format.
logger.debug("Adding segment {} to be upgraded", info.info.name);
spec.add(new OneMerge(Collections.singletonList(info)));
Expand Down
Loading