From a3270ac6e64012ec0a5b6864cdfcf190a1a36346 Mon Sep 17 00:00:00 2001 From: Mike McCandless Date: Fri, 13 May 2016 04:29:48 -0400 Subject: [PATCH] LUCENE-6766: keep SortingMergePolicy for solr back-compat; fix Solr tests; fix precommit failures --- .../lucene/codecs/lucene60/package.html | 25 +++++++++++++++++++ .../org/apache/lucene/index/CheckIndex.java | 6 +++++ .../org/apache/lucene/index/DocIDMerger.java | 7 +++++- .../org/apache/lucene/index/MergeState.java | 4 +++ .../lucene/index/MultiPostingsEnum.java | 3 +++ .../org/apache/lucene/index/MultiSorter.java | 3 ++- .../index/SlowCompositeReaderWrapper.java | 5 ---- .../java/org/apache/lucene/index/package.html | 18 ------------- .../apache/solr/core/SchemaCodecFactory.java | 6 ++--- .../apache/solr/index/SortingMergePolicy.java | 20 +-------------- .../solr/index/SortingMergePolicyFactory.java | 1 - .../apache/solr/search/SolrIndexSearcher.java | 2 +- .../solr/update/DefaultSolrCoreState.java | 4 +-- .../apache/solr/update/SolrIndexConfig.java | 17 ++++++++++--- .../org/apache/solr/search/TestDocSet.java | 8 +++++- .../solr/update/SolrIndexConfigTest.java | 2 +- 16 files changed, 74 insertions(+), 57 deletions(-) create mode 100644 lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene60/package.html diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene60/package.html b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene60/package.html new file mode 100644 index 00000000000..6b4e234826d --- /dev/null +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene60/package.html @@ -0,0 +1,25 @@ + + + + + + + +Lucene 6.0 file format. + + diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java index d752c257c1e..9dee2d14e1c 100644 --- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java +++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java @@ -385,6 +385,8 @@ public final class CheckIndex implements Closeable { * Status from testing index sort */ public static final class IndexSortStatus { + IndexSortStatus() { + } /** Exception thrown during term index test (null on success) */ public Throwable error = null; @@ -822,6 +824,10 @@ public final class CheckIndex implements Closeable { return result; } + /** + * Tests index sort order. + * @lucene.experimental + */ public static Status.IndexSortStatus testSort(CodecReader reader, Sort sort, PrintStream infoStream, boolean failFast) throws IOException { // This segment claims its documents are sorted according to the incoming sort ... let's make sure: diff --git a/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java b/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java index 84f08c7cf6c..07c9e725270 100644 --- a/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java +++ b/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java @@ -42,10 +42,14 @@ public class DocIDMerger { private T current; private int nextIndex; + /** Represents one sub-reader being merged */ public static abstract class Sub { + /** Mapped doc ID */ public int mappedDocID; + final MergeState.DocMap docMap; + /** Sole constructor */ public Sub(MergeState.DocMap docMap) { this.docMap = docMap; } @@ -54,10 +58,10 @@ public class DocIDMerger { public abstract int nextDoc(); } + /** Construct this from the provided subs, specifying the maximum sub count */ public DocIDMerger(List subs, int maxCount, boolean indexIsSorted) { this.subs = subs; - // nocommit safe? if (indexIsSorted && maxCount > 1) { queue = new PriorityQueue(maxCount) { @Override @@ -74,6 +78,7 @@ public class DocIDMerger { reset(); } + /** Construct this from the provided subs */ public DocIDMerger(List subs, boolean indexIsSorted) { this(subs, subs.size(), indexIsSorted); } diff --git a/lucene/core/src/java/org/apache/lucene/index/MergeState.java b/lucene/core/src/java/org/apache/lucene/index/MergeState.java index 12310c6668d..3723f19476f 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MergeState.java +++ b/lucene/core/src/java/org/apache/lucene/index/MergeState.java @@ -264,6 +264,10 @@ public class MergeState { /** A map of doc IDs. */ public static abstract class DocMap { + /** Sole constructor */ + public DocMap() { + } + /** Return the mapped docID or -1 if the given doc is not mapped. */ public abstract int get(int docID); } diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiPostingsEnum.java b/lucene/core/src/java/org/apache/lucene/index/MultiPostingsEnum.java index 42e3f41cdd7..062fc303c09 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MultiPostingsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/index/MultiPostingsEnum.java @@ -171,6 +171,9 @@ public final class MultiPostingsEnum extends PostingsEnum { /** {@link ReaderSlice} describing how this sub-reader * fits into the composite reader. */ public ReaderSlice slice; + + EnumWithSlice() { + } @Override public String toString() { diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java b/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java index 6a5eb5a0d38..8f5be86ede2 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java +++ b/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; +import org.apache.lucene.index.MergeState.DocMap; import org.apache.lucene.index.MergeState; import org.apache.lucene.search.LeafFieldComparator; import org.apache.lucene.search.Sort; @@ -32,7 +33,7 @@ import org.apache.lucene.util.packed.PackedLongValues; final class MultiSorter { - /** Does a merge sort of the leaves of the incoming reader, returning {@link MergeState#DocMap} to map each leaf's + /** Does a merge sort of the leaves of the incoming reader, returning {@link DocMap} to map each leaf's * documents into the merged segment. The documents for each incoming leaf reader must already be sorted by the same sort! */ static MergeState.DocMap[] sort(Sort sort, List readers) throws IOException { diff --git a/lucene/misc/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java b/lucene/misc/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java index 8762d731b26..18545fb96c9 100644 --- a/lucene/misc/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java +++ b/lucene/misc/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java @@ -71,11 +71,6 @@ public final class SlowCompositeReaderWrapper extends LeafReader { if (getFieldInfos().hasPointValues()) { throw new IllegalArgumentException("cannot wrap points"); } - for(LeafReaderContext context : reader.leaves()) { - if (context.reader().getIndexSort() != null) { - throw new IllegalArgumentException("cannot use index sort"); - } - } fields = MultiFields.getFields(in); in.registerParentReader(this); this.merging = merging; diff --git a/lucene/misc/src/java/org/apache/lucene/index/package.html b/lucene/misc/src/java/org/apache/lucene/index/package.html index dc9cbb7b676..33ce964eaf2 100644 --- a/lucene/misc/src/java/org/apache/lucene/index/package.html +++ b/lucene/misc/src/java/org/apache/lucene/index/package.html @@ -18,23 +18,5 @@ Misc index tools and index support. - -SortingMergePolicy: -

Provides index sorting capablities. The application can use any -Sort specification, e.g. to sort by fields using DocValues or FieldCache, or to -reverse the order of the documents (by using SortField.Type.DOC in reverse). -Multi-level sorts can be specified the same way you would when searching, by -building Sort from multiple SortFields. - -

{@link org.apache.lucene.index.SortingMergePolicy} can be used to -make Lucene sort segments before merging them. This will ensure that every -segment resulting from a merge will be sorted according to the provided -{@link org.apache.lucene.search.Sort}. This however makes merging and -thus indexing slower. - -

Sorted segments allow for early query termination when the sort order -matches index order. This makes query execution faster since not all documents -need to be visited. Please note that this is an expert feature and should not -be used without a deep understanding of Lucene merging and document collection. diff --git a/solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java b/solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java index dc423d99212..c575ecbca10 100644 --- a/solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java +++ b/solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java @@ -24,9 +24,9 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode; -import org.apache.lucene.codecs.lucene60.Lucene60Codec; -import org.apache.solr.common.SolrException; +import org.apache.lucene.codecs.lucene62.Lucene62Codec; import org.apache.solr.common.SolrException.ErrorCode; +import org.apache.solr.common.SolrException; import org.apache.solr.common.util.NamedList; import org.apache.solr.schema.SchemaField; import org.apache.solr.util.plugin.SolrCoreAware; @@ -91,7 +91,7 @@ public class SchemaCodecFactory extends CodecFactory implements SolrCoreAware { compressionMode = SOLR_DEFAULT_COMPRESSION_MODE; log.info("Using default compressionMode: " + compressionMode); } - codec = new Lucene60Codec(compressionMode) { + codec = new Lucene62Codec(compressionMode) { @Override public PostingsFormat getPostingsFormatForField(String field) { final SchemaField schemaField = core.getLatestSchema().getFieldOrNull(field); diff --git a/solr/core/src/java/org/apache/solr/index/SortingMergePolicy.java b/solr/core/src/java/org/apache/solr/index/SortingMergePolicy.java index 7b334b15c88..e9265a73192 100644 --- a/solr/core/src/java/org/apache/solr/index/SortingMergePolicy.java +++ b/solr/core/src/java/org/apache/solr/index/SortingMergePolicy.java @@ -17,29 +17,11 @@ package org.apache.solr.index; import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.MergePolicy; import org.apache.lucene.index.MergePolicyWrapper; -import org.apache.lucene.index.MergeState; -import org.apache.lucene.index.MergeTrigger; -import org.apache.lucene.index.MultiReader; -import org.apache.lucene.index.SegmentCommitInfo; -import org.apache.lucene.index.SegmentInfo; -import org.apache.lucene.index.SegmentInfos; -import org.apache.lucene.index.SegmentReader; + import org.apache.lucene.search.Sort; -import org.apache.lucene.store.Directory; -import org.apache.lucene.util.Bits; -import org.apache.lucene.util.InfoStream; -import org.apache.lucene.util.packed.PackedInts; -import org.apache.lucene.util.packed.PackedLongValues; -import org.apache.solr.index.SlowCompositeReaderWrapper; // TODO: remove this and add indexSort specification directly to solrconfig.xml? But for BWC, also accept SortingMergePolicy specifiction? diff --git a/solr/core/src/java/org/apache/solr/index/SortingMergePolicyFactory.java b/solr/core/src/java/org/apache/solr/index/SortingMergePolicyFactory.java index 53190b5f4f9..b22df3b3f97 100644 --- a/solr/core/src/java/org/apache/solr/index/SortingMergePolicyFactory.java +++ b/solr/core/src/java/org/apache/solr/index/SortingMergePolicyFactory.java @@ -17,7 +17,6 @@ package org.apache.solr.index; import org.apache.lucene.index.MergePolicy; -import org.apache.lucene.index.SortingMergePolicy; import org.apache.lucene.search.Sort; import org.apache.solr.core.SolrResourceLoader; diff --git a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java index 54f7e303728..33d616ec808 100644 --- a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java +++ b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java @@ -237,7 +237,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI !EarlyTerminatingSortingCollector.canEarlyTerminate(cmdSort, mergeSort)) { log.warn("unsupported combination: segmentTerminateEarly=true cmdSort={} cmdLen={} mergeSort={}", cmdSort, cmdLen, mergeSort); } else { - collector = earlyTerminatingSortingCollector = new EarlyTerminatingSortingCollector(collector, cmdSort, cmd.getLen(), mergeSort); + collector = earlyTerminatingSortingCollector = new EarlyTerminatingSortingCollector(collector, cmdSort, cmd.getLen()); } } diff --git a/solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java b/solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java index 8eab83f52e8..a29d57dcb5f 100644 --- a/solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java +++ b/solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java @@ -29,16 +29,16 @@ import java.util.concurrent.locks.ReentrantReadWriteLock; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.MergePolicy; -import org.apache.lucene.index.SortingMergePolicy; import org.apache.lucene.search.Sort; import org.apache.solr.cloud.ActionThrottle; import org.apache.solr.cloud.RecoveryStrategy; -import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; +import org.apache.solr.common.SolrException; import org.apache.solr.core.CoreContainer; import org.apache.solr.core.CoreDescriptor; import org.apache.solr.core.DirectoryFactory; import org.apache.solr.core.SolrCore; +import org.apache.solr.index.SortingMergePolicy; import org.apache.solr.logging.MDCLoggingContext; import org.apache.solr.util.RefCounted; import org.slf4j.Logger; diff --git a/solr/core/src/java/org/apache/solr/update/SolrIndexConfig.java b/solr/core/src/java/org/apache/solr/update/SolrIndexConfig.java index bc2d6545210..696b3ed38be 100644 --- a/solr/core/src/java/org/apache/solr/update/SolrIndexConfig.java +++ b/solr/core/src/java/org/apache/solr/update/SolrIndexConfig.java @@ -16,8 +16,7 @@ */ package org.apache.solr.update; -import static org.apache.solr.core.Config.assertWarnOrFail; - +import java.io.IOException; import java.lang.invoke.MethodHandles; import java.util.List; import java.util.Map; @@ -31,6 +30,7 @@ import org.apache.lucene.index.LogMergePolicy; import org.apache.lucene.index.MergePolicy; import org.apache.lucene.index.MergeScheduler; import org.apache.lucene.index.TieredMergePolicy; +import org.apache.lucene.search.Sort; import org.apache.lucene.util.InfoStream; import org.apache.lucene.util.Version; import org.apache.solr.common.util.NamedList; @@ -44,11 +44,14 @@ import org.apache.solr.core.SolrResourceLoader; import org.apache.solr.index.DefaultMergePolicyFactory; import org.apache.solr.index.MergePolicyFactory; import org.apache.solr.index.MergePolicyFactoryArgs; +import org.apache.solr.index.SortingMergePolicy; import org.apache.solr.schema.IndexSchema; import org.apache.solr.util.SolrPluginUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.solr.core.Config.assertWarnOrFail; + /** * This config object encapsulates IndexWriter config params, * defined in the <indexConfig> section of solrconfig.xml @@ -222,7 +225,7 @@ public class SolrIndexConfig implements MapSerializable { } } - public IndexWriterConfig toIndexWriterConfig(SolrCore core) { + public IndexWriterConfig toIndexWriterConfig(SolrCore core) throws IOException { IndexSchema schema = core.getLatestSchema(); IndexWriterConfig iwc = new IndexWriterConfig(new DelayedSchemaAnalyzer(core)); if (maxBufferedDocs != -1) @@ -232,10 +235,16 @@ public class SolrIndexConfig implements MapSerializable { iwc.setRAMBufferSizeMB(ramBufferSizeMB); iwc.setSimilarity(schema.getSimilarity()); - iwc.setMergePolicy(buildMergePolicy(schema)); + MergePolicy mergePolicy = buildMergePolicy(schema); + iwc.setMergePolicy(mergePolicy); iwc.setMergeScheduler(buildMergeScheduler(schema)); iwc.setInfoStream(infoStream); + if (mergePolicy instanceof SortingMergePolicy) { + Sort indexSort = ((SortingMergePolicy) mergePolicy).getSort(); + iwc.setIndexSort(indexSort); + } + // do this after buildMergePolicy since the backcompat logic // there may modify the effective useCompoundFile iwc.setUseCompoundFile(getUseCompoundFile()); diff --git a/solr/core/src/test/org/apache/solr/search/TestDocSet.java b/solr/core/src/test/org/apache/solr/search/TestDocSet.java index 9c46d5baa57..cdddd86e7ec 100644 --- a/solr/core/src/test/org/apache/solr/search/TestDocSet.java +++ b/solr/core/src/test/org/apache/solr/search/TestDocSet.java @@ -22,7 +22,6 @@ import java.util.List; import java.util.Random; import org.apache.lucene.index.BinaryDocValues; -import org.apache.lucene.index.PointValues; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.Fields; @@ -32,12 +31,14 @@ import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.PointValues; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.StoredFieldVisitor; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.Sort; import org.apache.lucene.util.BitSetIterator; import org.apache.lucene.util.Bits; import org.apache.lucene.util.FixedBitSet; @@ -467,6 +468,11 @@ public class TestDocSet extends LuceneTestCase { @Override public void checkIntegrity() throws IOException { } + + @Override + public Sort getIndexSort() { + return null; + } }; } diff --git a/solr/core/src/test/org/apache/solr/update/SolrIndexConfigTest.java b/solr/core/src/test/org/apache/solr/update/SolrIndexConfigTest.java index ffb495e9f4b..08a9037608a 100644 --- a/solr/core/src/test/org/apache/solr/update/SolrIndexConfigTest.java +++ b/solr/core/src/test/org/apache/solr/update/SolrIndexConfigTest.java @@ -23,7 +23,6 @@ import org.apache.lucene.index.ConcurrentMergeScheduler; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.MergePolicy; import org.apache.lucene.index.SimpleMergedSegmentWarmer; -import org.apache.lucene.index.SortingMergePolicy; import org.apache.lucene.index.TieredMergePolicy; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; @@ -31,6 +30,7 @@ import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.core.DirectoryFactory; import org.apache.solr.core.SolrConfig; import org.apache.solr.core.TestMergePolicyConfig; +import org.apache.solr.index.SortingMergePolicy; import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.IndexSchemaFactory; import org.junit.BeforeClass;