LUCENE-6766: keep SortingMergePolicy for solr back-compat; fix Solr tests; fix precommit failures

This commit is contained in:
Mike McCandless 2016-05-13 04:29:48 -04:00
parent 2f6cdea9a9
commit a3270ac6e6
16 changed files with 74 additions and 57 deletions

View File

@ -0,0 +1,25 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
</head>
<body>
Lucene 6.0 file format.
</body>
</html>

View File

@ -385,6 +385,8 @@ public final class CheckIndex implements Closeable {
* Status from testing index sort * Status from testing index sort
*/ */
public static final class IndexSortStatus { public static final class IndexSortStatus {
IndexSortStatus() {
}
/** Exception thrown during term index test (null on success) */ /** Exception thrown during term index test (null on success) */
public Throwable error = null; public Throwable error = null;
@ -822,6 +824,10 @@ public final class CheckIndex implements Closeable {
return result; return result;
} }
/**
* Tests index sort order.
* @lucene.experimental
*/
public static Status.IndexSortStatus testSort(CodecReader reader, Sort sort, PrintStream infoStream, boolean failFast) throws IOException { public static Status.IndexSortStatus testSort(CodecReader reader, Sort sort, PrintStream infoStream, boolean failFast) throws IOException {
// This segment claims its documents are sorted according to the incoming sort ... let's make sure: // This segment claims its documents are sorted according to the incoming sort ... let's make sure:

View File

@ -42,10 +42,14 @@ public class DocIDMerger<T extends DocIDMerger.Sub> {
private T current; private T current;
private int nextIndex; private int nextIndex;
/** Represents one sub-reader being merged */
public static abstract class Sub { public static abstract class Sub {
/** Mapped doc ID */
public int mappedDocID; public int mappedDocID;
final MergeState.DocMap docMap; final MergeState.DocMap docMap;
/** Sole constructor */
public Sub(MergeState.DocMap docMap) { public Sub(MergeState.DocMap docMap) {
this.docMap = docMap; this.docMap = docMap;
} }
@ -54,10 +58,10 @@ public class DocIDMerger<T extends DocIDMerger.Sub> {
public abstract int nextDoc(); public abstract int nextDoc();
} }
/** Construct this from the provided subs, specifying the maximum sub count */
public DocIDMerger(List<T> subs, int maxCount, boolean indexIsSorted) { public DocIDMerger(List<T> subs, int maxCount, boolean indexIsSorted) {
this.subs = subs; this.subs = subs;
// nocommit safe?
if (indexIsSorted && maxCount > 1) { if (indexIsSorted && maxCount > 1) {
queue = new PriorityQueue<T>(maxCount) { queue = new PriorityQueue<T>(maxCount) {
@Override @Override
@ -74,6 +78,7 @@ public class DocIDMerger<T extends DocIDMerger.Sub> {
reset(); reset();
} }
/** Construct this from the provided subs */
public DocIDMerger(List<T> subs, boolean indexIsSorted) { public DocIDMerger(List<T> subs, boolean indexIsSorted) {
this(subs, subs.size(), indexIsSorted); this(subs, subs.size(), indexIsSorted);
} }

View File

@ -264,6 +264,10 @@ public class MergeState {
/** A map of doc IDs. */ /** A map of doc IDs. */
public static abstract class DocMap { public static abstract class DocMap {
/** Sole constructor */
public DocMap() {
}
/** Return the mapped docID or -1 if the given doc is not mapped. */ /** Return the mapped docID or -1 if the given doc is not mapped. */
public abstract int get(int docID); public abstract int get(int docID);
} }

View File

@ -172,6 +172,9 @@ public final class MultiPostingsEnum extends PostingsEnum {
* fits into the composite reader. */ * fits into the composite reader. */
public ReaderSlice slice; public ReaderSlice slice;
EnumWithSlice() {
}
@Override @Override
public String toString() { public String toString() {
return slice.toString()+":"+ postingsEnum; return slice.toString()+":"+ postingsEnum;

View File

@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import org.apache.lucene.index.MergeState.DocMap;
import org.apache.lucene.index.MergeState; import org.apache.lucene.index.MergeState;
import org.apache.lucene.search.LeafFieldComparator; import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Sort; import org.apache.lucene.search.Sort;
@ -32,7 +33,7 @@ import org.apache.lucene.util.packed.PackedLongValues;
final class MultiSorter { final class MultiSorter {
/** Does a merge sort of the leaves of the incoming reader, returning {@link MergeState#DocMap} to map each leaf's /** Does a merge sort of the leaves of the incoming reader, returning {@link DocMap} to map each leaf's
* documents into the merged segment. The documents for each incoming leaf reader must already be sorted by the same sort! */ * documents into the merged segment. The documents for each incoming leaf reader must already be sorted by the same sort! */
static MergeState.DocMap[] sort(Sort sort, List<CodecReader> readers) throws IOException { static MergeState.DocMap[] sort(Sort sort, List<CodecReader> readers) throws IOException {

View File

@ -71,11 +71,6 @@ public final class SlowCompositeReaderWrapper extends LeafReader {
if (getFieldInfos().hasPointValues()) { if (getFieldInfos().hasPointValues()) {
throw new IllegalArgumentException("cannot wrap points"); throw new IllegalArgumentException("cannot wrap points");
} }
for(LeafReaderContext context : reader.leaves()) {
if (context.reader().getIndexSort() != null) {
throw new IllegalArgumentException("cannot use index sort");
}
}
fields = MultiFields.getFields(in); fields = MultiFields.getFields(in);
in.registerParentReader(this); in.registerParentReader(this);
this.merging = merging; this.merging = merging;

View File

@ -18,23 +18,5 @@
<html> <html>
<body> <body>
Misc index tools and index support. Misc index tools and index support.
SortingMergePolicy:
<p>Provides index sorting capablities. The application can use any
Sort specification, e.g. to sort by fields using DocValues or FieldCache, or to
reverse the order of the documents (by using SortField.Type.DOC in reverse).
Multi-level sorts can be specified the same way you would when searching, by
building Sort from multiple SortFields.
<p>{@link org.apache.lucene.index.SortingMergePolicy} can be used to
make Lucene sort segments before merging them. This will ensure that every
segment resulting from a merge will be sorted according to the provided
{@link org.apache.lucene.search.Sort}. This however makes merging and
thus indexing slower.
<p>Sorted segments allow for early query termination when the sort order
matches index order. This makes query execution faster since not all documents
need to be visited. Please note that this is an expert feature and should not
be used without a deep understanding of Lucene merging and document collection.
</body> </body>
</html> </html>

View File

@ -24,9 +24,9 @@ import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode; import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode;
import org.apache.lucene.codecs.lucene60.Lucene60Codec; import org.apache.lucene.codecs.lucene62.Lucene62Codec;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.SchemaField;
import org.apache.solr.util.plugin.SolrCoreAware; import org.apache.solr.util.plugin.SolrCoreAware;
@ -91,7 +91,7 @@ public class SchemaCodecFactory extends CodecFactory implements SolrCoreAware {
compressionMode = SOLR_DEFAULT_COMPRESSION_MODE; compressionMode = SOLR_DEFAULT_COMPRESSION_MODE;
log.info("Using default compressionMode: " + compressionMode); log.info("Using default compressionMode: " + compressionMode);
} }
codec = new Lucene60Codec(compressionMode) { codec = new Lucene62Codec(compressionMode) {
@Override @Override
public PostingsFormat getPostingsFormatForField(String field) { public PostingsFormat getPostingsFormatForField(String field) {
final SchemaField schemaField = core.getLatestSchema().getFieldOrNull(field); final SchemaField schemaField = core.getLatestSchema().getFieldOrNull(field);

View File

@ -17,29 +17,11 @@
package org.apache.solr.index; package org.apache.solr.index;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.MergePolicy; import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.MergePolicyWrapper; import org.apache.lucene.index.MergePolicyWrapper;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.MergeTrigger;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.SegmentCommitInfo;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.search.Sort; import org.apache.lucene.search.Sort;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
import org.apache.solr.index.SlowCompositeReaderWrapper;
// TODO: remove this and add indexSort specification directly to solrconfig.xml? But for BWC, also accept SortingMergePolicy specifiction? // TODO: remove this and add indexSort specification directly to solrconfig.xml? But for BWC, also accept SortingMergePolicy specifiction?

View File

@ -17,7 +17,6 @@
package org.apache.solr.index; package org.apache.solr.index;
import org.apache.lucene.index.MergePolicy; import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.SortingMergePolicy;
import org.apache.lucene.search.Sort; import org.apache.lucene.search.Sort;
import org.apache.solr.core.SolrResourceLoader; import org.apache.solr.core.SolrResourceLoader;

View File

@ -237,7 +237,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
!EarlyTerminatingSortingCollector.canEarlyTerminate(cmdSort, mergeSort)) { !EarlyTerminatingSortingCollector.canEarlyTerminate(cmdSort, mergeSort)) {
log.warn("unsupported combination: segmentTerminateEarly=true cmdSort={} cmdLen={} mergeSort={}", cmdSort, cmdLen, mergeSort); log.warn("unsupported combination: segmentTerminateEarly=true cmdSort={} cmdLen={} mergeSort={}", cmdSort, cmdLen, mergeSort);
} else { } else {
collector = earlyTerminatingSortingCollector = new EarlyTerminatingSortingCollector(collector, cmdSort, cmd.getLen(), mergeSort); collector = earlyTerminatingSortingCollector = new EarlyTerminatingSortingCollector(collector, cmdSort, cmd.getLen());
} }
} }

View File

@ -29,16 +29,16 @@ import java.util.concurrent.locks.ReentrantReadWriteLock;
import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MergePolicy; import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.SortingMergePolicy;
import org.apache.lucene.search.Sort; import org.apache.lucene.search.Sort;
import org.apache.solr.cloud.ActionThrottle; import org.apache.solr.cloud.ActionThrottle;
import org.apache.solr.cloud.RecoveryStrategy; import org.apache.solr.cloud.RecoveryStrategy;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.SolrException;
import org.apache.solr.core.CoreContainer; import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.CoreDescriptor; import org.apache.solr.core.CoreDescriptor;
import org.apache.solr.core.DirectoryFactory; import org.apache.solr.core.DirectoryFactory;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
import org.apache.solr.index.SortingMergePolicy;
import org.apache.solr.logging.MDCLoggingContext; import org.apache.solr.logging.MDCLoggingContext;
import org.apache.solr.util.RefCounted; import org.apache.solr.util.RefCounted;
import org.slf4j.Logger; import org.slf4j.Logger;

View File

@ -16,8 +16,7 @@
*/ */
package org.apache.solr.update; package org.apache.solr.update;
import static org.apache.solr.core.Config.assertWarnOrFail; import java.io.IOException;
import java.lang.invoke.MethodHandles; import java.lang.invoke.MethodHandles;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
@ -31,6 +30,7 @@ import org.apache.lucene.index.LogMergePolicy;
import org.apache.lucene.index.MergePolicy; import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.MergeScheduler; import org.apache.lucene.index.MergeScheduler;
import org.apache.lucene.index.TieredMergePolicy; import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.search.Sort;
import org.apache.lucene.util.InfoStream; import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.Version; import org.apache.lucene.util.Version;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
@ -44,11 +44,14 @@ import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.index.DefaultMergePolicyFactory; import org.apache.solr.index.DefaultMergePolicyFactory;
import org.apache.solr.index.MergePolicyFactory; import org.apache.solr.index.MergePolicyFactory;
import org.apache.solr.index.MergePolicyFactoryArgs; import org.apache.solr.index.MergePolicyFactoryArgs;
import org.apache.solr.index.SortingMergePolicy;
import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.IndexSchema;
import org.apache.solr.util.SolrPluginUtils; import org.apache.solr.util.SolrPluginUtils;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import static org.apache.solr.core.Config.assertWarnOrFail;
/** /**
* This config object encapsulates IndexWriter config params, * This config object encapsulates IndexWriter config params,
* defined in the &lt;indexConfig&gt; section of solrconfig.xml * defined in the &lt;indexConfig&gt; section of solrconfig.xml
@ -222,7 +225,7 @@ public class SolrIndexConfig implements MapSerializable {
} }
} }
public IndexWriterConfig toIndexWriterConfig(SolrCore core) { public IndexWriterConfig toIndexWriterConfig(SolrCore core) throws IOException {
IndexSchema schema = core.getLatestSchema(); IndexSchema schema = core.getLatestSchema();
IndexWriterConfig iwc = new IndexWriterConfig(new DelayedSchemaAnalyzer(core)); IndexWriterConfig iwc = new IndexWriterConfig(new DelayedSchemaAnalyzer(core));
if (maxBufferedDocs != -1) if (maxBufferedDocs != -1)
@ -232,10 +235,16 @@ public class SolrIndexConfig implements MapSerializable {
iwc.setRAMBufferSizeMB(ramBufferSizeMB); iwc.setRAMBufferSizeMB(ramBufferSizeMB);
iwc.setSimilarity(schema.getSimilarity()); iwc.setSimilarity(schema.getSimilarity());
iwc.setMergePolicy(buildMergePolicy(schema)); MergePolicy mergePolicy = buildMergePolicy(schema);
iwc.setMergePolicy(mergePolicy);
iwc.setMergeScheduler(buildMergeScheduler(schema)); iwc.setMergeScheduler(buildMergeScheduler(schema));
iwc.setInfoStream(infoStream); iwc.setInfoStream(infoStream);
if (mergePolicy instanceof SortingMergePolicy) {
Sort indexSort = ((SortingMergePolicy) mergePolicy).getSort();
iwc.setIndexSort(indexSort);
}
// do this after buildMergePolicy since the backcompat logic // do this after buildMergePolicy since the backcompat logic
// there may modify the effective useCompoundFile // there may modify the effective useCompoundFile
iwc.setUseCompoundFile(getUseCompoundFile()); iwc.setUseCompoundFile(getUseCompoundFile());

View File

@ -22,7 +22,6 @@ import java.util.List;
import java.util.Random; import java.util.Random;
import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields; import org.apache.lucene.index.Fields;
@ -32,12 +31,14 @@ import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.StoredFieldVisitor; import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Sort;
import org.apache.lucene.util.BitSetIterator; import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.Bits; import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.FixedBitSet;
@ -467,6 +468,11 @@ public class TestDocSet extends LuceneTestCase {
@Override @Override
public void checkIntegrity() throws IOException { public void checkIntegrity() throws IOException {
} }
@Override
public Sort getIndexSort() {
return null;
}
}; };
} }

View File

@ -23,7 +23,6 @@ import org.apache.lucene.index.ConcurrentMergeScheduler;
import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MergePolicy; import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.SimpleMergedSegmentWarmer; import org.apache.lucene.index.SimpleMergedSegmentWarmer;
import org.apache.lucene.index.SortingMergePolicy;
import org.apache.lucene.index.TieredMergePolicy; import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.search.Sort; import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField;
@ -31,6 +30,7 @@ import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.core.DirectoryFactory; import org.apache.solr.core.DirectoryFactory;
import org.apache.solr.core.SolrConfig; import org.apache.solr.core.SolrConfig;
import org.apache.solr.core.TestMergePolicyConfig; import org.apache.solr.core.TestMergePolicyConfig;
import org.apache.solr.index.SortingMergePolicy;
import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.IndexSchemaFactory; import org.apache.solr.schema.IndexSchemaFactory;
import org.junit.BeforeClass; import org.junit.BeforeClass;