mirror of https://github.com/apache/lucene.git
LUCENE-5493: hide Sorter, SortSorter, fix tests, change suggest to use public Sort API, cut over collector to take Sort
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5493@1574918 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
28a2d1dc60
commit
38eb9cc96c
|
@ -24,6 +24,7 @@ import org.apache.lucene.index.IndexWriter;
|
|||
import org.apache.lucene.search.CollectionTerminatedException;
|
||||
import org.apache.lucene.search.Collector;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.TopDocsCollector;
|
||||
import org.apache.lucene.search.TotalHitCountCollector;
|
||||
|
||||
|
@ -47,7 +48,7 @@ import org.apache.lucene.search.TotalHitCountCollector;
|
|||
* <p>
|
||||
* <b>NOTE</b>: This {@link Collector} uses {@link Sorter#getID()} to detect
|
||||
* whether a segment was sorted with the same {@link Sorter} as the one given in
|
||||
* {@link #EarlyTerminatingSortingCollector(Collector, Sorter, int)}. This has
|
||||
* {@link #EarlyTerminatingSortingCollector(Collector, Sort, int)}. This has
|
||||
* two implications:
|
||||
* <ul>
|
||||
* <li>if {@link Sorter#getID()} is not implemented correctly and returns
|
||||
|
@ -61,10 +62,11 @@ import org.apache.lucene.search.TotalHitCountCollector;
|
|||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
// nocommit: fix these javadocs to be about Sort
|
||||
public class EarlyTerminatingSortingCollector extends Collector {
|
||||
|
||||
protected final Collector in;
|
||||
protected final Sorter sorter;
|
||||
protected final Sort sort;
|
||||
protected final int numDocsToCollect;
|
||||
|
||||
protected int segmentTotalCollect;
|
||||
|
@ -77,20 +79,19 @@ public class EarlyTerminatingSortingCollector extends Collector {
|
|||
*
|
||||
* @param in
|
||||
* the collector to wrap
|
||||
* @param sorter
|
||||
* the same sorter as the one which is used by {@link IndexWriter}'s
|
||||
* {@link SortingMergePolicy}
|
||||
* @param sort
|
||||
* the sort you are sorting the search results on
|
||||
* @param numDocsToCollect
|
||||
* the number of documents to collect on each segment. When wrapping
|
||||
* a {@link TopDocsCollector}, this number should be the number of
|
||||
* hits.
|
||||
*/
|
||||
public EarlyTerminatingSortingCollector(Collector in, Sorter sorter, int numDocsToCollect) {
|
||||
public EarlyTerminatingSortingCollector(Collector in, Sort sort, int numDocsToCollect) {
|
||||
if (numDocsToCollect <= 0) {
|
||||
throw new IllegalStateException("numDocsToCollect must always be > 0, got " + segmentTotalCollect);
|
||||
}
|
||||
this.in = in;
|
||||
this.sorter = sorter;
|
||||
this.sort = sort;
|
||||
this.numDocsToCollect = numDocsToCollect;
|
||||
}
|
||||
|
||||
|
@ -110,7 +111,7 @@ public class EarlyTerminatingSortingCollector extends Collector {
|
|||
@Override
|
||||
public void setNextReader(AtomicReaderContext context) throws IOException {
|
||||
in.setNextReader(context);
|
||||
segmentSorted = SortingMergePolicy.isSorted(context.reader(), sorter);
|
||||
segmentSorted = SortingMergePolicy.isSorted(context.reader(), sort);
|
||||
segmentTotalCollect = segmentSorted ? numDocsToCollect : Integer.MAX_VALUE;
|
||||
numCollected = 0;
|
||||
}
|
||||
|
|
|
@ -26,7 +26,7 @@ import org.apache.lucene.search.Sort;
|
|||
import org.apache.lucene.search.SortField;
|
||||
|
||||
// nocommit: temporary class to engage the cutover!
|
||||
public class SortSorter extends Sorter {
|
||||
class SortSorter extends Sorter {
|
||||
final Sort sort;
|
||||
|
||||
public SortSorter(Sort sort) {
|
||||
|
|
|
@ -34,7 +34,7 @@ import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
|
|||
* when documents are directly comparable by their field values.
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public abstract class Sorter {
|
||||
abstract class Sorter {
|
||||
|
||||
/** A comparator that keeps documents in index order. */
|
||||
public static final DocComparator INDEX_ORDER_COMPARATOR = new DocComparator() {
|
||||
|
|
|
@ -35,6 +35,7 @@ import org.apache.lucene.index.StoredFieldVisitor;
|
|||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.store.RAMFile;
|
||||
|
@ -710,12 +711,12 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
|||
/** Return a sorted view of <code>reader</code> according to the order
|
||||
* defined by <code>sorter</code>. If the reader is already sorted, this
|
||||
* method might return the reader as-is. */
|
||||
public static AtomicReader wrap(AtomicReader reader, Sorter sorter) throws IOException {
|
||||
return wrap(reader, sorter.sort(reader));
|
||||
public static AtomicReader wrap(AtomicReader reader, Sort sort) throws IOException {
|
||||
return wrap(reader, new SortSorter(sort).sort(reader));
|
||||
}
|
||||
|
||||
/** Expert: same as {@link #wrap(AtomicReader, Sorter)} but operates directly on a {@link Sorter.DocMap}. */
|
||||
public static AtomicReader wrap(AtomicReader reader, Sorter.DocMap docMap) {
|
||||
/** Expert: same as {@link #wrap(AtomicReader, Sort)} but operates directly on a {@link Sorter.DocMap}. */
|
||||
static AtomicReader wrap(AtomicReader reader, Sorter.DocMap docMap) {
|
||||
if (docMap == null) {
|
||||
// the reader is already sorter
|
||||
return reader;
|
||||
|
|
|
@ -33,11 +33,12 @@ import org.apache.lucene.index.SegmentCommitInfo;
|
|||
import org.apache.lucene.index.SegmentInfos;
|
||||
import org.apache.lucene.index.SegmentReader;
|
||||
import org.apache.lucene.index.SlowCompositeReaderWrapper;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
|
||||
|
||||
/** A {@link MergePolicy} that reorders documents according to a {@link Sorter}
|
||||
/** A {@link MergePolicy} that reorders documents according to a {@link Sort}
|
||||
* before merging them. As a consequence, all segments resulting from a merge
|
||||
* will be sorted while segments resulting from a flush will be in the order
|
||||
* in which documents have been added.
|
||||
|
@ -45,11 +46,12 @@ import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
|
|||
* {@link IndexWriter#addDocuments(Iterable, org.apache.lucene.analysis.Analyzer)}
|
||||
* to have sequentially-assigned doc IDs, this policy will scatter doc IDs.
|
||||
* <p><b>NOTE</b>: This {@link MergePolicy} should only be used with idempotent
|
||||
* {@link Sorter}s so that the order of segments is predictable. For example,
|
||||
* {@link Sort}s so that the order of segments is predictable. For example,
|
||||
* using {@link SortingMergePolicy} with {@link Sorter#REVERSE_DOCS} (which is
|
||||
* not idempotent) will make the order of documents in a segment depend on the
|
||||
* number of times the segment has been merged.
|
||||
* @lucene.experimental */
|
||||
// nocommit: fix these jdocs around idempotency
|
||||
public final class SortingMergePolicy extends MergePolicy {
|
||||
|
||||
/**
|
||||
|
@ -147,12 +149,12 @@ public final class SortingMergePolicy extends MergePolicy {
|
|||
|
||||
}
|
||||
|
||||
/** Returns true if the given reader is sorted by the given sorter. */
|
||||
public static boolean isSorted(AtomicReader reader, Sorter sorter) {
|
||||
/** Returns true if the given reader is sorted by the given sort. */
|
||||
public static boolean isSorted(AtomicReader reader, Sort sort) {
|
||||
if (reader instanceof SegmentReader) {
|
||||
final SegmentReader segReader = (SegmentReader) reader;
|
||||
final Map<String, String> diagnostics = segReader.getSegmentInfo().info.getDiagnostics();
|
||||
if (diagnostics != null && sorter.getID().equals(diagnostics.get(SORTER_ID_PROP))) {
|
||||
if (diagnostics != null && sort.toString().equals(diagnostics.get(SORTER_ID_PROP))) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -172,11 +174,13 @@ public final class SortingMergePolicy extends MergePolicy {
|
|||
|
||||
final MergePolicy in;
|
||||
final Sorter sorter;
|
||||
final Sort sort;
|
||||
|
||||
/** Create a new {@link MergePolicy} that sorts documents with <code>sorter</code>. */
|
||||
public SortingMergePolicy(MergePolicy in, Sorter sorter) {
|
||||
/** Create a new {@link MergePolicy} that sorts documents with <code>sort</code>. */
|
||||
public SortingMergePolicy(MergePolicy in, Sort sort) {
|
||||
this.in = in;
|
||||
this.sorter = sorter;
|
||||
this.sorter = new SortSorter(sort);
|
||||
this.sort = sort;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -200,7 +204,7 @@ public final class SortingMergePolicy extends MergePolicy {
|
|||
|
||||
@Override
|
||||
public MergePolicy clone() {
|
||||
return new SortingMergePolicy(in.clone(), sorter);
|
||||
return new SortingMergePolicy(in.clone(), sort);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -33,9 +33,9 @@ import org.junit.BeforeClass;
|
|||
|
||||
public class IndexSortingTest extends SorterTestBase {
|
||||
|
||||
private static final Sorter[] SORTERS = new Sorter[] {
|
||||
new SortSorter(new Sort(new SortField(NUMERIC_DV_FIELD, SortField.Type.LONG))),
|
||||
Sorter.REVERSE_DOCS,
|
||||
private static final Sort[] SORT = new Sort[] {
|
||||
new Sort(new SortField(NUMERIC_DV_FIELD, SortField.Type.LONG)),
|
||||
new Sort(new SortField(null, SortField.Type.DOC, true))
|
||||
};
|
||||
|
||||
@BeforeClass
|
||||
|
@ -49,13 +49,14 @@ public class IndexSortingTest extends SorterTestBase {
|
|||
values.add(Integer.valueOf(reader.document(i).get(ID_FIELD)));
|
||||
}
|
||||
}
|
||||
Sorter sorter = SORTERS[random().nextInt(SORTERS.length)];
|
||||
if (sorter == Sorter.REVERSE_DOCS) {
|
||||
int idx = random().nextInt(SORT.length);
|
||||
Sort sorter = SORT[idx];
|
||||
if (idx == 1) { // reverse doc sort
|
||||
Collections.reverse(values);
|
||||
} else {
|
||||
Collections.sort(values);
|
||||
if (sorter instanceof SortSorter && random().nextBoolean()) {
|
||||
sorter = new SortSorter(new Sort(new SortField(NUMERIC_DV_FIELD, SortField.Type.LONG, true))); // descending
|
||||
if (random().nextBoolean()) {
|
||||
sorter = new Sort(new SortField(NUMERIC_DV_FIELD, SortField.Type.LONG, true)); // descending
|
||||
Collections.reverse(values);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -21,6 +21,7 @@ import java.io.IOException;
|
|||
import java.util.Arrays;
|
||||
|
||||
import org.apache.lucene.index.AtomicReader;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
@ -57,16 +58,8 @@ public class SortingAtomicReaderTest extends SorterTestBase {
|
|||
System.out.println("sortedValues: " + Arrays.toString(sortedValues));
|
||||
}
|
||||
|
||||
reader = SortingAtomicReader.wrap(reader, new Sorter() {
|
||||
@Override
|
||||
public Sorter.DocMap sort(AtomicReader reader) throws IOException {
|
||||
return docMap;
|
||||
}
|
||||
@Override
|
||||
public String getID() {
|
||||
return ID_FIELD;
|
||||
}
|
||||
});
|
||||
// TODO: what is this doing? like a no-op sort?
|
||||
reader = SortingAtomicReader.wrap(reader, docMap);
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.print("mapped-deleted-docs: ");
|
||||
|
|
|
@ -51,14 +51,14 @@ public class TestEarlyTermination extends LuceneTestCase {
|
|||
private int numDocs;
|
||||
private List<String> terms;
|
||||
private Directory dir;
|
||||
private Sorter sorter;
|
||||
private Sort sort;
|
||||
private RandomIndexWriter iw;
|
||||
private IndexReader reader;
|
||||
|
||||
@Override
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
sorter = new SortSorter(new Sort(new SortField("ndv1", SortField.Type.LONG)));
|
||||
sort = new Sort(new SortField("ndv1", SortField.Type.LONG));
|
||||
}
|
||||
|
||||
private Document randomDocument() {
|
||||
|
@ -80,7 +80,7 @@ public class TestEarlyTermination extends LuceneTestCase {
|
|||
terms = new ArrayList<String>(randomTerms);
|
||||
final long seed = random().nextLong();
|
||||
final IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random(seed)));
|
||||
iwc.setMergePolicy(TestSortingMergePolicy.newSortingMergePolicy(sorter));
|
||||
iwc.setMergePolicy(TestSortingMergePolicy.newSortingMergePolicy(sort));
|
||||
iw = new RandomIndexWriter(new Random(seed), dir, iwc);
|
||||
for (int i = 0; i < numDocs; ++i) {
|
||||
final Document doc = randomDocument();
|
||||
|
@ -120,7 +120,7 @@ public class TestEarlyTermination extends LuceneTestCase {
|
|||
for (int i = 0; i < iters; ++i) {
|
||||
final TermQuery query = new TermQuery(new Term("s", RandomPicks.randomFrom(random(), terms)));
|
||||
searcher.search(query, collector1);
|
||||
searcher.search(query, new EarlyTerminatingSortingCollector(collector2, sorter, numHits));
|
||||
searcher.search(query, new EarlyTerminatingSortingCollector(collector2, sort, numHits));
|
||||
}
|
||||
assertTrue(collector1.getTotalHits() >= collector2.getTotalHits());
|
||||
assertTopDocsEquals(collector1.topDocs().scoreDocs, collector2.topDocs().scoreDocs);
|
||||
|
@ -144,7 +144,7 @@ public class TestEarlyTermination extends LuceneTestCase {
|
|||
for (int i = 0; i < iters; ++i) {
|
||||
final TermQuery query = new TermQuery(new Term("s", RandomPicks.randomFrom(random(), terms)));
|
||||
searcher.search(query, collector1);
|
||||
Sorter different = new SortSorter(new Sort(new SortField("ndv2", SortField.Type.LONG)));
|
||||
Sort different = new Sort(new SortField("ndv2", SortField.Type.LONG));
|
||||
searcher.search(query, new EarlyTerminatingSortingCollector(collector2, different, numHits) {
|
||||
@Override
|
||||
public void setNextReader(AtomicReaderContext context) throws IOException {
|
||||
|
|
|
@ -52,14 +52,14 @@ public class TestSortingMergePolicy extends LuceneTestCase {
|
|||
|
||||
private List<String> terms;
|
||||
private Directory dir1, dir2;
|
||||
private Sorter sorter;
|
||||
private Sort sort;
|
||||
private IndexReader reader;
|
||||
private IndexReader sortedReader;
|
||||
|
||||
@Override
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
sorter = new SortSorter(new Sort(new SortField("ndv", SortField.Type.LONG)));
|
||||
sort = new Sort(new SortField("ndv", SortField.Type.LONG));
|
||||
createRandomIndexes();
|
||||
}
|
||||
|
||||
|
@ -70,7 +70,7 @@ public class TestSortingMergePolicy extends LuceneTestCase {
|
|||
return doc;
|
||||
}
|
||||
|
||||
static MergePolicy newSortingMergePolicy(Sorter sorter) {
|
||||
static MergePolicy newSortingMergePolicy(Sort sort) {
|
||||
// create a MP with a low merge factor so that many merges happen
|
||||
MergePolicy mp;
|
||||
if (random().nextBoolean()) {
|
||||
|
@ -85,7 +85,7 @@ public class TestSortingMergePolicy extends LuceneTestCase {
|
|||
mp = lmp;
|
||||
}
|
||||
// wrap it with a sorting mp
|
||||
return new SortingMergePolicy(mp, sorter);
|
||||
return new SortingMergePolicy(mp, sort);
|
||||
}
|
||||
|
||||
private void createRandomIndexes() throws IOException {
|
||||
|
@ -101,7 +101,7 @@ public class TestSortingMergePolicy extends LuceneTestCase {
|
|||
final long seed = random().nextLong();
|
||||
final IndexWriterConfig iwc1 = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random(seed)));
|
||||
final IndexWriterConfig iwc2 = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random(seed)));
|
||||
iwc2.setMergePolicy(newSortingMergePolicy(sorter));
|
||||
iwc2.setMergePolicy(newSortingMergePolicy(sort));
|
||||
final RandomIndexWriter iw1 = new RandomIndexWriter(new Random(seed), dir1, iwc1);
|
||||
final RandomIndexWriter iw2 = new RandomIndexWriter(new Random(seed), dir2, iwc2);
|
||||
for (int i = 0; i < numDocs; ++i) {
|
||||
|
@ -164,7 +164,7 @@ public class TestSortingMergePolicy extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testSortingMP() throws IOException {
|
||||
final AtomicReader sortedReader1 = SortingAtomicReader.wrap(SlowCompositeReaderWrapper.wrap(reader), sorter);
|
||||
final AtomicReader sortedReader1 = SortingAtomicReader.wrap(SlowCompositeReaderWrapper.wrap(reader), sort);
|
||||
final AtomicReader sortedReader2 = SlowCompositeReaderWrapper.wrap(sortedReader);
|
||||
|
||||
assertSorted(sortedReader1);
|
||||
|
|
|
@ -54,10 +54,8 @@ import org.apache.lucene.index.SegmentReader;
|
|||
import org.apache.lucene.index.SlowCompositeReaderWrapper;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.sorter.EarlyTerminatingSortingCollector;
|
||||
import org.apache.lucene.index.sorter.Sorter;
|
||||
import org.apache.lucene.index.sorter.SortingAtomicReader;
|
||||
import org.apache.lucene.index.sorter.SortingMergePolicy;
|
||||
import org.apache.lucene.index.sorter.SortSorter;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.Collector;
|
||||
|
@ -131,7 +129,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
|||
* PrefixQuery is used (4). */
|
||||
public static final int DEFAULT_MIN_PREFIX_CHARS = 4;
|
||||
|
||||
private Sorter sorter;
|
||||
private Sort sorter;
|
||||
|
||||
/** Create a new instance, loading from a previously built
|
||||
* directory, if it exists. */
|
||||
|
@ -173,7 +171,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
|||
/** Override this to customize index settings, e.g. which
|
||||
* codec to use. Sorter is null if this config is for
|
||||
* the first pass writer. */
|
||||
protected IndexWriterConfig getIndexWriterConfig(Version matchVersion, Analyzer indexAnalyzer, Sorter sorter, IndexWriterConfig.OpenMode openMode) {
|
||||
protected IndexWriterConfig getIndexWriterConfig(Version matchVersion, Analyzer indexAnalyzer, Sort sorter, IndexWriterConfig.OpenMode openMode) {
|
||||
IndexWriterConfig iwc = new IndexWriterConfig(matchVersion, indexAnalyzer);
|
||||
iwc.setCodec(new Lucene46Codec());
|
||||
iwc.setOpenMode(openMode);
|
||||
|
@ -360,7 +358,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
|||
}
|
||||
|
||||
private void initSorter() {
|
||||
sorter = new SortSorter(new Sort(new SortField("weight", SortField.Type.LONG, true)));
|
||||
sorter = new Sort(new SortField("weight", SortField.Type.LONG, true));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
Loading…
Reference in New Issue