LUCENE-5493: hide Sorter, SortSorter, fix tests, change suggest to use public Sort API, cut over collector to take Sort

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5493@1574918 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2014-03-06 15:25:40 +00:00
parent 28a2d1dc60
commit 38eb9cc96c
10 changed files with 54 additions and 56 deletions

View File

@ -24,6 +24,7 @@ import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.CollectionTerminatedException;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocsCollector;
import org.apache.lucene.search.TotalHitCountCollector;
@ -47,7 +48,7 @@ import org.apache.lucene.search.TotalHitCountCollector;
* <p>
* <b>NOTE</b>: This {@link Collector} uses {@link Sorter#getID()} to detect
* whether a segment was sorted with the same {@link Sorter} as the one given in
* {@link #EarlyTerminatingSortingCollector(Collector, Sorter, int)}. This has
* {@link #EarlyTerminatingSortingCollector(Collector, Sort, int)}. This has
* two implications:
* <ul>
* <li>if {@link Sorter#getID()} is not implemented correctly and returns
@ -61,10 +62,11 @@ import org.apache.lucene.search.TotalHitCountCollector;
*
* @lucene.experimental
*/
// nocommit: fix these javadocs to be about Sort
public class EarlyTerminatingSortingCollector extends Collector {
protected final Collector in;
protected final Sorter sorter;
protected final Sort sort;
protected final int numDocsToCollect;
protected int segmentTotalCollect;
@ -77,20 +79,19 @@ public class EarlyTerminatingSortingCollector extends Collector {
*
* @param in
* the collector to wrap
* @param sorter
* the same sorter as the one which is used by {@link IndexWriter}'s
* {@link SortingMergePolicy}
* @param sort
* the sort you are sorting the search results on
* @param numDocsToCollect
* the number of documents to collect on each segment. When wrapping
* a {@link TopDocsCollector}, this number should be the number of
* hits.
*/
public EarlyTerminatingSortingCollector(Collector in, Sorter sorter, int numDocsToCollect) {
public EarlyTerminatingSortingCollector(Collector in, Sort sort, int numDocsToCollect) {
if (numDocsToCollect <= 0) {
throw new IllegalStateException("numDocsToCollect must always be > 0, got " + segmentTotalCollect);
}
this.in = in;
this.sorter = sorter;
this.sort = sort;
this.numDocsToCollect = numDocsToCollect;
}
@ -110,7 +111,7 @@ public class EarlyTerminatingSortingCollector extends Collector {
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {
in.setNextReader(context);
segmentSorted = SortingMergePolicy.isSorted(context.reader(), sorter);
segmentSorted = SortingMergePolicy.isSorted(context.reader(), sort);
segmentTotalCollect = segmentSorted ? numDocsToCollect : Integer.MAX_VALUE;
numCollected = 0;
}

View File

@ -26,7 +26,7 @@ import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
// nocommit: temporary class to engage the cutover!
public class SortSorter extends Sorter {
class SortSorter extends Sorter {
final Sort sort;
public SortSorter(Sort sort) {

View File

@ -34,7 +34,7 @@ import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
* when documents are directly comparable by their field values.
* @lucene.experimental
*/
public abstract class Sorter {
abstract class Sorter {
/** A comparator that keeps documents in index order. */
public static final DocComparator INDEX_ORDER_COMPARATOR = new DocComparator() {

View File

@ -35,6 +35,7 @@ import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Sort;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMFile;
@ -710,12 +711,12 @@ public class SortingAtomicReader extends FilterAtomicReader {
/** Return a sorted view of <code>reader</code> according to the order
* defined by <code>sort</code>. If the reader is already sorted, this
* method might return the reader as-is. */
public static AtomicReader wrap(AtomicReader reader, Sorter sorter) throws IOException {
return wrap(reader, sorter.sort(reader));
public static AtomicReader wrap(AtomicReader reader, Sort sort) throws IOException {
return wrap(reader, new SortSorter(sort).sort(reader));
}
/** Expert: same as {@link #wrap(AtomicReader, Sorter)} but operates directly on a {@link Sorter.DocMap}. */
public static AtomicReader wrap(AtomicReader reader, Sorter.DocMap docMap) {
/** Expert: same as {@link #wrap(AtomicReader, Sort)} but operates directly on a {@link Sorter.DocMap}. */
static AtomicReader wrap(AtomicReader reader, Sorter.DocMap docMap) {
if (docMap == null) {
// the reader is already sorted
return reader;

View File

@ -33,11 +33,12 @@ import org.apache.lucene.index.SegmentCommitInfo;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.search.Sort;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
/** A {@link MergePolicy} that reorders documents according to a {@link Sorter}
/** A {@link MergePolicy} that reorders documents according to a {@link Sort}
* before merging them. As a consequence, all segments resulting from a merge
* will be sorted while segments resulting from a flush will be in the order
* in which documents have been added.
@ -45,11 +46,12 @@ import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
* {@link IndexWriter#addDocuments(Iterable, org.apache.lucene.analysis.Analyzer)}
* to have sequentially-assigned doc IDs, this policy will scatter doc IDs.
* <p><b>NOTE</b>: This {@link MergePolicy} should only be used with idempotent
* {@link Sorter}s so that the order of segments is predictable. For example,
* {@link Sort}s so that the order of segments is predictable. For example,
* using {@link SortingMergePolicy} with {@link Sorter#REVERSE_DOCS} (which is
* not idempotent) will make the order of documents in a segment depend on the
* number of times the segment has been merged.
* @lucene.experimental */
// nocommit: fix these jdocs around idempotency
public final class SortingMergePolicy extends MergePolicy {
/**
@ -147,12 +149,12 @@ public final class SortingMergePolicy extends MergePolicy {
}
/** Returns true if the given reader is sorted by the given sorter. */
public static boolean isSorted(AtomicReader reader, Sorter sorter) {
/** Returns true if the given reader is sorted by the given sort. */
public static boolean isSorted(AtomicReader reader, Sort sort) {
if (reader instanceof SegmentReader) {
final SegmentReader segReader = (SegmentReader) reader;
final Map<String, String> diagnostics = segReader.getSegmentInfo().info.getDiagnostics();
if (diagnostics != null && sorter.getID().equals(diagnostics.get(SORTER_ID_PROP))) {
if (diagnostics != null && sort.toString().equals(diagnostics.get(SORTER_ID_PROP))) {
return true;
}
}
@ -172,11 +174,13 @@ public final class SortingMergePolicy extends MergePolicy {
final MergePolicy in;
final Sorter sorter;
final Sort sort;
/** Create a new {@link MergePolicy} that sorts documents with <code>sorter</code>. */
public SortingMergePolicy(MergePolicy in, Sorter sorter) {
/** Create a new {@link MergePolicy} that sorts documents with <code>sort</code>. */
public SortingMergePolicy(MergePolicy in, Sort sort) {
this.in = in;
this.sorter = sorter;
this.sorter = new SortSorter(sort);
this.sort = sort;
}
@Override
@ -200,7 +204,7 @@ public final class SortingMergePolicy extends MergePolicy {
@Override
public MergePolicy clone() {
return new SortingMergePolicy(in.clone(), sorter);
return new SortingMergePolicy(in.clone(), sort);
}
@Override

View File

@ -33,9 +33,9 @@ import org.junit.BeforeClass;
public class IndexSortingTest extends SorterTestBase {
private static final Sorter[] SORTERS = new Sorter[] {
new SortSorter(new Sort(new SortField(NUMERIC_DV_FIELD, SortField.Type.LONG))),
Sorter.REVERSE_DOCS,
private static final Sort[] SORT = new Sort[] {
new Sort(new SortField(NUMERIC_DV_FIELD, SortField.Type.LONG)),
new Sort(new SortField(null, SortField.Type.DOC, true))
};
@BeforeClass
@ -49,13 +49,14 @@ public class IndexSortingTest extends SorterTestBase {
values.add(Integer.valueOf(reader.document(i).get(ID_FIELD)));
}
}
Sorter sorter = SORTERS[random().nextInt(SORTERS.length)];
if (sorter == Sorter.REVERSE_DOCS) {
int idx = random().nextInt(SORT.length);
Sort sorter = SORT[idx];
if (idx == 1) { // reverse doc sort
Collections.reverse(values);
} else {
Collections.sort(values);
if (sorter instanceof SortSorter && random().nextBoolean()) {
sorter = new SortSorter(new Sort(new SortField(NUMERIC_DV_FIELD, SortField.Type.LONG, true))); // descending
if (random().nextBoolean()) {
sorter = new Sort(new SortField(NUMERIC_DV_FIELD, SortField.Type.LONG, true)); // descending
Collections.reverse(values);
}
}

View File

@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.search.SortField;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.TestUtil;
@ -57,16 +58,8 @@ public class SortingAtomicReaderTest extends SorterTestBase {
System.out.println("sortedValues: " + Arrays.toString(sortedValues));
}
reader = SortingAtomicReader.wrap(reader, new Sorter() {
@Override
public Sorter.DocMap sort(AtomicReader reader) throws IOException {
return docMap;
}
@Override
public String getID() {
return ID_FIELD;
}
});
// TODO: what is this doing? like a no-op sort?
reader = SortingAtomicReader.wrap(reader, docMap);
if (VERBOSE) {
System.out.print("mapped-deleted-docs: ");

View File

@ -51,14 +51,14 @@ public class TestEarlyTermination extends LuceneTestCase {
private int numDocs;
private List<String> terms;
private Directory dir;
private Sorter sorter;
private Sort sort;
private RandomIndexWriter iw;
private IndexReader reader;
@Override
public void setUp() throws Exception {
super.setUp();
sorter = new SortSorter(new Sort(new SortField("ndv1", SortField.Type.LONG)));
sort = new Sort(new SortField("ndv1", SortField.Type.LONG));
}
private Document randomDocument() {
@ -80,7 +80,7 @@ public class TestEarlyTermination extends LuceneTestCase {
terms = new ArrayList<String>(randomTerms);
final long seed = random().nextLong();
final IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random(seed)));
iwc.setMergePolicy(TestSortingMergePolicy.newSortingMergePolicy(sorter));
iwc.setMergePolicy(TestSortingMergePolicy.newSortingMergePolicy(sort));
iw = new RandomIndexWriter(new Random(seed), dir, iwc);
for (int i = 0; i < numDocs; ++i) {
final Document doc = randomDocument();
@ -120,7 +120,7 @@ public class TestEarlyTermination extends LuceneTestCase {
for (int i = 0; i < iters; ++i) {
final TermQuery query = new TermQuery(new Term("s", RandomPicks.randomFrom(random(), terms)));
searcher.search(query, collector1);
searcher.search(query, new EarlyTerminatingSortingCollector(collector2, sorter, numHits));
searcher.search(query, new EarlyTerminatingSortingCollector(collector2, sort, numHits));
}
assertTrue(collector1.getTotalHits() >= collector2.getTotalHits());
assertTopDocsEquals(collector1.topDocs().scoreDocs, collector2.topDocs().scoreDocs);
@ -144,7 +144,7 @@ public class TestEarlyTermination extends LuceneTestCase {
for (int i = 0; i < iters; ++i) {
final TermQuery query = new TermQuery(new Term("s", RandomPicks.randomFrom(random(), terms)));
searcher.search(query, collector1);
Sorter different = new SortSorter(new Sort(new SortField("ndv2", SortField.Type.LONG)));
Sort different = new Sort(new SortField("ndv2", SortField.Type.LONG));
searcher.search(query, new EarlyTerminatingSortingCollector(collector2, different, numHits) {
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {

View File

@ -52,14 +52,14 @@ public class TestSortingMergePolicy extends LuceneTestCase {
private List<String> terms;
private Directory dir1, dir2;
private Sorter sorter;
private Sort sort;
private IndexReader reader;
private IndexReader sortedReader;
@Override
public void setUp() throws Exception {
super.setUp();
sorter = new SortSorter(new Sort(new SortField("ndv", SortField.Type.LONG)));
sort = new Sort(new SortField("ndv", SortField.Type.LONG));
createRandomIndexes();
}
@ -70,7 +70,7 @@ public class TestSortingMergePolicy extends LuceneTestCase {
return doc;
}
static MergePolicy newSortingMergePolicy(Sorter sorter) {
static MergePolicy newSortingMergePolicy(Sort sort) {
// create a MP with a low merge factor so that many merges happen
MergePolicy mp;
if (random().nextBoolean()) {
@ -85,7 +85,7 @@ public class TestSortingMergePolicy extends LuceneTestCase {
mp = lmp;
}
// wrap it with a sorting mp
return new SortingMergePolicy(mp, sorter);
return new SortingMergePolicy(mp, sort);
}
private void createRandomIndexes() throws IOException {
@ -101,7 +101,7 @@ public class TestSortingMergePolicy extends LuceneTestCase {
final long seed = random().nextLong();
final IndexWriterConfig iwc1 = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random(seed)));
final IndexWriterConfig iwc2 = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random(seed)));
iwc2.setMergePolicy(newSortingMergePolicy(sorter));
iwc2.setMergePolicy(newSortingMergePolicy(sort));
final RandomIndexWriter iw1 = new RandomIndexWriter(new Random(seed), dir1, iwc1);
final RandomIndexWriter iw2 = new RandomIndexWriter(new Random(seed), dir2, iwc2);
for (int i = 0; i < numDocs; ++i) {
@ -164,7 +164,7 @@ public class TestSortingMergePolicy extends LuceneTestCase {
}
public void testSortingMP() throws IOException {
final AtomicReader sortedReader1 = SortingAtomicReader.wrap(SlowCompositeReaderWrapper.wrap(reader), sorter);
final AtomicReader sortedReader1 = SortingAtomicReader.wrap(SlowCompositeReaderWrapper.wrap(reader), sort);
final AtomicReader sortedReader2 = SlowCompositeReaderWrapper.wrap(sortedReader);
assertSorted(sortedReader1);

View File

@ -54,10 +54,8 @@ import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.sorter.EarlyTerminatingSortingCollector;
import org.apache.lucene.index.sorter.Sorter;
import org.apache.lucene.index.sorter.SortingAtomicReader;
import org.apache.lucene.index.sorter.SortingMergePolicy;
import org.apache.lucene.index.sorter.SortSorter;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Collector;
@ -131,7 +129,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
* PrefixQuery is used (4). */
public static final int DEFAULT_MIN_PREFIX_CHARS = 4;
private Sorter sorter;
private Sort sorter;
/** Create a new instance, loading from a previously built
* directory, if it exists. */
@ -173,7 +171,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
/** Override this to customize index settings, e.g. which
* codec to use. Sorter is null if this config is for
* the first pass writer. */
protected IndexWriterConfig getIndexWriterConfig(Version matchVersion, Analyzer indexAnalyzer, Sorter sorter, IndexWriterConfig.OpenMode openMode) {
protected IndexWriterConfig getIndexWriterConfig(Version matchVersion, Analyzer indexAnalyzer, Sort sorter, IndexWriterConfig.OpenMode openMode) {
IndexWriterConfig iwc = new IndexWriterConfig(matchVersion, indexAnalyzer);
iwc.setCodec(new Lucene46Codec());
iwc.setOpenMode(openMode);
@ -360,7 +358,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
}
private void initSorter() {
sorter = new SortSorter(new Sort(new SortField("weight", SortField.Type.LONG, true)));
sorter = new Sort(new SortField("weight", SortField.Type.LONG, true));
}
/**