mirror of https://github.com/apache/lucene.git
LUCENE-4830: Sorter API: Make the doc ID mapping an abstract class.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1456787 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
67083534ff
commit
44ca68f294
|
@ -18,8 +18,6 @@ package org.apache.lucene.index.sorter;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.AbstractList;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import org.apache.lucene.index.AtomicReader;
|
import org.apache.lucene.index.AtomicReader;
|
||||||
import org.apache.lucene.index.NumericDocValues;
|
import org.apache.lucene.index.NumericDocValues;
|
||||||
|
@ -27,7 +25,7 @@ import org.apache.lucene.index.NumericDocValues;
|
||||||
/**
|
/**
|
||||||
* A {@link Sorter} which sorts documents according to their
|
* A {@link Sorter} which sorts documents according to their
|
||||||
* {@link NumericDocValues}.
|
* {@link NumericDocValues}.
|
||||||
*
|
*
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
public class NumericDocValuesSorter extends Sorter {
|
public class NumericDocValuesSorter extends Sorter {
|
||||||
|
@ -39,27 +37,19 @@ public class NumericDocValuesSorter extends Sorter {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int[] oldToNew(final AtomicReader reader) throws IOException {
|
public Sorter.DocMap sort(final AtomicReader reader) throws IOException {
|
||||||
final NumericDocValues ndv = reader.getNumericDocValues(fieldName);
|
final NumericDocValues ndv = reader.getNumericDocValues(fieldName);
|
||||||
final int maxDoc = reader.maxDoc();
|
final DocComparator comparator = new DocComparator() {
|
||||||
final int[] docs = new int[maxDoc];
|
|
||||||
final List<Long> values = new AbstractList<Long>() {
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Long get(int doc) {
|
public int compare(int docID1, int docID2) {
|
||||||
return ndv.get(doc);
|
final long v1 = ndv.get(docID1);
|
||||||
}
|
final long v2 = ndv.get(docID2);
|
||||||
|
return v1 < v2 ? -1 : v1 == v2 ? 0 : 1;
|
||||||
@Override
|
|
||||||
public int size() {
|
|
||||||
return reader.maxDoc();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
for (int i = 0; i < maxDoc; i++) {
|
return sort(reader.maxDoc(), comparator);
|
||||||
docs[i] = i;
|
|
||||||
}
|
|
||||||
return compute(docs, values);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,58 +18,102 @@ package org.apache.lucene.index.sorter;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.List;
|
import java.util.Comparator;
|
||||||
|
|
||||||
import org.apache.lucene.index.AtomicReader;
|
import org.apache.lucene.index.AtomicReader;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.util.SorterTemplate;
|
import org.apache.lucene.util.SorterTemplate;
|
||||||
|
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sorts documents in a given index by returning a permutation on the docs.
|
* Sorts documents in a given index by returning a permutation on the docs.
|
||||||
* Implementations can call {@link #compute(int[], List)} to compute the
|
* Implementations can call {@link #sort(int, DocComparator)} to compute the
|
||||||
* old-to-new permutation over the given documents and values.
|
* doc ID permutation from the number of documents and a comparator of doc IDs.
|
||||||
*
|
*
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
public abstract class Sorter {
|
public abstract class Sorter {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A permutation of doc IDs. For every document ID between <tt>0</tt> and
|
||||||
|
* {@link IndexReader#maxDoc()}, <code>oldToNew(newToOld(docID))</code> must
|
||||||
|
* return <code>docID</code>.
|
||||||
|
*/
|
||||||
|
public static abstract class DocMap {
|
||||||
|
|
||||||
|
/** Given a doc ID from the original index, return its ordinal in the
|
||||||
|
* sorted index. */
|
||||||
|
public abstract int oldToNew(int docID);
|
||||||
|
|
||||||
|
/** Given the ordinal of a doc ID, return its doc ID in the original index. */
|
||||||
|
public abstract int newToOld(int docID);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Check consistency of a {@link DocMap}, useful for assertions. */
|
||||||
|
static boolean isConsistent(DocMap docMap, int maxDoc) {
|
||||||
|
for (int i = 0; i < maxDoc; ++i) {
|
||||||
|
final int newID = docMap.oldToNew(i);
|
||||||
|
final int oldID = docMap.newToOld(newID);
|
||||||
|
assert newID >= 0 && newID < maxDoc : "doc IDs must be in [0-" + maxDoc + "[, got " + newID;
|
||||||
|
assert i == oldID : "mapping is inconsistent: " + i + " --oldToNew--> " + newID + " --newToOld--> " + oldID;
|
||||||
|
if (i != oldID || newID < 0 || newID >= maxDoc) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** A comparator of doc IDs. */
|
||||||
|
public static abstract class DocComparator {
|
||||||
|
|
||||||
|
/** Compare docID1 against docID2. The contract for the return value is the
|
||||||
|
* same as {@link Comparator#compare(Object, Object)}. */
|
||||||
|
public abstract int compare(int docID1, int docID2);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
/** Sorts documents in reverse order. */
|
/** Sorts documents in reverse order. */
|
||||||
public static final Sorter REVERSE_DOCS = new Sorter() {
|
public static final Sorter REVERSE_DOCS = new Sorter() {
|
||||||
@Override
|
@Override
|
||||||
public int[] oldToNew(final AtomicReader reader) throws IOException {
|
public DocMap sort(final AtomicReader reader) throws IOException {
|
||||||
final int maxDoc = reader.maxDoc();
|
final int maxDoc = reader.maxDoc();
|
||||||
int[] reverseDocs = new int[maxDoc];
|
return new DocMap() {
|
||||||
for (int i = 0; i < maxDoc; i++) {
|
public int oldToNew(int docID) {
|
||||||
reverseDocs[i] = maxDoc - (i + 1);
|
return maxDoc - docID - 1;
|
||||||
}
|
}
|
||||||
return reverseDocs;
|
public int newToOld(int docID) {
|
||||||
|
return maxDoc - docID - 1;
|
||||||
|
}
|
||||||
|
};
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
private static final class DocValueSorterTemplate<T extends Comparable<? super T>> extends SorterTemplate {
|
private static final class DocValueSorterTemplate extends SorterTemplate {
|
||||||
|
|
||||||
private final int[] docs;
|
private final int[] docs;
|
||||||
private final List<T> values;
|
private final Sorter.DocComparator comparator;
|
||||||
|
|
||||||
private T pivot;
|
private int pivot;
|
||||||
|
|
||||||
public DocValueSorterTemplate(int[] docs, List<T> values) {
|
public DocValueSorterTemplate(int[] docs, Sorter.DocComparator comparator) {
|
||||||
this.docs = docs;
|
this.docs = docs;
|
||||||
this.values = values;
|
this.comparator = comparator;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected int compare(int i, int j) {
|
protected int compare(int i, int j) {
|
||||||
return values.get(docs[i]).compareTo(values.get(docs[j]));
|
return comparator.compare(docs[i], docs[j]);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected int comparePivot(int j) {
|
protected int comparePivot(int j) {
|
||||||
return pivot.compareTo(values.get(docs[j]));
|
return comparator.compare(pivot, docs[j]);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void setPivot(int i) {
|
protected void setPivot(int i) {
|
||||||
pivot = values.get(docs[i]);
|
pivot = docs[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -80,27 +124,73 @@ public abstract class Sorter {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Computes the old-to-new permutation over the given documents and values. */
|
/** Computes the doc ID permutation defined by the given comparator, or returns <tt>null</tt> if the doc IDs are already in sorted order. */
|
||||||
protected static <T extends Comparable<? super T>> int[] compute(int[] docs, List<T> values) {
|
protected static Sorter.DocMap sort(final int maxDoc, DocComparator comparator) {
|
||||||
SorterTemplate sorter = new DocValueSorterTemplate<T>(docs, values);
|
// check if the index is sorted
|
||||||
sorter.quickSort(0, docs.length - 1);
|
boolean sorted = true;
|
||||||
|
for (int i = 1; i < maxDoc; ++i) {
|
||||||
final int[] oldToNew = new int[docs.length];
|
if (comparator.compare(i-1, i) > 0) {
|
||||||
for (int i = 0; i < docs.length; i++) {
|
sorted = false;
|
||||||
oldToNew[docs[i]] = i;
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return oldToNew;
|
if (sorted) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// sort doc IDs
|
||||||
|
final int[] docs = new int[maxDoc];
|
||||||
|
for (int i = 0; i < maxDoc; i++) {
|
||||||
|
docs[i] = i;
|
||||||
|
}
|
||||||
|
|
||||||
|
SorterTemplate sorter = new DocValueSorterTemplate(docs, comparator);
|
||||||
|
// TODO: use a stable sort instead?
|
||||||
|
sorter.quickSort(0, docs.length - 1); // docs is now the newToOld mapping
|
||||||
|
|
||||||
|
// The reason why we use MonotonicAppendingLongBuffer here is that it
|
||||||
|
// wastes very little memory if the index is in random order but can save
|
||||||
|
// a lot of memory if the index is already "almost" sorted
|
||||||
|
final MonotonicAppendingLongBuffer newToOld = new MonotonicAppendingLongBuffer();
|
||||||
|
for (int i = 0; i < maxDoc; ++i) {
|
||||||
|
newToOld.add(docs[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < maxDoc; ++i) {
|
||||||
|
docs[(int) newToOld.get(i)] = i;
|
||||||
|
} // docs is now the oldToNew mapping
|
||||||
|
|
||||||
|
final MonotonicAppendingLongBuffer oldToNew = new MonotonicAppendingLongBuffer();
|
||||||
|
for (int i = 0; i < maxDoc; ++i) {
|
||||||
|
oldToNew.add(docs[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
return new Sorter.DocMap() {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int oldToNew(int docID) {
|
||||||
|
return (int) oldToNew.get(docID);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int newToOld(int docID) {
|
||||||
|
return (int) newToOld.get(docID);
|
||||||
|
}
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a mapping from the old document ID to its new location in the
|
* Returns a mapping from the old document ID to its new location in the
|
||||||
* sorted index. Implementations can use the auxiliary
|
* sorted index. Implementations can use the auxiliary
|
||||||
* {@link #compute(int[], List)} to compute the old-to-new permutation
|
* {@link #sort(int, DocComparator)} to compute the old-to-new permutation
|
||||||
* given an array of documents and their corresponding values.
|
* given the number of documents and a comparator of their doc IDs.
|
||||||
|
* <p>
|
||||||
|
* A return value of <tt>null</tt> is allowed and means that
|
||||||
|
* <code>reader</code> is already sorted.
|
||||||
* <p>
|
* <p>
|
||||||
* <b>NOTE:</b> deleted documents are expected to appear in the mapping as
|
* <b>NOTE:</b> deleted documents are expected to appear in the mapping as
|
||||||
* well, they will however be dropped when the index is actually sorted.
|
* well, they will however be dropped when the index is actually sorted.
|
||||||
*/
|
*/
|
||||||
public abstract int[] oldToNew(AtomicReader reader) throws IOException;
|
public abstract DocMap sort(AtomicReader reader) throws IOException;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -43,7 +43,6 @@ import org.apache.lucene.store.RAMOutputStream;
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.FixedBitSet;
|
|
||||||
import org.apache.lucene.util.SorterTemplate;
|
import org.apache.lucene.util.SorterTemplate;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -66,13 +65,13 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
||||||
|
|
||||||
private static class SortingFields extends FilterFields {
|
private static class SortingFields extends FilterFields {
|
||||||
|
|
||||||
private final int[] old2new;
|
private final Sorter.DocMap docMap;
|
||||||
private final Bits inLiveDocs;
|
private final Bits inLiveDocs;
|
||||||
private final FieldInfos infos;
|
private final FieldInfos infos;
|
||||||
|
|
||||||
public SortingFields(final Fields in, final Bits inLiveDocs, FieldInfos infos, final int[] old2new) {
|
public SortingFields(final Fields in, final Bits inLiveDocs, FieldInfos infos, Sorter.DocMap docMap) {
|
||||||
super(in);
|
super(in);
|
||||||
this.old2new = old2new;
|
this.docMap = docMap;
|
||||||
this.inLiveDocs = inLiveDocs;
|
this.inLiveDocs = inLiveDocs;
|
||||||
this.infos = infos;
|
this.infos = infos;
|
||||||
}
|
}
|
||||||
|
@ -83,7 +82,7 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
||||||
if (terms == null) {
|
if (terms == null) {
|
||||||
return null;
|
return null;
|
||||||
} else {
|
} else {
|
||||||
return new SortingTerms(terms, inLiveDocs, infos.fieldInfo(field).getIndexOptions(), old2new);
|
return new SortingTerms(terms, inLiveDocs, infos.fieldInfo(field).getIndexOptions(), docMap);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -91,33 +90,33 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
||||||
|
|
||||||
private static class SortingTerms extends FilterTerms {
|
private static class SortingTerms extends FilterTerms {
|
||||||
|
|
||||||
private final int[] old2new;
|
private final Sorter.DocMap docMap;
|
||||||
private final Bits inLiveDocs;
|
private final Bits inLiveDocs;
|
||||||
private final IndexOptions indexOptions;
|
private final IndexOptions indexOptions;
|
||||||
|
|
||||||
public SortingTerms(final Terms in, final Bits inLiveDocs, IndexOptions indexOptions, final int[] old2new) {
|
public SortingTerms(final Terms in, final Bits inLiveDocs, IndexOptions indexOptions, final Sorter.DocMap docMap) {
|
||||||
super(in);
|
super(in);
|
||||||
this.old2new = old2new;
|
this.docMap = docMap;
|
||||||
this.inLiveDocs = inLiveDocs;
|
this.inLiveDocs = inLiveDocs;
|
||||||
this.indexOptions = indexOptions;
|
this.indexOptions = indexOptions;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TermsEnum iterator(final TermsEnum reuse) throws IOException {
|
public TermsEnum iterator(final TermsEnum reuse) throws IOException {
|
||||||
return new SortingTermsEnum(in.iterator(reuse), inLiveDocs, old2new, indexOptions);
|
return new SortingTermsEnum(in.iterator(reuse), inLiveDocs, docMap, indexOptions);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class SortingTermsEnum extends FilterTermsEnum {
|
private static class SortingTermsEnum extends FilterTermsEnum {
|
||||||
|
|
||||||
private final int[] old2new;
|
private final Sorter.DocMap docMap;
|
||||||
private final Bits inLiveDocs;
|
private final Bits inLiveDocs;
|
||||||
private final IndexOptions indexOptions;
|
private final IndexOptions indexOptions;
|
||||||
|
|
||||||
public SortingTermsEnum(final TermsEnum in, final Bits inLiveDocs, final int[] old2new, IndexOptions indexOptions) {
|
public SortingTermsEnum(final TermsEnum in, final Bits inLiveDocs, Sorter.DocMap docMap, IndexOptions indexOptions) {
|
||||||
super(in);
|
super(in);
|
||||||
this.old2new = old2new;
|
this.docMap = docMap;
|
||||||
this.inLiveDocs = inLiveDocs;
|
this.inLiveDocs = inLiveDocs;
|
||||||
this.indexOptions = indexOptions;
|
this.indexOptions = indexOptions;
|
||||||
}
|
}
|
||||||
|
@ -134,7 +133,7 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
||||||
reuse = ((SortingDocsEnum) reuse).getWrapped();
|
reuse = ((SortingDocsEnum) reuse).getWrapped();
|
||||||
}
|
}
|
||||||
boolean withFreqs = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >=0 && (flags & DocsEnum.FLAG_FREQS) != 0;
|
boolean withFreqs = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >=0 && (flags & DocsEnum.FLAG_FREQS) != 0;
|
||||||
return new SortingDocsEnum(in.docs(liveDocs, reuse, flags), withFreqs, old2new);
|
return new SortingDocsEnum(in.docs(liveDocs, reuse, flags), withFreqs, docMap);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -158,7 +157,7 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
||||||
// ask for everything. if that assumption changes in the future, we can
|
// ask for everything. if that assumption changes in the future, we can
|
||||||
// factor in whether 'flags' says offsets are not required.
|
// factor in whether 'flags' says offsets are not required.
|
||||||
boolean storeOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
|
boolean storeOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
|
||||||
return new SortingDocsAndPositionsEnum(positions, old2new, storeOffsets);
|
return new SortingDocsAndPositionsEnum(positions, docMap, storeOffsets);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -167,48 +166,48 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
||||||
private static class SortingBinaryDocValues extends BinaryDocValues {
|
private static class SortingBinaryDocValues extends BinaryDocValues {
|
||||||
|
|
||||||
private final BinaryDocValues in;
|
private final BinaryDocValues in;
|
||||||
private final int[] new2old;
|
private final Sorter.DocMap docMap;
|
||||||
|
|
||||||
SortingBinaryDocValues(BinaryDocValues in, int[] new2old) {
|
SortingBinaryDocValues(BinaryDocValues in, Sorter.DocMap docMap) {
|
||||||
this.in = in;
|
this.in = in;
|
||||||
this.new2old = new2old;
|
this.docMap = docMap;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void get(int docID, BytesRef result) {
|
public void get(int docID, BytesRef result) {
|
||||||
in.get(new2old[docID], result);
|
in.get(docMap.newToOld(docID), result);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class SortingNumericDocValues extends NumericDocValues {
|
private static class SortingNumericDocValues extends NumericDocValues {
|
||||||
|
|
||||||
private final NumericDocValues in;
|
private final NumericDocValues in;
|
||||||
private final int[] new2old;
|
private final Sorter.DocMap docMap;
|
||||||
|
|
||||||
public SortingNumericDocValues(final NumericDocValues in, final int[] new2old) {
|
public SortingNumericDocValues(final NumericDocValues in, Sorter.DocMap docMap) {
|
||||||
this.in = in;
|
this.in = in;
|
||||||
this.new2old = new2old;
|
this.docMap = docMap;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long get(int docID) {
|
public long get(int docID) {
|
||||||
return in.get(new2old[docID]);
|
return in.get(docMap.newToOld(docID));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class SortingSortedDocValues extends SortedDocValues {
|
private static class SortingSortedDocValues extends SortedDocValues {
|
||||||
|
|
||||||
private final SortedDocValues in;
|
private final SortedDocValues in;
|
||||||
private final int[] new2old;
|
private final Sorter.DocMap docMap;
|
||||||
|
|
||||||
SortingSortedDocValues(SortedDocValues in, int[] new2old) {
|
SortingSortedDocValues(SortedDocValues in, Sorter.DocMap docMap) {
|
||||||
this.in = in;
|
this.in = in;
|
||||||
this.new2old = new2old;
|
this.docMap = docMap;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int getOrd(int docID) {
|
public int getOrd(int docID) {
|
||||||
return in.getOrd(new2old[docID]);
|
return in.getOrd(docMap.newToOld(docID));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -223,7 +222,7 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void get(int docID, BytesRef result) {
|
public void get(int docID, BytesRef result) {
|
||||||
in.get(new2old[docID], result);
|
in.get(docMap.newToOld(docID), result);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -235,11 +234,11 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
||||||
private static class SortingSortedSetDocValues extends SortedSetDocValues {
|
private static class SortingSortedSetDocValues extends SortedSetDocValues {
|
||||||
|
|
||||||
private final SortedSetDocValues in;
|
private final SortedSetDocValues in;
|
||||||
private final int[] new2old;
|
private final Sorter.DocMap docMap;
|
||||||
|
|
||||||
SortingSortedSetDocValues(SortedSetDocValues in, int[] new2old) {
|
SortingSortedSetDocValues(SortedSetDocValues in, Sorter.DocMap docMap) {
|
||||||
this.in = in;
|
this.in = in;
|
||||||
this.new2old = new2old;
|
this.docMap = docMap;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -249,7 +248,7 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void setDocument(int docID) {
|
public void setDocument(int docID) {
|
||||||
in.setDocument(new2old[docID]);
|
in.setDocument(docMap.newToOld(docID));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -315,7 +314,7 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
||||||
private final int upto;
|
private final int upto;
|
||||||
private final boolean withFreqs;
|
private final boolean withFreqs;
|
||||||
|
|
||||||
public SortingDocsEnum(final DocsEnum in, boolean withFreqs, final int[] old2new) throws IOException {
|
public SortingDocsEnum(final DocsEnum in, boolean withFreqs, final Sorter.DocMap docMap) throws IOException {
|
||||||
super(in);
|
super(in);
|
||||||
this.withFreqs = withFreqs;
|
this.withFreqs = withFreqs;
|
||||||
int i = 0;
|
int i = 0;
|
||||||
|
@ -327,7 +326,7 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
||||||
docs = ArrayUtil.grow(docs, docs.length + 1);
|
docs = ArrayUtil.grow(docs, docs.length + 1);
|
||||||
freqs = ArrayUtil.grow(freqs, freqs.length + 1);
|
freqs = ArrayUtil.grow(freqs, freqs.length + 1);
|
||||||
}
|
}
|
||||||
docs[i] = old2new[doc];
|
docs[i] = docMap.oldToNew(doc);
|
||||||
freqs[i] = in.freq();
|
freqs[i] = in.freq();
|
||||||
++i;
|
++i;
|
||||||
}
|
}
|
||||||
|
@ -339,7 +338,7 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
||||||
if (i >= docs.length) {
|
if (i >= docs.length) {
|
||||||
docs = ArrayUtil.grow(docs, docs.length + 1);
|
docs = ArrayUtil.grow(docs, docs.length + 1);
|
||||||
}
|
}
|
||||||
docs[i++] = old2new[doc];
|
docs[i++] = docMap.oldToNew(doc);
|
||||||
}
|
}
|
||||||
Arrays.sort(docs, 0, i);
|
Arrays.sort(docs, 0, i);
|
||||||
}
|
}
|
||||||
|
@ -436,7 +435,7 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
||||||
private final BytesRef payload = new BytesRef(32);
|
private final BytesRef payload = new BytesRef(32);
|
||||||
private int currFreq;
|
private int currFreq;
|
||||||
|
|
||||||
public SortingDocsAndPositionsEnum(final DocsAndPositionsEnum in, final int[] old2new, boolean storeOffsets) throws IOException {
|
public SortingDocsAndPositionsEnum(final DocsAndPositionsEnum in, Sorter.DocMap docMap, boolean storeOffsets) throws IOException {
|
||||||
super(in);
|
super(in);
|
||||||
this.storeOffsets = storeOffsets;
|
this.storeOffsets = storeOffsets;
|
||||||
final RAMFile file = new RAMFile();
|
final RAMFile file = new RAMFile();
|
||||||
|
@ -454,7 +453,7 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
||||||
System.arraycopy(offsets, 0, tmp, 0, offsets.length);
|
System.arraycopy(offsets, 0, tmp, 0, offsets.length);
|
||||||
offsets = tmp;
|
offsets = tmp;
|
||||||
}
|
}
|
||||||
docs[i] = old2new[doc];
|
docs[i] = docMap.oldToNew(doc);
|
||||||
offsets[i] = out.getFilePointer();
|
offsets[i] = out.getFilePointer();
|
||||||
addPositions(in, out);
|
addPositions(in, out);
|
||||||
i++;
|
i++;
|
||||||
|
@ -551,38 +550,29 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private final int[] old2new, new2old;
|
/** Return a sorted view of <code>reader</code> according to the order
|
||||||
private final FixedBitSet mappedLiveDocs;
|
* defined by <code>sorter</code>. If the reader is already sorted, this
|
||||||
|
* method might return the reader as-is. */
|
||||||
|
public static AtomicReader sort(AtomicReader reader, Sorter sorter) throws IOException {
|
||||||
|
final Sorter.DocMap docMap = sorter.sort(reader);
|
||||||
|
if (docMap == null) {
|
||||||
|
// the reader is already sorted
|
||||||
|
return reader;
|
||||||
|
}
|
||||||
|
assert Sorter.isConsistent(docMap, reader.maxDoc());
|
||||||
|
return new SortingAtomicReader(reader, docMap);
|
||||||
|
}
|
||||||
|
|
||||||
public SortingAtomicReader(final AtomicReader in, final Sorter sorter) throws IOException {
|
private final Sorter.DocMap docMap;
|
||||||
|
|
||||||
|
private SortingAtomicReader(final AtomicReader in, final Sorter.DocMap docMap) {
|
||||||
super(in);
|
super(in);
|
||||||
old2new = sorter.oldToNew(in);
|
this.docMap = docMap;
|
||||||
if (old2new.length != in.maxDoc()) {
|
|
||||||
throw new IllegalArgumentException("sorter should provide mapping for every document in the index, including deleted ones");
|
|
||||||
}
|
|
||||||
new2old = new int[old2new.length];
|
|
||||||
for (int i = 0; i < new2old.length; i++) {
|
|
||||||
new2old[old2new[i]] = i;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!in.hasDeletions()) {
|
|
||||||
mappedLiveDocs = null;
|
|
||||||
} else {
|
|
||||||
mappedLiveDocs = new FixedBitSet(in.maxDoc());
|
|
||||||
mappedLiveDocs.set(0, in.maxDoc());
|
|
||||||
Bits liveDocs = in.getLiveDocs();
|
|
||||||
int len = liveDocs.length();
|
|
||||||
for (int i = 0; i < len; i++) {
|
|
||||||
if (!liveDocs.get(i)) {
|
|
||||||
mappedLiveDocs.clear(old2new[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void document(final int docID, final StoredFieldVisitor visitor) throws IOException {
|
public void document(final int docID, final StoredFieldVisitor visitor) throws IOException {
|
||||||
in.document(new2old[docID], visitor);
|
in.document(docMap.newToOld(docID), visitor);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -591,7 +581,7 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
||||||
if (fields == null) {
|
if (fields == null) {
|
||||||
return null;
|
return null;
|
||||||
} else {
|
} else {
|
||||||
return new SortingFields(fields, in.getLiveDocs(), in.getFieldInfos(), old2new);
|
return new SortingFields(fields, in.getLiveDocs(), in.getFieldInfos(), docMap);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -601,14 +591,29 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
||||||
if (oldDocValues == null) {
|
if (oldDocValues == null) {
|
||||||
return null;
|
return null;
|
||||||
} else {
|
} else {
|
||||||
return new SortingBinaryDocValues(oldDocValues, new2old);
|
return new SortingBinaryDocValues(oldDocValues, docMap);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Bits getLiveDocs() {
|
public Bits getLiveDocs() {
|
||||||
ensureOpen();
|
final Bits inLiveDocs = in.getLiveDocs();
|
||||||
return mappedLiveDocs;
|
if (inLiveDocs == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return new Bits() {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean get(int index) {
|
||||||
|
return inLiveDocs.get(docMap.newToOld(index));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int length() {
|
||||||
|
return inLiveDocs.length();
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -617,7 +622,7 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
||||||
if (norm == null) {
|
if (norm == null) {
|
||||||
return null;
|
return null;
|
||||||
} else {
|
} else {
|
||||||
return new SortingNumericDocValues(norm, new2old);
|
return new SortingNumericDocValues(norm, docMap);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -625,7 +630,7 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
||||||
public NumericDocValues getNumericDocValues(String field) throws IOException {
|
public NumericDocValues getNumericDocValues(String field) throws IOException {
|
||||||
final NumericDocValues oldDocValues = in.getNumericDocValues(field);
|
final NumericDocValues oldDocValues = in.getNumericDocValues(field);
|
||||||
if (oldDocValues == null) return null;
|
if (oldDocValues == null) return null;
|
||||||
return new SortingNumericDocValues(oldDocValues, new2old);
|
return new SortingNumericDocValues(oldDocValues, docMap);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -634,7 +639,7 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
||||||
if (sortedDV == null) {
|
if (sortedDV == null) {
|
||||||
return null;
|
return null;
|
||||||
} else {
|
} else {
|
||||||
return new SortingSortedDocValues(sortedDV, new2old);
|
return new SortingSortedDocValues(sortedDV, docMap);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -644,13 +649,13 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
||||||
if (sortedSetDV == null) {
|
if (sortedSetDV == null) {
|
||||||
return null;
|
return null;
|
||||||
} else {
|
} else {
|
||||||
return new SortingSortedSetDocValues(sortedSetDV, new2old);
|
return new SortingSortedSetDocValues(sortedSetDV, docMap);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Fields getTermVectors(final int docID) throws IOException {
|
public Fields getTermVectors(final int docID) throws IOException {
|
||||||
return in.getTermVectors(new2old[docID]);
|
return in.getTermVectors(docMap.newToOld(docID));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -61,7 +61,7 @@ public class IndexSortingTest extends SorterTestBase {
|
||||||
|
|
||||||
Directory target = newDirectory();
|
Directory target = newDirectory();
|
||||||
IndexWriter writer = new IndexWriter(target, newIndexWriterConfig(TEST_VERSION_CURRENT, null));
|
IndexWriter writer = new IndexWriter(target, newIndexWriterConfig(TEST_VERSION_CURRENT, null));
|
||||||
reader = new SortingAtomicReader(reader, sorter);
|
reader = SortingAtomicReader.sort(reader, sorter);
|
||||||
writer.addIndexes(reader);
|
writer.addIndexes(reader);
|
||||||
writer.close();
|
writer.close();
|
||||||
reader.close();
|
reader.close();
|
||||||
|
|
|
@ -19,7 +19,6 @@ package org.apache.lucene.index.sorter;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collections;
|
|
||||||
|
|
||||||
import org.apache.lucene.index.AtomicReader;
|
import org.apache.lucene.index.AtomicReader;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
|
@ -33,28 +32,34 @@ public class SortingAtomicReaderTest extends SorterTestBase {
|
||||||
// build the mapping from the reader, since we deleted documents, some of
|
// build the mapping from the reader, since we deleted documents, some of
|
||||||
// them might have disappeared from the index (e.g. if an entire segment is
|
// them might have disappeared from the index (e.g. if an entire segment is
|
||||||
// dropped b/c all its docs are deleted)
|
// dropped b/c all its docs are deleted)
|
||||||
Integer[] values = new Integer[reader.maxDoc()];
|
final int[] values = new int[reader.maxDoc()];
|
||||||
int[] docs = new int[reader.maxDoc()];
|
|
||||||
for (int i = 0; i < reader.maxDoc(); i++) {
|
for (int i = 0; i < reader.maxDoc(); i++) {
|
||||||
docs[i] = i;
|
|
||||||
values[i] = Integer.valueOf(reader.document(i).get(ID_FIELD));
|
values[i] = Integer.valueOf(reader.document(i).get(ID_FIELD));
|
||||||
}
|
}
|
||||||
|
final Sorter.DocComparator comparator = new Sorter.DocComparator() {
|
||||||
|
@Override
|
||||||
|
public int compare(int docID1, int docID2) {
|
||||||
|
final int v1 = values[docID1];
|
||||||
|
final int v2 = values[docID2];
|
||||||
|
return v1 < v2 ? -1 : v1 == v2 ? 0 : 1;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
final int[] oldToNew = Sorter.compute(docs, Collections.unmodifiableList(Arrays.asList(values)));
|
final Sorter.DocMap docMap = Sorter.sort(reader.maxDoc(), comparator);
|
||||||
// Sorter.compute also sorts the values
|
// apply the permutation returned by Sorter.sort to the values
|
||||||
sortedValues = new Integer[reader.maxDoc()];
|
sortedValues = new Integer[reader.maxDoc()];
|
||||||
for (int i = 0; i < reader.maxDoc(); ++i) {
|
for (int i = 0; i < reader.maxDoc(); ++i) {
|
||||||
sortedValues[oldToNew[i]] = values[i];
|
sortedValues[docMap.oldToNew(i)] = values[i];
|
||||||
}
|
}
|
||||||
if (VERBOSE) {
|
if (VERBOSE) {
|
||||||
System.out.println("oldToNew: " + Arrays.toString(oldToNew));
|
System.out.println("docMap: " + docMap);
|
||||||
System.out.println("sortedValues: " + Arrays.toString(sortedValues));
|
System.out.println("sortedValues: " + Arrays.toString(sortedValues));
|
||||||
}
|
}
|
||||||
|
|
||||||
reader = new SortingAtomicReader(reader, new Sorter() {
|
reader = SortingAtomicReader.sort(reader, new Sorter() {
|
||||||
@Override
|
@Override
|
||||||
public int[] oldToNew(AtomicReader reader) throws IOException {
|
public Sorter.DocMap sort(AtomicReader reader) throws IOException {
|
||||||
return oldToNew;
|
return docMap;
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue