mirror of https://github.com/apache/lucene.git
LUCENE-4847: Sorter API: Fully reuse docs enums.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1457760 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c04a4a5c96
commit
edbad37b0c
|
@ -136,34 +136,48 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
|||
|
||||
@Override
|
||||
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, final int flags) throws IOException {
|
||||
// if we're asked to reuse the given DocsEnum and it is Sorting, return
|
||||
// the wrapped one, since some Codecs expect it.
|
||||
final DocsEnum inReuse;
|
||||
final SortingDocsEnum wrapReuse;
|
||||
if (reuse != null && reuse instanceof SortingDocsEnum) {
|
||||
reuse = ((SortingDocsEnum) reuse).getWrapped();
|
||||
// if we're asked to reuse the given DocsEnum and it is Sorting, return
|
||||
// the wrapped one, since some Codecs expect it.
|
||||
wrapReuse = (SortingDocsEnum) reuse;
|
||||
inReuse = wrapReuse.getWrapped();
|
||||
} else {
|
||||
wrapReuse = null;
|
||||
inReuse = reuse;
|
||||
}
|
||||
boolean withFreqs = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >=0 && (flags & DocsEnum.FLAG_FREQS) != 0;
|
||||
return new SortingDocsEnum(in.docs(newToOld(liveDocs), reuse, flags), withFreqs, docMap);
|
||||
|
||||
final DocsEnum inDocs = in.docs(newToOld(liveDocs), inReuse, flags);
|
||||
final boolean withFreqs = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >=0 && (flags & DocsEnum.FLAG_FREQS) != 0;
|
||||
return new SortingDocsEnum(wrapReuse, inDocs, withFreqs, docMap);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, final int flags) throws IOException {
|
||||
// if we're asked to reuse the given DocsAndPositionsEnum and it is
|
||||
// Sorting, return the wrapped one, since some Codecs expect it.
|
||||
final DocsAndPositionsEnum inReuse;
|
||||
final SortingDocsAndPositionsEnum wrapReuse;
|
||||
if (reuse != null && reuse instanceof SortingDocsAndPositionsEnum) {
|
||||
reuse = ((SortingDocsAndPositionsEnum) reuse).getWrapped();
|
||||
// if we're asked to reuse the given DocsEnum and it is Sorting, return
|
||||
// the wrapped one, since some Codecs expect it.
|
||||
wrapReuse = (SortingDocsAndPositionsEnum) reuse;
|
||||
inReuse = wrapReuse.getWrapped();
|
||||
} else {
|
||||
wrapReuse = null;
|
||||
inReuse = reuse;
|
||||
}
|
||||
|
||||
final DocsAndPositionsEnum positions = in.docsAndPositions(newToOld(liveDocs), reuse, flags);
|
||||
if (positions == null) {
|
||||
final DocsAndPositionsEnum inDocsAndPositions = in.docsAndPositions(newToOld(liveDocs), inReuse, flags);
|
||||
if (inDocsAndPositions == null) {
|
||||
return null;
|
||||
} else {
|
||||
// we ignore the fact that offsets may be stored but not asked for,
|
||||
// since this code is expected to be used during addIndexes which will
|
||||
// ask for everything. if that assumption changes in the future, we can
|
||||
// factor in whether 'flags' says offsets are not required.
|
||||
boolean storeOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
|
||||
return new SortingDocsAndPositionsEnum(positions, docMap, storeOffsets);
|
||||
}
|
||||
|
||||
// we ignore the fact that offsets may be stored but not asked for,
|
||||
// since this code is expected to be used during addIndexes which will
|
||||
// ask for everything. if that assumption changes in the future, we can
|
||||
// factor in whether 'flags' says offsets are not required.
|
||||
final boolean storeOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
|
||||
return new SortingDocsAndPositionsEnum(wrapReuse, inDocsAndPositions, docMap, storeOffsets);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -272,7 +286,7 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
|||
}
|
||||
}
|
||||
|
||||
private static class SortingDocsEnum extends FilterDocsEnum {
|
||||
static class SortingDocsEnum extends FilterDocsEnum {
|
||||
|
||||
private static final class DocFreqSorterTemplate extends SorterTemplate {
|
||||
|
||||
|
@ -315,19 +329,28 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
|||
}
|
||||
}
|
||||
|
||||
private int[] docs = new int[64];
|
||||
private int[] docs;
|
||||
private int[] freqs;
|
||||
private int docIt = -1;
|
||||
private final int upto;
|
||||
private final boolean withFreqs;
|
||||
|
||||
public SortingDocsEnum(final DocsEnum in, boolean withFreqs, final Sorter.DocMap docMap) throws IOException {
|
||||
SortingDocsEnum(SortingDocsEnum reuse, final DocsEnum in, boolean withFreqs, final Sorter.DocMap docMap) throws IOException {
|
||||
super(in);
|
||||
this.withFreqs = withFreqs;
|
||||
if (reuse != null) {
|
||||
docs = reuse.docs;
|
||||
freqs = reuse.freqs; // maybe null
|
||||
} else {
|
||||
docs = new int[64];
|
||||
}
|
||||
docIt = -1;
|
||||
int i = 0;
|
||||
int doc;
|
||||
if (withFreqs) {
|
||||
freqs = new int[docs.length];
|
||||
if (freqs == null || freqs.length < docs.length) {
|
||||
freqs = new int[docs.length];
|
||||
}
|
||||
while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS){
|
||||
if (i >= docs.length) {
|
||||
docs = ArrayUtil.grow(docs, docs.length + 1);
|
||||
|
@ -352,6 +375,14 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
|||
upto = i;
|
||||
}
|
||||
|
||||
// for testing
|
||||
boolean reused(DocsEnum other) {
|
||||
if (other == null || !(other instanceof SortingDocsEnum)) {
|
||||
return false;
|
||||
}
|
||||
return docs == ((SortingDocsEnum) other).docs;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(final int target) throws IOException {
|
||||
// need to support it for checkIndex, but in practice it won't be called, so
|
||||
|
@ -382,7 +413,7 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
|||
}
|
||||
}
|
||||
|
||||
private static class SortingDocsAndPositionsEnum extends FilterDocsAndPositionsEnum {
|
||||
static class SortingDocsAndPositionsEnum extends FilterDocsAndPositionsEnum {
|
||||
|
||||
/**
|
||||
* A {@link SorterTemplate} which sorts two parallel arrays of doc IDs and
|
||||
|
@ -439,16 +470,26 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
|||
private int pos;
|
||||
private int startOffset = -1;
|
||||
private int endOffset = -1;
|
||||
private final BytesRef payload = new BytesRef(32);
|
||||
private final BytesRef payload;
|
||||
private int currFreq;
|
||||
|
||||
public SortingDocsAndPositionsEnum(final DocsAndPositionsEnum in, Sorter.DocMap docMap, boolean storeOffsets) throws IOException {
|
||||
private final RAMFile file;
|
||||
|
||||
SortingDocsAndPositionsEnum(SortingDocsAndPositionsEnum reuse, final DocsAndPositionsEnum in, Sorter.DocMap docMap, boolean storeOffsets) throws IOException {
|
||||
super(in);
|
||||
this.storeOffsets = storeOffsets;
|
||||
final RAMFile file = new RAMFile();
|
||||
if (reuse != null) {
|
||||
docs = reuse.docs;
|
||||
offsets = reuse.offsets;
|
||||
payload = reuse.payload;
|
||||
file = reuse.file;
|
||||
} else {
|
||||
docs = new int[32];
|
||||
offsets = new long[32];
|
||||
payload = new BytesRef(32);
|
||||
file = new RAMFile();
|
||||
}
|
||||
final IndexOutput out = new RAMOutputStream(file);
|
||||
docs = new int[32];
|
||||
offsets = new long[32];
|
||||
int doc;
|
||||
int i = 0;
|
||||
while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
|
@ -468,6 +509,14 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
|||
this.postingInput = new RAMInputStream("", file);
|
||||
}
|
||||
|
||||
// for testing
|
||||
boolean reused(DocsAndPositionsEnum other) {
|
||||
if (other == null || !(other instanceof SortingDocsAndPositionsEnum)) {
|
||||
return false;
|
||||
}
|
||||
return docs == ((SortingDocsAndPositionsEnum) other).docs;
|
||||
}
|
||||
|
||||
private void addPositions(final DocsAndPositionsEnum in, final IndexOutput out) throws IOException {
|
||||
int freq = in.freq();
|
||||
out.writeVInt(freq);
|
||||
|
|
|
@ -47,6 +47,7 @@ import org.apache.lucene.index.DocsAndPositionsEnum;
|
|||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
|
@ -57,6 +58,8 @@ import org.apache.lucene.index.Term;
|
|||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.index.TermsEnum.SeekStatus;
|
||||
import org.apache.lucene.index.sorter.SortingAtomicReader.SortingDocsAndPositionsEnum;
|
||||
import org.apache.lucene.index.sorter.SortingAtomicReader.SortingDocsEnum;
|
||||
import org.apache.lucene.search.CollectionStatistics;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.TermStatistics;
|
||||
|
@ -255,8 +258,9 @@ public abstract class SorterTestBase extends LuceneTestCase {
|
|||
|
||||
@Test
|
||||
public void testDocsAndPositionsEnum() throws Exception {
|
||||
Term term = new Term(DOC_POSITIONS_FIELD, DOC_POSITIONS_TERM);
|
||||
DocsAndPositionsEnum sortedPositions = reader.termPositionsEnum(term);
|
||||
TermsEnum termsEnum = reader.terms(DOC_POSITIONS_FIELD).iterator(null);
|
||||
assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef(DOC_POSITIONS_TERM)));
|
||||
DocsAndPositionsEnum sortedPositions = termsEnum.docsAndPositions(null, null);
|
||||
int doc;
|
||||
|
||||
// test nextDoc()
|
||||
|
@ -274,7 +278,11 @@ public abstract class SorterTestBase extends LuceneTestCase {
|
|||
}
|
||||
|
||||
// test advance()
|
||||
sortedPositions = reader.termPositionsEnum(term);
|
||||
final DocsAndPositionsEnum reuse = sortedPositions;
|
||||
sortedPositions = termsEnum.docsAndPositions(null, reuse);
|
||||
if (sortedPositions instanceof SortingDocsAndPositionsEnum) {
|
||||
assertTrue(((SortingDocsAndPositionsEnum) sortedPositions).reused(reuse)); // make sure reuse worked
|
||||
}
|
||||
doc = 0;
|
||||
while ((doc = sortedPositions.advance(doc)) != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
int freq = sortedPositions.freq();
|
||||
|
@ -328,8 +336,15 @@ public abstract class SorterTestBase extends LuceneTestCase {
|
|||
assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.get(prev));
|
||||
}
|
||||
}
|
||||
while (++prev < reader.maxDoc()) {
|
||||
assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.get(prev));
|
||||
}
|
||||
|
||||
docs = termsEnum.docs(mappedLiveDocs, docs);
|
||||
DocsEnum reuse = docs;
|
||||
docs = termsEnum.docs(mappedLiveDocs, reuse);
|
||||
if (docs instanceof SortingDocsEnum) {
|
||||
assertTrue(((SortingDocsEnum) docs).reused(reuse)); // make sure reuse worked
|
||||
}
|
||||
doc = -1;
|
||||
prev = -1;
|
||||
while ((doc = docs.advance(doc + 1)) != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
|
@ -339,6 +354,9 @@ public abstract class SorterTestBase extends LuceneTestCase {
|
|||
assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.get(prev));
|
||||
}
|
||||
}
|
||||
while (++prev < reader.maxDoc()) {
|
||||
assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.get(prev));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
Loading…
Reference in New Issue