LUCENE-4847: Sorter API: Fully reuse docs enums.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1457760 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Adrien Grand 2013-03-18 13:36:18 +00:00
parent c04a4a5c96
commit edbad37b0c
2 changed files with 105 additions and 38 deletions

View File

@ -136,34 +136,48 @@ public class SortingAtomicReader extends FilterAtomicReader {
@Override @Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, final int flags) throws IOException { public DocsEnum docs(Bits liveDocs, DocsEnum reuse, final int flags) throws IOException {
// if we're asked to reuse the given DocsEnum and it is Sorting, return final DocsEnum inReuse;
// the wrapped one, since some Codecs expect it. final SortingDocsEnum wrapReuse;
if (reuse != null && reuse instanceof SortingDocsEnum) { if (reuse != null && reuse instanceof SortingDocsEnum) {
reuse = ((SortingDocsEnum) reuse).getWrapped(); // if we're asked to reuse the given DocsEnum and it is Sorting, return
// the wrapped one, since some Codecs expect it.
wrapReuse = (SortingDocsEnum) reuse;
inReuse = wrapReuse.getWrapped();
} else {
wrapReuse = null;
inReuse = reuse;
} }
boolean withFreqs = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >=0 && (flags & DocsEnum.FLAG_FREQS) != 0;
return new SortingDocsEnum(in.docs(newToOld(liveDocs), reuse, flags), withFreqs, docMap); final DocsEnum inDocs = in.docs(newToOld(liveDocs), inReuse, flags);
final boolean withFreqs = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >=0 && (flags & DocsEnum.FLAG_FREQS) != 0;
return new SortingDocsEnum(wrapReuse, inDocs, withFreqs, docMap);
} }
@Override @Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, final int flags) throws IOException { public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, final int flags) throws IOException {
// if we're asked to reuse the given DocsAndPositionsEnum and it is final DocsAndPositionsEnum inReuse;
// Sorting, return the wrapped one, since some Codecs expect it. final SortingDocsAndPositionsEnum wrapReuse;
if (reuse != null && reuse instanceof SortingDocsAndPositionsEnum) { if (reuse != null && reuse instanceof SortingDocsAndPositionsEnum) {
reuse = ((SortingDocsAndPositionsEnum) reuse).getWrapped(); // if we're asked to reuse the given DocsEnum and it is Sorting, return
} // the wrapped one, since some Codecs expect it.
wrapReuse = (SortingDocsAndPositionsEnum) reuse;
final DocsAndPositionsEnum positions = in.docsAndPositions(newToOld(liveDocs), reuse, flags); inReuse = wrapReuse.getWrapped();
if (positions == null) {
return null;
} else { } else {
// we ignore the fact that offsets may be stored but not asked for, wrapReuse = null;
// since this code is expected to be used during addIndexes which will inReuse = reuse;
// ask for everything. if that assumption changes in the future, we can
// factor in whether 'flags' says offsets are not required.
boolean storeOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
return new SortingDocsAndPositionsEnum(positions, docMap, storeOffsets);
} }
final DocsAndPositionsEnum inDocsAndPositions = in.docsAndPositions(newToOld(liveDocs), inReuse, flags);
if (inDocsAndPositions == null) {
return null;
}
// we ignore the fact that offsets may be stored but not asked for,
// since this code is expected to be used during addIndexes which will
// ask for everything. if that assumption changes in the future, we can
// factor in whether 'flags' says offsets are not required.
final boolean storeOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
return new SortingDocsAndPositionsEnum(wrapReuse, inDocsAndPositions, docMap, storeOffsets);
} }
} }
@ -272,7 +286,7 @@ public class SortingAtomicReader extends FilterAtomicReader {
} }
} }
private static class SortingDocsEnum extends FilterDocsEnum { static class SortingDocsEnum extends FilterDocsEnum {
private static final class DocFreqSorterTemplate extends SorterTemplate { private static final class DocFreqSorterTemplate extends SorterTemplate {
@ -315,19 +329,28 @@ public class SortingAtomicReader extends FilterAtomicReader {
} }
} }
private int[] docs = new int[64]; private int[] docs;
private int[] freqs; private int[] freqs;
private int docIt = -1; private int docIt = -1;
private final int upto; private final int upto;
private final boolean withFreqs; private final boolean withFreqs;
public SortingDocsEnum(final DocsEnum in, boolean withFreqs, final Sorter.DocMap docMap) throws IOException { SortingDocsEnum(SortingDocsEnum reuse, final DocsEnum in, boolean withFreqs, final Sorter.DocMap docMap) throws IOException {
super(in); super(in);
this.withFreqs = withFreqs; this.withFreqs = withFreqs;
if (reuse != null) {
docs = reuse.docs;
freqs = reuse.freqs; // maybe null
} else {
docs = new int[64];
}
docIt = -1;
int i = 0; int i = 0;
int doc; int doc;
if (withFreqs) { if (withFreqs) {
freqs = new int[docs.length]; if (freqs == null || freqs.length < docs.length) {
freqs = new int[docs.length];
}
while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS){ while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS){
if (i >= docs.length) { if (i >= docs.length) {
docs = ArrayUtil.grow(docs, docs.length + 1); docs = ArrayUtil.grow(docs, docs.length + 1);
@ -351,7 +374,15 @@ public class SortingAtomicReader extends FilterAtomicReader {
new DocFreqSorterTemplate(docs, freqs).timSort(0, i - 1); new DocFreqSorterTemplate(docs, freqs).timSort(0, i - 1);
upto = i; upto = i;
} }
// for testing: tells whether this enum holds the very same docs array
// instance as the given enum.
boolean reused(DocsEnum other) {
  // instanceof evaluates to false for null, so null needs no separate check
  if (other instanceof SortingDocsEnum) {
    final SortingDocsEnum that = (SortingDocsEnum) other;
    return that.docs == docs;
  }
  return false;
}
@Override @Override
public int advance(final int target) throws IOException { public int advance(final int target) throws IOException {
// need to support it for checkIndex, but in practice it won't be called, so // need to support it for checkIndex, but in practice it won't be called, so
@ -382,7 +413,7 @@ public class SortingAtomicReader extends FilterAtomicReader {
} }
} }
private static class SortingDocsAndPositionsEnum extends FilterDocsAndPositionsEnum { static class SortingDocsAndPositionsEnum extends FilterDocsAndPositionsEnum {
/** /**
* A {@link SorterTemplate} which sorts two parallel arrays of doc IDs and * A {@link SorterTemplate} which sorts two parallel arrays of doc IDs and
@ -439,16 +470,26 @@ public class SortingAtomicReader extends FilterAtomicReader {
private int pos; private int pos;
private int startOffset = -1; private int startOffset = -1;
private int endOffset = -1; private int endOffset = -1;
private final BytesRef payload = new BytesRef(32); private final BytesRef payload;
private int currFreq; private int currFreq;
public SortingDocsAndPositionsEnum(final DocsAndPositionsEnum in, Sorter.DocMap docMap, boolean storeOffsets) throws IOException { private final RAMFile file;
SortingDocsAndPositionsEnum(SortingDocsAndPositionsEnum reuse, final DocsAndPositionsEnum in, Sorter.DocMap docMap, boolean storeOffsets) throws IOException {
super(in); super(in);
this.storeOffsets = storeOffsets; this.storeOffsets = storeOffsets;
final RAMFile file = new RAMFile(); if (reuse != null) {
docs = reuse.docs;
offsets = reuse.offsets;
payload = reuse.payload;
file = reuse.file;
} else {
docs = new int[32];
offsets = new long[32];
payload = new BytesRef(32);
file = new RAMFile();
}
final IndexOutput out = new RAMOutputStream(file); final IndexOutput out = new RAMOutputStream(file);
docs = new int[32];
offsets = new long[32];
int doc; int doc;
int i = 0; int i = 0;
while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
@ -467,7 +508,15 @@ public class SortingAtomicReader extends FilterAtomicReader {
out.close(); out.close();
this.postingInput = new RAMInputStream("", file); this.postingInput = new RAMInputStream("", file);
} }
// for testing: tells whether this enum holds the very same docs array
// instance as the given enum.
boolean reused(DocsAndPositionsEnum other) {
  // instanceof evaluates to false for null, so null needs no separate check
  if (other instanceof SortingDocsAndPositionsEnum) {
    final SortingDocsAndPositionsEnum that = (SortingDocsAndPositionsEnum) other;
    return that.docs == docs;
  }
  return false;
}
private void addPositions(final DocsAndPositionsEnum in, final IndexOutput out) throws IOException { private void addPositions(final DocsAndPositionsEnum in, final IndexOutput out) throws IOException {
int freq = in.freq(); int freq = in.freq();
out.writeVInt(freq); out.writeVInt(freq);

View File

@ -47,6 +47,7 @@ import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.RandomIndexWriter;
@ -57,6 +58,8 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms; import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.TermsEnum.SeekStatus; import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.index.sorter.SortingAtomicReader.SortingDocsAndPositionsEnum;
import org.apache.lucene.index.sorter.SortingAtomicReader.SortingDocsEnum;
import org.apache.lucene.search.CollectionStatistics; import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.TermStatistics; import org.apache.lucene.search.TermStatistics;
@ -255,8 +258,9 @@ public abstract class SorterTestBase extends LuceneTestCase {
@Test @Test
public void testDocsAndPositionsEnum() throws Exception { public void testDocsAndPositionsEnum() throws Exception {
Term term = new Term(DOC_POSITIONS_FIELD, DOC_POSITIONS_TERM); TermsEnum termsEnum = reader.terms(DOC_POSITIONS_FIELD).iterator(null);
DocsAndPositionsEnum sortedPositions = reader.termPositionsEnum(term); assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef(DOC_POSITIONS_TERM)));
DocsAndPositionsEnum sortedPositions = termsEnum.docsAndPositions(null, null);
int doc; int doc;
// test nextDoc() // test nextDoc()
@ -274,7 +278,11 @@ public abstract class SorterTestBase extends LuceneTestCase {
} }
// test advance() // test advance()
sortedPositions = reader.termPositionsEnum(term); final DocsAndPositionsEnum reuse = sortedPositions;
sortedPositions = termsEnum.docsAndPositions(null, reuse);
if (sortedPositions instanceof SortingDocsAndPositionsEnum) {
assertTrue(((SortingDocsAndPositionsEnum) sortedPositions).reused(reuse)); // make sure reuse worked
}
doc = 0; doc = 0;
while ((doc = sortedPositions.advance(doc)) != DocIdSetIterator.NO_MORE_DOCS) { while ((doc = sortedPositions.advance(doc)) != DocIdSetIterator.NO_MORE_DOCS) {
int freq = sortedPositions.freq(); int freq = sortedPositions.freq();
@ -328,8 +336,15 @@ public abstract class SorterTestBase extends LuceneTestCase {
assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.get(prev)); assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.get(prev));
} }
} }
while (++prev < reader.maxDoc()) {
docs = termsEnum.docs(mappedLiveDocs, docs); assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.get(prev));
}
DocsEnum reuse = docs;
docs = termsEnum.docs(mappedLiveDocs, reuse);
if (docs instanceof SortingDocsEnum) {
assertTrue(((SortingDocsEnum) docs).reused(reuse)); // make sure reuse worked
}
doc = -1; doc = -1;
prev = -1; prev = -1;
while ((doc = docs.advance(doc + 1)) != DocIdSetIterator.NO_MORE_DOCS) { while ((doc = docs.advance(doc + 1)) != DocIdSetIterator.NO_MORE_DOCS) {
@ -339,6 +354,9 @@ public abstract class SorterTestBase extends LuceneTestCase {
assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.get(prev)); assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.get(prev));
} }
} }
while (++prev < reader.maxDoc()) {
assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.get(prev));
}
} }
@Test @Test