mirror of https://github.com/apache/lucene.git
LUCENE-4847: Sorter API: Fully reuse docs enums.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1457760 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent c04a4a5c96
commit edbad37b0c
|
@ -136,34 +136,48 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, final int flags) throws IOException {
|
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, final int flags) throws IOException {
|
||||||
// if we're asked to reuse the given DocsEnum and it is Sorting, return
|
final DocsEnum inReuse;
|
||||||
// the wrapped one, since some Codecs expect it.
|
final SortingDocsEnum wrapReuse;
|
||||||
if (reuse != null && reuse instanceof SortingDocsEnum) {
|
if (reuse != null && reuse instanceof SortingDocsEnum) {
|
||||||
reuse = ((SortingDocsEnum) reuse).getWrapped();
|
// if we're asked to reuse the given DocsEnum and it is Sorting, return
|
||||||
|
// the wrapped one, since some Codecs expect it.
|
||||||
|
wrapReuse = (SortingDocsEnum) reuse;
|
||||||
|
inReuse = wrapReuse.getWrapped();
|
||||||
|
} else {
|
||||||
|
wrapReuse = null;
|
||||||
|
inReuse = reuse;
|
||||||
}
|
}
|
||||||
boolean withFreqs = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >=0 && (flags & DocsEnum.FLAG_FREQS) != 0;
|
|
||||||
return new SortingDocsEnum(in.docs(newToOld(liveDocs), reuse, flags), withFreqs, docMap);
|
final DocsEnum inDocs = in.docs(newToOld(liveDocs), inReuse, flags);
|
||||||
|
final boolean withFreqs = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >=0 && (flags & DocsEnum.FLAG_FREQS) != 0;
|
||||||
|
return new SortingDocsEnum(wrapReuse, inDocs, withFreqs, docMap);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, final int flags) throws IOException {
|
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, final int flags) throws IOException {
|
||||||
// if we're asked to reuse the given DocsAndPositionsEnum and it is
|
final DocsAndPositionsEnum inReuse;
|
||||||
// Sorting, return the wrapped one, since some Codecs expect it.
|
final SortingDocsAndPositionsEnum wrapReuse;
|
||||||
if (reuse != null && reuse instanceof SortingDocsAndPositionsEnum) {
|
if (reuse != null && reuse instanceof SortingDocsAndPositionsEnum) {
|
||||||
reuse = ((SortingDocsAndPositionsEnum) reuse).getWrapped();
|
// if we're asked to reuse the given DocsAndPositionsEnum and it is Sorting, return
|
||||||
}
|
// the wrapped one, since some Codecs expect it.
|
||||||
|
wrapReuse = (SortingDocsAndPositionsEnum) reuse;
|
||||||
final DocsAndPositionsEnum positions = in.docsAndPositions(newToOld(liveDocs), reuse, flags);
|
inReuse = wrapReuse.getWrapped();
|
||||||
if (positions == null) {
|
|
||||||
return null;
|
|
||||||
} else {
|
} else {
|
||||||
// we ignore the fact that offsets may be stored but not asked for,
|
wrapReuse = null;
|
||||||
// since this code is expected to be used during addIndexes which will
|
inReuse = reuse;
|
||||||
// ask for everything. if that assumption changes in the future, we can
|
|
||||||
// factor in whether 'flags' says offsets are not required.
|
|
||||||
boolean storeOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
|
|
||||||
return new SortingDocsAndPositionsEnum(positions, docMap, storeOffsets);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
final DocsAndPositionsEnum inDocsAndPositions = in.docsAndPositions(newToOld(liveDocs), inReuse, flags);
|
||||||
|
if (inDocsAndPositions == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// we ignore the fact that offsets may be stored but not asked for,
|
||||||
|
// since this code is expected to be used during addIndexes which will
|
||||||
|
// ask for everything. if that assumption changes in the future, we can
|
||||||
|
// factor in whether 'flags' says offsets are not required.
|
||||||
|
final boolean storeOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
|
||||||
|
return new SortingDocsAndPositionsEnum(wrapReuse, inDocsAndPositions, docMap, storeOffsets);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -272,7 +286,7 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class SortingDocsEnum extends FilterDocsEnum {
|
static class SortingDocsEnum extends FilterDocsEnum {
|
||||||
|
|
||||||
private static final class DocFreqSorterTemplate extends SorterTemplate {
|
private static final class DocFreqSorterTemplate extends SorterTemplate {
|
||||||
|
|
||||||
|
@ -315,19 +329,28 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private int[] docs = new int[64];
|
private int[] docs;
|
||||||
private int[] freqs;
|
private int[] freqs;
|
||||||
private int docIt = -1;
|
private int docIt = -1;
|
||||||
private final int upto;
|
private final int upto;
|
||||||
private final boolean withFreqs;
|
private final boolean withFreqs;
|
||||||
|
|
||||||
public SortingDocsEnum(final DocsEnum in, boolean withFreqs, final Sorter.DocMap docMap) throws IOException {
|
SortingDocsEnum(SortingDocsEnum reuse, final DocsEnum in, boolean withFreqs, final Sorter.DocMap docMap) throws IOException {
|
||||||
super(in);
|
super(in);
|
||||||
this.withFreqs = withFreqs;
|
this.withFreqs = withFreqs;
|
||||||
|
if (reuse != null) {
|
||||||
|
docs = reuse.docs;
|
||||||
|
freqs = reuse.freqs; // maybe null
|
||||||
|
} else {
|
||||||
|
docs = new int[64];
|
||||||
|
}
|
||||||
|
docIt = -1;
|
||||||
int i = 0;
|
int i = 0;
|
||||||
int doc;
|
int doc;
|
||||||
if (withFreqs) {
|
if (withFreqs) {
|
||||||
freqs = new int[docs.length];
|
if (freqs == null || freqs.length < docs.length) {
|
||||||
|
freqs = new int[docs.length];
|
||||||
|
}
|
||||||
while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS){
|
while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS){
|
||||||
if (i >= docs.length) {
|
if (i >= docs.length) {
|
||||||
docs = ArrayUtil.grow(docs, docs.length + 1);
|
docs = ArrayUtil.grow(docs, docs.length + 1);
|
||||||
|
@ -351,7 +374,15 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
||||||
new DocFreqSorterTemplate(docs, freqs).timSort(0, i - 1);
|
new DocFreqSorterTemplate(docs, freqs).timSort(0, i - 1);
|
||||||
upto = i;
|
upto = i;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// for testing: reports whether this enum shares its docs array with other, i.e. whether reuse took place
|
||||||
|
boolean reused(DocsEnum other) {
|
||||||
|
if (other == null || !(other instanceof SortingDocsEnum)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return docs == ((SortingDocsEnum) other).docs;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int advance(final int target) throws IOException {
|
public int advance(final int target) throws IOException {
|
||||||
// need to support it for checkIndex, but in practice it won't be called, so
|
// need to support it for checkIndex, but in practice it won't be called, so
|
||||||
|
@ -382,7 +413,7 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class SortingDocsAndPositionsEnum extends FilterDocsAndPositionsEnum {
|
static class SortingDocsAndPositionsEnum extends FilterDocsAndPositionsEnum {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A {@link SorterTemplate} which sorts two parallel arrays of doc IDs and
|
* A {@link SorterTemplate} which sorts two parallel arrays of doc IDs and
|
||||||
|
@ -439,16 +470,26 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
||||||
private int pos;
|
private int pos;
|
||||||
private int startOffset = -1;
|
private int startOffset = -1;
|
||||||
private int endOffset = -1;
|
private int endOffset = -1;
|
||||||
private final BytesRef payload = new BytesRef(32);
|
private final BytesRef payload;
|
||||||
private int currFreq;
|
private int currFreq;
|
||||||
|
|
||||||
public SortingDocsAndPositionsEnum(final DocsAndPositionsEnum in, Sorter.DocMap docMap, boolean storeOffsets) throws IOException {
|
private final RAMFile file;
|
||||||
|
|
||||||
|
SortingDocsAndPositionsEnum(SortingDocsAndPositionsEnum reuse, final DocsAndPositionsEnum in, Sorter.DocMap docMap, boolean storeOffsets) throws IOException {
|
||||||
super(in);
|
super(in);
|
||||||
this.storeOffsets = storeOffsets;
|
this.storeOffsets = storeOffsets;
|
||||||
final RAMFile file = new RAMFile();
|
if (reuse != null) {
|
||||||
|
docs = reuse.docs;
|
||||||
|
offsets = reuse.offsets;
|
||||||
|
payload = reuse.payload;
|
||||||
|
file = reuse.file;
|
||||||
|
} else {
|
||||||
|
docs = new int[32];
|
||||||
|
offsets = new long[32];
|
||||||
|
payload = new BytesRef(32);
|
||||||
|
file = new RAMFile();
|
||||||
|
}
|
||||||
final IndexOutput out = new RAMOutputStream(file);
|
final IndexOutput out = new RAMOutputStream(file);
|
||||||
docs = new int[32];
|
|
||||||
offsets = new long[32];
|
|
||||||
int doc;
|
int doc;
|
||||||
int i = 0;
|
int i = 0;
|
||||||
while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
|
while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
|
||||||
|
@ -467,7 +508,15 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
||||||
out.close();
|
out.close();
|
||||||
this.postingInput = new RAMInputStream("", file);
|
this.postingInput = new RAMInputStream("", file);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// for testing: reports whether this enum shares its docs array with other, i.e. whether reuse took place
|
||||||
|
boolean reused(DocsAndPositionsEnum other) {
|
||||||
|
if (other == null || !(other instanceof SortingDocsAndPositionsEnum)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return docs == ((SortingDocsAndPositionsEnum) other).docs;
|
||||||
|
}
|
||||||
|
|
||||||
private void addPositions(final DocsAndPositionsEnum in, final IndexOutput out) throws IOException {
|
private void addPositions(final DocsAndPositionsEnum in, final IndexOutput out) throws IOException {
|
||||||
int freq = in.freq();
|
int freq = in.freq();
|
||||||
out.writeVInt(freq);
|
out.writeVInt(freq);
|
||||||
|
|
|
@ -47,6 +47,7 @@ import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||||
import org.apache.lucene.index.DocsEnum;
|
import org.apache.lucene.index.DocsEnum;
|
||||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.FieldInvertState;
|
import org.apache.lucene.index.FieldInvertState;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.IndexWriterConfig;
|
import org.apache.lucene.index.IndexWriterConfig;
|
||||||
import org.apache.lucene.index.NumericDocValues;
|
import org.apache.lucene.index.NumericDocValues;
|
||||||
import org.apache.lucene.index.RandomIndexWriter;
|
import org.apache.lucene.index.RandomIndexWriter;
|
||||||
|
@ -57,6 +58,8 @@ import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.Terms;
|
import org.apache.lucene.index.Terms;
|
||||||
import org.apache.lucene.index.TermsEnum;
|
import org.apache.lucene.index.TermsEnum;
|
||||||
import org.apache.lucene.index.TermsEnum.SeekStatus;
|
import org.apache.lucene.index.TermsEnum.SeekStatus;
|
||||||
|
import org.apache.lucene.index.sorter.SortingAtomicReader.SortingDocsAndPositionsEnum;
|
||||||
|
import org.apache.lucene.index.sorter.SortingAtomicReader.SortingDocsEnum;
|
||||||
import org.apache.lucene.search.CollectionStatistics;
|
import org.apache.lucene.search.CollectionStatistics;
|
||||||
import org.apache.lucene.search.DocIdSetIterator;
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
import org.apache.lucene.search.TermStatistics;
|
import org.apache.lucene.search.TermStatistics;
|
||||||
|
@ -255,8 +258,9 @@ public abstract class SorterTestBase extends LuceneTestCase {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testDocsAndPositionsEnum() throws Exception {
|
public void testDocsAndPositionsEnum() throws Exception {
|
||||||
Term term = new Term(DOC_POSITIONS_FIELD, DOC_POSITIONS_TERM);
|
TermsEnum termsEnum = reader.terms(DOC_POSITIONS_FIELD).iterator(null);
|
||||||
DocsAndPositionsEnum sortedPositions = reader.termPositionsEnum(term);
|
assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef(DOC_POSITIONS_TERM)));
|
||||||
|
DocsAndPositionsEnum sortedPositions = termsEnum.docsAndPositions(null, null);
|
||||||
int doc;
|
int doc;
|
||||||
|
|
||||||
// test nextDoc()
|
// test nextDoc()
|
||||||
|
@ -274,7 +278,11 @@ public abstract class SorterTestBase extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
// test advance()
|
// test advance()
|
||||||
sortedPositions = reader.termPositionsEnum(term);
|
final DocsAndPositionsEnum reuse = sortedPositions;
|
||||||
|
sortedPositions = termsEnum.docsAndPositions(null, reuse);
|
||||||
|
if (sortedPositions instanceof SortingDocsAndPositionsEnum) {
|
||||||
|
assertTrue(((SortingDocsAndPositionsEnum) sortedPositions).reused(reuse)); // make sure reuse worked
|
||||||
|
}
|
||||||
doc = 0;
|
doc = 0;
|
||||||
while ((doc = sortedPositions.advance(doc)) != DocIdSetIterator.NO_MORE_DOCS) {
|
while ((doc = sortedPositions.advance(doc)) != DocIdSetIterator.NO_MORE_DOCS) {
|
||||||
int freq = sortedPositions.freq();
|
int freq = sortedPositions.freq();
|
||||||
|
@ -328,8 +336,15 @@ public abstract class SorterTestBase extends LuceneTestCase {
|
||||||
assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.get(prev));
|
assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.get(prev));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
while (++prev < reader.maxDoc()) {
|
||||||
docs = termsEnum.docs(mappedLiveDocs, docs);
|
assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.get(prev));
|
||||||
|
}
|
||||||
|
|
||||||
|
DocsEnum reuse = docs;
|
||||||
|
docs = termsEnum.docs(mappedLiveDocs, reuse);
|
||||||
|
if (docs instanceof SortingDocsEnum) {
|
||||||
|
assertTrue(((SortingDocsEnum) docs).reused(reuse)); // make sure reuse worked
|
||||||
|
}
|
||||||
doc = -1;
|
doc = -1;
|
||||||
prev = -1;
|
prev = -1;
|
||||||
while ((doc = docs.advance(doc + 1)) != DocIdSetIterator.NO_MORE_DOCS) {
|
while ((doc = docs.advance(doc + 1)) != DocIdSetIterator.NO_MORE_DOCS) {
|
||||||
|
@ -339,6 +354,9 @@ public abstract class SorterTestBase extends LuceneTestCase {
|
||||||
assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.get(prev));
|
assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.get(prev));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
while (++prev < reader.maxDoc()) {
|
||||||
|
assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.get(prev));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
Loading…
Reference in New Issue