From edbad37b0c449d2effbcf1ac155c056298aaa7c6 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Mon, 18 Mar 2013 13:36:18 +0000 Subject: [PATCH] LUCENE-4847: Sorter API: Fully reuse docs enums. git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1457760 13f79535-47bb-0310-9956-ffa450edef68 --- .../index/sorter/SortingAtomicReader.java | 115 +++++++++++++----- .../lucene/index/sorter/SorterTestBase.java | 28 ++++- 2 files changed, 105 insertions(+), 38 deletions(-) diff --git a/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingAtomicReader.java b/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingAtomicReader.java index e850076df1e..336b56c34b4 100644 --- a/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingAtomicReader.java +++ b/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingAtomicReader.java @@ -136,34 +136,48 @@ public class SortingAtomicReader extends FilterAtomicReader { @Override public DocsEnum docs(Bits liveDocs, DocsEnum reuse, final int flags) throws IOException { - // if we're asked to reuse the given DocsEnum and it is Sorting, return - // the wrapped one, since some Codecs expect it. + final DocsEnum inReuse; + final SortingDocsEnum wrapReuse; if (reuse != null && reuse instanceof SortingDocsEnum) { - reuse = ((SortingDocsEnum) reuse).getWrapped(); + // if we're asked to reuse the given DocsEnum and it is Sorting, return + // the wrapped one, since some Codecs expect it. + wrapReuse = (SortingDocsEnum) reuse; + inReuse = wrapReuse.getWrapped(); + } else { + wrapReuse = null; + inReuse = reuse; } - boolean withFreqs = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >=0 && (flags & DocsEnum.FLAG_FREQS) != 0; - return new SortingDocsEnum(in.docs(newToOld(liveDocs), reuse, flags), withFreqs, docMap); + + final DocsEnum inDocs = in.docs(newToOld(liveDocs), inReuse, flags); + final boolean withFreqs = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >=0 && (flags & DocsEnum.FLAG_FREQS) != 0; + return new SortingDocsEnum(wrapReuse, inDocs, withFreqs, docMap); } @Override public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, final int flags) throws IOException { - // if we're asked to reuse the given DocsAndPositionsEnum and it is - // Sorting, return the wrapped one, since some Codecs expect it. + final DocsAndPositionsEnum inReuse; + final SortingDocsAndPositionsEnum wrapReuse; if (reuse != null && reuse instanceof SortingDocsAndPositionsEnum) { - reuse = ((SortingDocsAndPositionsEnum) reuse).getWrapped(); - } - - final DocsAndPositionsEnum positions = in.docsAndPositions(newToOld(liveDocs), reuse, flags); - if (positions == null) { - return null; + // if we're asked to reuse the given DocsEnum and it is Sorting, return + // the wrapped one, since some Codecs expect it. + wrapReuse = (SortingDocsAndPositionsEnum) reuse; + inReuse = wrapReuse.getWrapped(); } else { - // we ignore the fact that offsets may be stored but not asked for, - // since this code is expected to be used during addIndexes which will - // ask for everything. if that assumption changes in the future, we can - // factor in whether 'flags' says offsets are not required. - boolean storeOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; - return new SortingDocsAndPositionsEnum(positions, docMap, storeOffsets); + wrapReuse = null; + inReuse = reuse; } + + final DocsAndPositionsEnum inDocsAndPositions = in.docsAndPositions(newToOld(liveDocs), inReuse, flags); + if (inDocsAndPositions == null) { + return null; + } + + // we ignore the fact that offsets may be stored but not asked for, + // since this code is expected to be used during addIndexes which will + // ask for everything. if that assumption changes in the future, we can + // factor in whether 'flags' says offsets are not required. + final boolean storeOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; + return new SortingDocsAndPositionsEnum(wrapReuse, inDocsAndPositions, docMap, storeOffsets); } } @@ -272,7 +286,7 @@ public class SortingAtomicReader extends FilterAtomicReader { } } - private static class SortingDocsEnum extends FilterDocsEnum { + static class SortingDocsEnum extends FilterDocsEnum { private static final class DocFreqSorterTemplate extends SorterTemplate { @@ -315,19 +329,28 @@ public class SortingAtomicReader extends FilterAtomicReader { } } - private int[] docs = new int[64]; + private int[] docs; private int[] freqs; private int docIt = -1; private final int upto; private final boolean withFreqs; - - public SortingDocsEnum(final DocsEnum in, boolean withFreqs, final Sorter.DocMap docMap) throws IOException { + + SortingDocsEnum(SortingDocsEnum reuse, final DocsEnum in, boolean withFreqs, final Sorter.DocMap docMap) throws IOException { super(in); this.withFreqs = withFreqs; + if (reuse != null) { + docs = reuse.docs; + freqs = reuse.freqs; // maybe null + } else { + docs = new int[64]; + } + docIt = -1; int i = 0; int doc; if (withFreqs) { - freqs = new int[docs.length]; + if (freqs == null || freqs.length < docs.length) { + freqs = new int[docs.length]; + } while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS){ if (i >= docs.length) { docs = ArrayUtil.grow(docs, docs.length + 1); @@ -351,7 +374,15 @@ public class SortingAtomicReader extends FilterAtomicReader { new DocFreqSorterTemplate(docs, freqs).timSort(0, i - 1); upto = i; } - + + // for testing + boolean reused(DocsEnum other) { + if (other == null || !(other instanceof SortingDocsEnum)) { + return false; + } + return docs == ((SortingDocsEnum) other).docs; + } + @Override public int advance(final int target) throws IOException { // need to support it for checkIndex, but in practice it won't be called, so @@ -382,7 +413,7 @@ public class SortingAtomicReader extends FilterAtomicReader { } } - private static class SortingDocsAndPositionsEnum extends FilterDocsAndPositionsEnum { + static class SortingDocsAndPositionsEnum extends FilterDocsAndPositionsEnum { /** * A {@link SorterTemplate} which sorts two parallel arrays of doc IDs and @@ -439,16 +470,26 @@ public class SortingAtomicReader extends FilterAtomicReader { private int pos; private int startOffset = -1; private int endOffset = -1; - private final BytesRef payload = new BytesRef(32); + private final BytesRef payload; private int currFreq; - - public SortingDocsAndPositionsEnum(final DocsAndPositionsEnum in, Sorter.DocMap docMap, boolean storeOffsets) throws IOException { + + private final RAMFile file; + + SortingDocsAndPositionsEnum(SortingDocsAndPositionsEnum reuse, final DocsAndPositionsEnum in, Sorter.DocMap docMap, boolean storeOffsets) throws IOException { super(in); this.storeOffsets = storeOffsets; - final RAMFile file = new RAMFile(); + if (reuse != null) { + docs = reuse.docs; + offsets = reuse.offsets; + payload = reuse.payload; + file = reuse.file; + } else { + docs = new int[32]; + offsets = new long[32]; + payload = new BytesRef(32); + file = new RAMFile(); + } final IndexOutput out = new RAMOutputStream(file); - docs = new int[32]; - offsets = new long[32]; int doc; int i = 0; while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { @@ -467,7 +508,15 @@ public class SortingAtomicReader extends FilterAtomicReader { out.close(); this.postingInput = new RAMInputStream("", file); } - + + // for testing + boolean reused(DocsAndPositionsEnum other) { + if (other == null || !(other instanceof SortingDocsAndPositionsEnum)) { + return false; + } + return docs == ((SortingDocsAndPositionsEnum) other).docs; + } + private void addPositions(final DocsAndPositionsEnum in, final IndexOutput out) throws IOException { int freq = in.freq(); out.writeVInt(freq); diff --git a/lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestBase.java b/lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestBase.java index fe4ce13aa24..0734fd0cfde 100644 --- a/lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestBase.java +++ b/lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestBase.java @@ -47,6 +47,7 @@ import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.FieldInvertState; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.RandomIndexWriter; @@ -57,6 +58,8 @@ import org.apache.lucene.index.Term; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum.SeekStatus; +import org.apache.lucene.index.sorter.SortingAtomicReader.SortingDocsAndPositionsEnum; +import org.apache.lucene.index.sorter.SortingAtomicReader.SortingDocsEnum; import org.apache.lucene.search.CollectionStatistics; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.TermStatistics; @@ -255,8 +258,9 @@ public abstract class SorterTestBase extends LuceneTestCase { @Test public void testDocsAndPositionsEnum() throws Exception { - Term term = new Term(DOC_POSITIONS_FIELD, DOC_POSITIONS_TERM); - DocsAndPositionsEnum sortedPositions = reader.termPositionsEnum(term); + TermsEnum termsEnum = reader.terms(DOC_POSITIONS_FIELD).iterator(null); + assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef(DOC_POSITIONS_TERM))); + DocsAndPositionsEnum sortedPositions = termsEnum.docsAndPositions(null, null); int doc; // test nextDoc() @@ -274,7 +278,11 @@ public abstract class SorterTestBase extends LuceneTestCase { } // test advance() - sortedPositions = reader.termPositionsEnum(term); + final DocsAndPositionsEnum reuse = sortedPositions; + sortedPositions = termsEnum.docsAndPositions(null, reuse); + if (sortedPositions instanceof SortingDocsAndPositionsEnum) { + assertTrue(((SortingDocsAndPositionsEnum) sortedPositions).reused(reuse)); // make sure reuse worked + } doc = 0; while ((doc = sortedPositions.advance(doc)) != DocIdSetIterator.NO_MORE_DOCS) { int freq = sortedPositions.freq(); @@ -328,8 +336,15 @@ public abstract class SorterTestBase extends LuceneTestCase { assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.get(prev)); } } - - docs = termsEnum.docs(mappedLiveDocs, docs); + while (++prev < reader.maxDoc()) { + assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.get(prev)); + } + + DocsEnum reuse = docs; + docs = termsEnum.docs(mappedLiveDocs, reuse); + if (docs instanceof SortingDocsEnum) { + assertTrue(((SortingDocsEnum) docs).reused(reuse)); // make sure reuse worked + } doc = -1; prev = -1; while ((doc = docs.advance(doc + 1)) != DocIdSetIterator.NO_MORE_DOCS) { @@ -339,6 +354,9 @@ public abstract class SorterTestBase extends LuceneTestCase { assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.get(prev)); } } + while (++prev < reader.maxDoc()) { + assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.get(prev)); + } } @Test