LUCENE-4847: Sorter API: Fully reuse docs enums.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1457760 13f79535-47bb-0310-9956-ffa450edef68
2013-03-18 13:36:18 +00:00 · 2013-03-18 13:36:18 +00:00 · edbad37b0c
parent c04a4a5c96
commit edbad37b0c
2 changed files with 105 additions and 38 deletions
--- a/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingAtomicReader.java
+++ b/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingAtomicReader.java
@ -136,34 +136,48 @@ public class SortingAtomicReader extends FilterAtomicReader {

    @Override
    public DocsEnum docs(Bits liveDocs, DocsEnum reuse, final int flags) throws IOException {
-      // if we're asked to reuse the given DocsEnum and it is Sorting, return
-      // the wrapped one, since some Codecs expect it.
+      final DocsEnum inReuse;
+      final SortingDocsEnum wrapReuse;
      if (reuse != null && reuse instanceof SortingDocsEnum) {
-        reuse = ((SortingDocsEnum) reuse).getWrapped();
+        // if we're asked to reuse the given DocsEnum and it is Sorting, return
+        // the wrapped one, since some Codecs expect it.
+        wrapReuse = (SortingDocsEnum) reuse;
+        inReuse = wrapReuse.getWrapped();
+      } else {
+        wrapReuse = null;
+        inReuse = reuse;
      }
-      boolean withFreqs = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >=0 && (flags & DocsEnum.FLAG_FREQS) != 0;
-      return new SortingDocsEnum(in.docs(newToOld(liveDocs), reuse, flags), withFreqs, docMap);
+
+      final DocsEnum inDocs = in.docs(newToOld(liveDocs), inReuse, flags);
+      final boolean withFreqs = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >=0 && (flags & DocsEnum.FLAG_FREQS) != 0;
+      return new SortingDocsEnum(wrapReuse, inDocs, withFreqs, docMap);
    }

    @Override
    public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, final int flags) throws IOException {
-      // if we're asked to reuse the given DocsAndPositionsEnum and it is
-      // Sorting, return the wrapped one, since some Codecs expect it.
+      final DocsAndPositionsEnum inReuse;
+      final SortingDocsAndPositionsEnum wrapReuse;
      if (reuse != null && reuse instanceof SortingDocsAndPositionsEnum) {
-        reuse = ((SortingDocsAndPositionsEnum) reuse).getWrapped();
+        // if we're asked to reuse the given DocsEnum and it is Sorting, return
+        // the wrapped one, since some Codecs expect it.
+        wrapReuse = (SortingDocsAndPositionsEnum) reuse;
+        inReuse = wrapReuse.getWrapped();
+      } else {
+        wrapReuse = null;
+        inReuse = reuse;
      }

-      final DocsAndPositionsEnum positions = in.docsAndPositions(newToOld(liveDocs), reuse, flags);
-      if (positions == null) {
+      final DocsAndPositionsEnum inDocsAndPositions = in.docsAndPositions(newToOld(liveDocs), inReuse, flags);
+      if (inDocsAndPositions == null) {
        return null;
-      } else {
-        // we ignore the fact that offsets may be stored but not asked for,
-        // since this code is expected to be used during addIndexes which will
-        // ask for everything. if that assumption changes in the future, we can
-        // factor in whether 'flags' says offsets are not required.
-        boolean storeOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
-        return new SortingDocsAndPositionsEnum(positions, docMap, storeOffsets);
      }
+
+      // we ignore the fact that offsets may be stored but not asked for,
+      // since this code is expected to be used during addIndexes which will
+      // ask for everything. if that assumption changes in the future, we can
+      // factor in whether 'flags' says offsets are not required.
+      final boolean storeOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+      return new SortingDocsAndPositionsEnum(wrapReuse, inDocsAndPositions, docMap, storeOffsets);
    }

  }
@ -272,7 +286,7 @@ public class SortingAtomicReader extends FilterAtomicReader {
    }
  }

-  private static class SortingDocsEnum extends FilterDocsEnum {
+  static class SortingDocsEnum extends FilterDocsEnum {
    
    private static final class DocFreqSorterTemplate extends SorterTemplate {
      
@ -315,19 +329,28 @@ public class SortingAtomicReader extends FilterAtomicReader {
      }
    }
    
-    private int[] docs = new int[64];
+    private int[] docs;
    private int[] freqs;
    private int docIt = -1;
    private final int upto;
    private final boolean withFreqs;

-    public SortingDocsEnum(final DocsEnum in, boolean withFreqs, final Sorter.DocMap docMap) throws IOException {
+    SortingDocsEnum(SortingDocsEnum reuse, final DocsEnum in, boolean withFreqs, final Sorter.DocMap docMap) throws IOException {
      super(in);
      this.withFreqs = withFreqs;
+      if (reuse != null) {
+        docs = reuse.docs;
+        freqs = reuse.freqs; // maybe null
+      } else {
+        docs = new int[64];
+      }
+      docIt = -1;
      int i = 0;
      int doc;
      if (withFreqs) {
-        freqs = new int[docs.length];
+        if (freqs == null || freqs.length < docs.length) {
+          freqs = new int[docs.length];
+        }
        while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS){
          if (i >= docs.length) {
            docs = ArrayUtil.grow(docs, docs.length + 1);
@ -352,6 +375,14 @@ public class SortingAtomicReader extends FilterAtomicReader {
      upto = i;
    }

+    // for testing
+    boolean reused(DocsEnum other) {
+      if (other == null || !(other instanceof SortingDocsEnum)) {
+        return false;
+      }
+      return docs == ((SortingDocsEnum) other).docs;
+    }
+
    @Override
    public int advance(final int target) throws IOException {
      // need to support it for checkIndex, but in practice it won't be called, so
@ -382,7 +413,7 @@ public class SortingAtomicReader extends FilterAtomicReader {
    }
  }
  
-  private static class SortingDocsAndPositionsEnum extends FilterDocsAndPositionsEnum {
+  static class SortingDocsAndPositionsEnum extends FilterDocsAndPositionsEnum {
    
    /**
     * A {@link SorterTemplate} which sorts two parallel arrays of doc IDs and
@ -439,16 +470,26 @@ public class SortingAtomicReader extends FilterAtomicReader {
    private int pos;
    private int startOffset = -1;
    private int endOffset = -1;
-    private final BytesRef payload = new BytesRef(32);
+    private final BytesRef payload;
    private int currFreq;

-    public SortingDocsAndPositionsEnum(final DocsAndPositionsEnum in, Sorter.DocMap docMap, boolean storeOffsets) throws IOException {
+    private final RAMFile file;
+
+    SortingDocsAndPositionsEnum(SortingDocsAndPositionsEnum reuse, final DocsAndPositionsEnum in, Sorter.DocMap docMap, boolean storeOffsets) throws IOException {
      super(in);
      this.storeOffsets = storeOffsets;
-      final RAMFile file = new RAMFile();
+      if (reuse != null) {
+        docs = reuse.docs;
+        offsets = reuse.offsets;
+        payload = reuse.payload;
+        file = reuse.file;
+      } else {
+        docs = new int[32];
+        offsets = new long[32];
+        payload = new BytesRef(32);
+        file = new RAMFile();
+      }
      final IndexOutput out = new RAMOutputStream(file);
-      docs = new int[32];
-      offsets = new long[32];
      int doc;
      int i = 0;
      while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
@ -468,6 +509,14 @@ public class SortingAtomicReader extends FilterAtomicReader {
      this.postingInput = new RAMInputStream("", file);
    }

+    // for testing
+    boolean reused(DocsAndPositionsEnum other) {
+      if (other == null || !(other instanceof SortingDocsAndPositionsEnum)) {
+        return false;
+      }
+      return docs == ((SortingDocsAndPositionsEnum) other).docs;
+    }
+
    private void addPositions(final DocsAndPositionsEnum in, final IndexOutput out) throws IOException {
      int freq = in.freq();
      out.writeVInt(freq);
--- a/lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestBase.java
+++ b/lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestBase.java
@ -47,6 +47,7 @@ import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.FieldInvertState;
+import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.RandomIndexWriter;
@ -57,6 +58,8 @@ import org.apache.lucene.index.Term;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.index.TermsEnum.SeekStatus;
+import org.apache.lucene.index.sorter.SortingAtomicReader.SortingDocsAndPositionsEnum;
+import org.apache.lucene.index.sorter.SortingAtomicReader.SortingDocsEnum;
 import org.apache.lucene.search.CollectionStatistics;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.TermStatistics;
@ -255,8 +258,9 @@ public abstract class SorterTestBase extends LuceneTestCase {
  
  @Test
  public void testDocsAndPositionsEnum() throws Exception {
-    Term term = new Term(DOC_POSITIONS_FIELD, DOC_POSITIONS_TERM);
-    DocsAndPositionsEnum sortedPositions = reader.termPositionsEnum(term);
+    TermsEnum termsEnum = reader.terms(DOC_POSITIONS_FIELD).iterator(null);
+    assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef(DOC_POSITIONS_TERM)));
+    DocsAndPositionsEnum sortedPositions = termsEnum.docsAndPositions(null, null);
    int doc;
    
    // test nextDoc()
@ -274,7 +278,11 @@ public abstract class SorterTestBase extends LuceneTestCase {
    }
    
    // test advance()
-    sortedPositions = reader.termPositionsEnum(term);
+    final DocsAndPositionsEnum reuse = sortedPositions;
+    sortedPositions = termsEnum.docsAndPositions(null, reuse);
+    if (sortedPositions instanceof SortingDocsAndPositionsEnum) {
+      assertTrue(((SortingDocsAndPositionsEnum) sortedPositions).reused(reuse)); // make sure reuse worked
+    }
    doc = 0;
    while ((doc = sortedPositions.advance(doc)) != DocIdSetIterator.NO_MORE_DOCS) {
      int freq = sortedPositions.freq();
@ -328,8 +336,15 @@ public abstract class SorterTestBase extends LuceneTestCase {
        assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.get(prev));
      }
    }
+    while (++prev < reader.maxDoc()) {
+      assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.get(prev));
+    }

-    docs = termsEnum.docs(mappedLiveDocs, docs);
+    DocsEnum reuse = docs;
+    docs = termsEnum.docs(mappedLiveDocs, reuse);
+    if (docs instanceof SortingDocsEnum) {
+      assertTrue(((SortingDocsEnum) docs).reused(reuse)); // make sure reuse worked
+    }
    doc = -1;
    prev = -1;
    while ((doc = docs.advance(doc + 1)) != DocIdSetIterator.NO_MORE_DOCS) {
@ -339,6 +354,9 @@ public abstract class SorterTestBase extends LuceneTestCase {
        assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.get(prev));
      }
    }
+    while (++prev < reader.maxDoc()) {
+      assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.get(prev));
+    }
  }
  
  @Test