LUCENE-3827: implement start/endOffset in DocsAndPositionsEnum returned from MemoryIndex, if offsets were indexed

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1294151 13f79535-47bb-0310-9956-ffa450edef68
2025-02-08 19:15:06 +00:00 · 2012-02-27 13:35:33 +00:00 · 2012-02-27 13:35:33 +00:00 · 4576bc162a
commit 4576bc162a
parent 7be5533989
3 changed files with 14 additions and 8 deletions
--- a/lucene/contrib/CHANGES.txt
+++ b/lucene/contrib/CHANGES.txt
@ -68,6 +68,10 @@ New Features
   cached IO pages due to large merges.  (Varun Thacker, Mike
   McCandless)

+ * LUCENE-3827: DocsAndPositionsEnum from MemoryIndex implements
+   start/endOffset, if offsets are indexed. (Alan Woodward via Mike
+   McCandless)
+
 API Changes

 * LUCENE-2606: Changed RegexCapabilities interface to fix thread 
--- a/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
+++ b/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
@ -43,7 +43,6 @@ import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.FieldInvertState;
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.FieldsEnum;
-import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.OrdTermState;
 import org.apache.lucene.index.StoredFieldVisitor;
 import org.apache.lucene.index.Term;
@ -226,14 +225,14 @@ public class MemoryIndex {
   * Constructs an empty instance that can optionally store the start and end
   * character offset of each token term in the text. This can be useful for
   * highlighting of hit locations with the Lucene highlighter package.
-   * Private until the highlighter package matures, so that this can actually
+   * Protected until the highlighter package matures, so that this can actually
   * be meaningfully integrated.
   * 
   * @param storeOffsets
   *            whether or not to store the start and end character offset of
   *            each token term in the text
   */
-  private MemoryIndex(boolean storeOffsets) {
+  protected MemoryIndex(boolean storeOffsets) {
    this.stride = storeOffsets ? 3 : 1;
    fieldInfos = new FieldInfos();
  }
@ -1046,22 +1045,22 @@ public class MemoryIndex {

      @Override
      public int freq() {
-        return positions.size();
+        return positions.size() / stride;
      }

      @Override
      public int nextPosition() {
-        return positions.get(posUpto++);
+        return positions.get(posUpto++ * stride);
      }

      @Override
      public int startOffset() {
-        return -1;
+        return stride == 1 ? -1 : positions.get((posUpto - 1) * stride + 1);
      }

      @Override
      public int endOffset() {
-        return -1;
+        return stride == 1 ? -1 : positions.get((posUpto - 1) * stride + 2);
      }

      @Override
--- a/lucene/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
+++ b/lucene/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
@ -204,13 +204,16 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase {
  
  public void testDocsAndPositionsEnumStart() throws Exception {
    Analyzer analyzer = new MockAnalyzer(random);
-    MemoryIndex memory = new MemoryIndex();
+    MemoryIndex memory = new MemoryIndex(true);
    memory.addField("foo", "bar", analyzer);
    AtomicReader reader = (AtomicReader) memory.createSearcher().getIndexReader();
    DocsAndPositionsEnum disi = reader.termPositionsEnum(null, "foo", new BytesRef("bar"), false);
    int docid = disi.docID();
    assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
    assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+    assertEquals(0, disi.nextPosition());
+    assertEquals(0, disi.startOffset());
+    assertEquals(3, disi.endOffset());
    
    // now reuse and check again
    TermsEnum te = reader.terms("foo").iterator(null);