LUCENE-3827: implement start/endOffset in DocsAndPositionsEnum returned from MemoryIndex, if offsets were indexed

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1294151 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2012-02-27 13:35:33 +00:00
parent 7be5533989
commit 4576bc162a
3 changed files with 14 additions and 8 deletions

View File

@ -68,6 +68,10 @@ New Features
cached IO pages due to large merges. (Varun Thacker, Mike
McCandless)
* LUCENE-3827: DocsAndPositionsEnum from MemoryIndex implements
startOffset/endOffset when offsets are indexed; both return -1
otherwise. (Alan Woodward via Mike McCandless)
API Changes
* LUCENE-2606: Changed RegexCapabilities interface to fix thread

View File

@ -43,7 +43,6 @@ import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.OrdTermState;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.index.Term;
@ -226,14 +225,14 @@ public class MemoryIndex {
* Constructs an empty instance that can optionally store the start and end
* character offset of each token term in the text. This can be useful for
* highlighting of hit locations with the Lucene highlighter package.
* Private until the highlighter package matures, so that this can actually
* Protected until the highlighter package matures, so that this can actually
* be meaningfully integrated.
*
* @param storeOffsets
* whether or not to store the start and end character offset of
* each token term in the text
*/
private MemoryIndex(boolean storeOffsets) {
protected MemoryIndex(boolean storeOffsets) {
this.stride = storeOffsets ? 3 : 1;
fieldInfos = new FieldInfos();
}
@ -1046,22 +1045,22 @@ public class MemoryIndex {
@Override
public int freq() {
return positions.size();
return positions.size() / stride;
}
@Override
public int nextPosition() {
return positions.get(posUpto++);
return positions.get(posUpto++ * stride);
}
@Override
public int startOffset() {
return -1;
return stride == 1 ? -1 : positions.get((posUpto - 1) * stride + 1);
}
@Override
public int endOffset() {
return -1;
return stride == 1 ? -1 : positions.get((posUpto - 1) * stride + 2);
}
@Override

View File

@ -204,13 +204,16 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase {
public void testDocsAndPositionsEnumStart() throws Exception {
Analyzer analyzer = new MockAnalyzer(random);
MemoryIndex memory = new MemoryIndex();
MemoryIndex memory = new MemoryIndex(true);
memory.addField("foo", "bar", analyzer);
AtomicReader reader = (AtomicReader) memory.createSearcher().getIndexReader();
DocsAndPositionsEnum disi = reader.termPositionsEnum(null, "foo", new BytesRef("bar"), false);
int docid = disi.docID();
assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
assertEquals(0, disi.nextPosition());
assertEquals(0, disi.startOffset());
assertEquals(3, disi.endOffset());
// now reuse and check again
TermsEnum te = reader.terms("foo").iterator(null);