mirror of https://github.com/apache/lucene.git
LUCENE-3827: implement start/endOffset in DocsAndPositionsEnum returned from MemoryIndex, if offsets were indexed
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1294151 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
7be5533989
commit
4576bc162a
|
@ -68,6 +68,10 @@ New Features
|
|||
cached IO pages due to large merges. (Varun Thacker, Mike
|
||||
McCandless)
|
||||
|
||||
* LUCENE-3827: DocsAndPositionsEnum from MemoryIndex implements
|
||||
start/endOffset, if offsets are indexed. (Alan Woodward via Mike
|
||||
McCandless)
|
||||
|
||||
API Changes
|
||||
|
||||
* LUCENE-2606: Changed RegexCapabilities interface to fix thread
|
||||
|
|
|
@ -43,7 +43,6 @@ import org.apache.lucene.index.FieldInfos;
|
|||
import org.apache.lucene.index.FieldInvertState;
|
||||
import org.apache.lucene.index.Fields;
|
||||
import org.apache.lucene.index.FieldsEnum;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.OrdTermState;
|
||||
import org.apache.lucene.index.StoredFieldVisitor;
|
||||
import org.apache.lucene.index.Term;
|
||||
|
@ -226,14 +225,14 @@ public class MemoryIndex {
|
|||
* Constructs an empty instance that can optionally store the start and end
|
||||
* character offset of each token term in the text. This can be useful for
|
||||
* highlighting of hit locations with the Lucene highlighter package.
|
||||
* Private until the highlighter package matures, so that this can actually
|
||||
* Protected until the highlighter package matures, so that this can actually
|
||||
* be meaningfully integrated.
|
||||
*
|
||||
* @param storeOffsets
|
||||
* whether or not to store the start and end character offset of
|
||||
* each token term in the text
|
||||
*/
|
||||
private MemoryIndex(boolean storeOffsets) {
|
||||
protected MemoryIndex(boolean storeOffsets) {
|
||||
this.stride = storeOffsets ? 3 : 1;
|
||||
fieldInfos = new FieldInfos();
|
||||
}
|
||||
|
@ -1046,22 +1045,22 @@ public class MemoryIndex {
|
|||
|
||||
@Override
|
||||
public int freq() {
|
||||
return positions.size();
|
||||
return positions.size() / stride;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextPosition() {
|
||||
return positions.get(posUpto++);
|
||||
return positions.get(posUpto++ * stride);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int startOffset() {
|
||||
return -1;
|
||||
return stride == 1 ? -1 : positions.get((posUpto - 1) * stride + 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int endOffset() {
|
||||
return -1;
|
||||
return stride == 1 ? -1 : positions.get((posUpto - 1) * stride + 2);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -204,13 +204,16 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase {
|
|||
|
||||
public void testDocsAndPositionsEnumStart() throws Exception {
|
||||
Analyzer analyzer = new MockAnalyzer(random);
|
||||
MemoryIndex memory = new MemoryIndex();
|
||||
MemoryIndex memory = new MemoryIndex(true);
|
||||
memory.addField("foo", "bar", analyzer);
|
||||
AtomicReader reader = (AtomicReader) memory.createSearcher().getIndexReader();
|
||||
DocsAndPositionsEnum disi = reader.termPositionsEnum(null, "foo", new BytesRef("bar"), false);
|
||||
int docid = disi.docID();
|
||||
assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
|
||||
assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
|
||||
assertEquals(0, disi.nextPosition());
|
||||
assertEquals(0, disi.startOffset());
|
||||
assertEquals(3, disi.endOffset());
|
||||
|
||||
// now reuse and check again
|
||||
TermsEnum te = reader.terms("foo").iterator(null);
|
||||
|
|
Loading…
Reference in New Issue