mirror of https://github.com/apache/lucene.git
LUCENE-1038: Sets the document number of the term vector being mapped.
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@591620 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
8508386bcb
commit
413c92f178
|
@ -142,6 +142,7 @@ New features
|
|||
3. LUCENE-868: Added new Term Vector access features. New callback mechanism allows application to define how and where to read Term Vectors from disk.
|
||||
This implementation contains several extensions of the new abstract TermVectorMapper class. The new API should be back-compatible. No changes in the
|
||||
actual storage of Term Vectors has taken place.
|
||||
3.1 LUCENE-1038: Added setDocumentNumber() method to TermVectorMapper to provide information about what document is being accessed. (Karl Wetting via Grant Ingersoll)
|
||||
|
||||
4. LUCENE-975: Added PositionBasedTermVectorMapper that allows for position based lookup of term vector information. See item #3 above (LUCENE-868).
|
||||
|
||||
|
|
|
@ -85,4 +85,17 @@ public abstract class TermVectorMapper {
|
|||
return ignoringOffsets;
|
||||
}
|
||||
|
||||
/**
|
||||
* Passes down the index of the document whose term vector is currently being mapped,
|
||||
* once for each top level call to a term vector reader.
|
||||
*<p/>
|
||||
* Default implementation IGNORES the document number. Override if your implementation needs the document number.
|
||||
* <p/>
|
||||
* NOTE: Document numbers are internal to Lucene and subject to change depending on indexing operations.
|
||||
*
|
||||
* @param documentNumber index of document currently being mapped
|
||||
*/
|
||||
public void setDocumentNumber(int documentNumber) {
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -163,6 +163,7 @@ class TermVectorsReader implements Cloneable {
|
|||
for (int i = 0; i <= found; i++)
|
||||
position += tvd.readVLong();
|
||||
|
||||
mapper.setDocumentNumber(docNum);
|
||||
readTermVector(field, position, mapper);
|
||||
} else {
|
||||
//System.out.println("Fieldable not found");
|
||||
|
@ -228,7 +229,7 @@ class TermVectorsReader implements Cloneable {
|
|||
tvfPointers[i] = position;
|
||||
}
|
||||
|
||||
result = readTermVectors(fields, tvfPointers);
|
||||
result = readTermVectors(docNum, fields, tvfPointers);
|
||||
}
|
||||
} else {
|
||||
//System.out.println("No tvx file");
|
||||
|
@ -268,6 +269,7 @@ class TermVectorsReader implements Cloneable {
|
|||
tvfPointers[i] = position;
|
||||
}
|
||||
|
||||
mapper.setDocumentNumber(docNumber);
|
||||
readTermVectors(fields, tvfPointers, mapper);
|
||||
}
|
||||
} else {
|
||||
|
@ -276,11 +278,12 @@ class TermVectorsReader implements Cloneable {
|
|||
}
|
||||
|
||||
|
||||
private SegmentTermVector[] readTermVectors(String fields[], long tvfPointers[])
|
||||
private SegmentTermVector[] readTermVectors(int docNum, String fields[], long tvfPointers[])
|
||||
throws IOException {
|
||||
SegmentTermVector res[] = new SegmentTermVector[fields.length];
|
||||
for (int i = 0; i < fields.length; i++) {
|
||||
ParallelArrayTermVectorMapper mapper = new ParallelArrayTermVectorMapper();
|
||||
mapper.setDocumentNumber(docNum);
|
||||
readTermVector(fields[i], tvfPointers[i], mapper);
|
||||
res[i] = (SegmentTermVector) mapper.materializeVector();
|
||||
}
|
||||
|
|
|
@ -17,13 +17,13 @@ package org.apache.lucene.index;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.store.MockRAMDirectory;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.store.MockRAMDirectory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
|
@ -329,6 +329,32 @@ public class TestTermVectorsReader extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
// test setDocumentNumber()
|
||||
IndexReader ir = IndexReader.open(dir);
|
||||
DocNumAwareMapper docNumAwareMapper = new DocNumAwareMapper();
|
||||
assertEquals(-1, docNumAwareMapper.getDocumentNumber());
|
||||
|
||||
ir.getTermFreqVector(0, docNumAwareMapper);
|
||||
assertEquals(0, docNumAwareMapper.getDocumentNumber());
|
||||
docNumAwareMapper.setDocumentNumber(-1);
|
||||
|
||||
ir.getTermFreqVector(1, docNumAwareMapper);
|
||||
assertEquals(1, docNumAwareMapper.getDocumentNumber());
|
||||
docNumAwareMapper.setDocumentNumber(-1);
|
||||
|
||||
ir.getTermFreqVector(0, "f1", docNumAwareMapper);
|
||||
assertEquals(0, docNumAwareMapper.getDocumentNumber());
|
||||
docNumAwareMapper.setDocumentNumber(-1);
|
||||
|
||||
ir.getTermFreqVector(1, "f2", docNumAwareMapper);
|
||||
assertEquals(1, docNumAwareMapper.getDocumentNumber());
|
||||
docNumAwareMapper.setDocumentNumber(-1);
|
||||
|
||||
ir.getTermFreqVector(0, "f1", docNumAwareMapper);
|
||||
assertEquals(0, docNumAwareMapper.getDocumentNumber());
|
||||
|
||||
ir.close();
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
@ -364,4 +390,33 @@ public class TestTermVectorsReader extends LuceneTestCase {
|
|||
fail();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public static class DocNumAwareMapper extends TermVectorMapper {
|
||||
|
||||
public DocNumAwareMapper() {
|
||||
}
|
||||
|
||||
private int documentNumber = -1;
|
||||
|
||||
public void setExpectations(String field, int numTerms, boolean storeOffsets, boolean storePositions) {
|
||||
if (documentNumber == -1) {
|
||||
throw new RuntimeException("Documentnumber should be set at this point!");
|
||||
}
|
||||
}
|
||||
|
||||
public void map(String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
|
||||
if (documentNumber == -1) {
|
||||
throw new RuntimeException("Documentnumber should be set at this point!");
|
||||
}
|
||||
}
|
||||
|
||||
public int getDocumentNumber() {
|
||||
return documentNumber;
|
||||
}
|
||||
|
||||
public void setDocumentNumber(int documentNumber) {
|
||||
this.documentNumber = documentNumber;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue