mirror of https://github.com/apache/lucene.git
LUCENE-1038: Sets the document number of the term vector being mapped.
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@591620 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
8508386bcb
commit
413c92f178
|
@ -142,6 +142,7 @@ New features
|
||||||
3. LUCENE-868: Added new Term Vector access features. New callback mechanism allows application to define how and where to read Term Vectors from disk.
|
3. LUCENE-868: Added new Term Vector access features. New callback mechanism allows application to define how and where to read Term Vectors from disk.
|
||||||
This implementation contains several extensions of the new abstract TermVectorMapper class. The new API should be back-compatible. No changes in the
|
This implementation contains several extensions of the new abstract TermVectorMapper class. The new API should be back-compatible. No changes in the
|
||||||
actual storage of Term Vectors have taken place.
|
actual storage of Term Vectors have taken place.
|
||||||
|
3.1 LUCENE-1038: Added setDocumentNumber() method to TermVectorMapper to provide information about which document is being accessed. (Karl Wettin via Grant Ingersoll)
|
||||||
|
|
||||||
4. LUCENE-975: Added PositionBasedTermVectorMapper that allows for position based lookup of term vector information. See item #3 above (LUCENE-868).
|
4. LUCENE-975: Added PositionBasedTermVectorMapper that allows for position based lookup of term vector information. See item #3 above (LUCENE-868).
|
||||||
|
|
||||||
|
|
|
@ -85,4 +85,17 @@ public abstract class TermVectorMapper {
|
||||||
return ignoringOffsets;
|
return ignoringOffsets;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Passes down the index of the document whose term vector is currently being mapped,
 * once for each top level call to a term vector reader.
 * <p/>
 * The default implementation is a no-op that IGNORES the document number. Override this
 * method if your implementation needs to know which document is being mapped.
 * <p/>
 * NOTE: Document numbers are internal to Lucene and subject to change depending on
 * indexing operations, so they should not be treated as stable identifiers.
 *
 * @param documentNumber index of the document currently being mapped
 */
|
||||||
|
public void setDocumentNumber(int documentNumber) {
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -163,6 +163,7 @@ class TermVectorsReader implements Cloneable {
|
||||||
for (int i = 0; i <= found; i++)
|
for (int i = 0; i <= found; i++)
|
||||||
position += tvd.readVLong();
|
position += tvd.readVLong();
|
||||||
|
|
||||||
|
mapper.setDocumentNumber(docNum);
|
||||||
readTermVector(field, position, mapper);
|
readTermVector(field, position, mapper);
|
||||||
} else {
|
} else {
|
||||||
//System.out.println("Fieldable not found");
|
//System.out.println("Fieldable not found");
|
||||||
|
@ -228,7 +229,7 @@ class TermVectorsReader implements Cloneable {
|
||||||
tvfPointers[i] = position;
|
tvfPointers[i] = position;
|
||||||
}
|
}
|
||||||
|
|
||||||
result = readTermVectors(fields, tvfPointers);
|
result = readTermVectors(docNum, fields, tvfPointers);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
//System.out.println("No tvx file");
|
//System.out.println("No tvx file");
|
||||||
|
@ -268,6 +269,7 @@ class TermVectorsReader implements Cloneable {
|
||||||
tvfPointers[i] = position;
|
tvfPointers[i] = position;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
mapper.setDocumentNumber(docNumber);
|
||||||
readTermVectors(fields, tvfPointers, mapper);
|
readTermVectors(fields, tvfPointers, mapper);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -276,12 +278,13 @@ class TermVectorsReader implements Cloneable {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private SegmentTermVector[] readTermVectors(String fields[], long tvfPointers[])
|
private SegmentTermVector[] readTermVectors(int docNum, String fields[], long tvfPointers[])
|
||||||
throws IOException {
|
throws IOException {
|
||||||
SegmentTermVector res[] = new SegmentTermVector[fields.length];
|
SegmentTermVector res[] = new SegmentTermVector[fields.length];
|
||||||
for (int i = 0; i < fields.length; i++) {
|
for (int i = 0; i < fields.length; i++) {
|
||||||
ParallelArrayTermVectorMapper mapper = new ParallelArrayTermVectorMapper();
|
ParallelArrayTermVectorMapper mapper = new ParallelArrayTermVectorMapper();
|
||||||
readTermVector(fields[i], tvfPointers[i], mapper);
|
mapper.setDocumentNumber(docNum);
|
||||||
|
readTermVector(fields[i], tvfPointers[i], mapper);
|
||||||
res[i] = (SegmentTermVector) mapper.materializeVector();
|
res[i] = (SegmentTermVector) mapper.materializeVector();
|
||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
|
|
|
@ -17,13 +17,13 @@ package org.apache.lucene.index;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
|
||||||
import org.apache.lucene.store.MockRAMDirectory;
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
|
||||||
import org.apache.lucene.analysis.Token;
|
import org.apache.lucene.analysis.Token;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.store.MockRAMDirectory;
|
||||||
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
|
@ -329,6 +329,32 @@ public class TestTermVectorsReader extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// test setDocumentNumber()
|
||||||
|
IndexReader ir = IndexReader.open(dir);
|
||||||
|
DocNumAwareMapper docNumAwareMapper = new DocNumAwareMapper();
|
||||||
|
assertEquals(-1, docNumAwareMapper.getDocumentNumber());
|
||||||
|
|
||||||
|
ir.getTermFreqVector(0, docNumAwareMapper);
|
||||||
|
assertEquals(0, docNumAwareMapper.getDocumentNumber());
|
||||||
|
docNumAwareMapper.setDocumentNumber(-1);
|
||||||
|
|
||||||
|
ir.getTermFreqVector(1, docNumAwareMapper);
|
||||||
|
assertEquals(1, docNumAwareMapper.getDocumentNumber());
|
||||||
|
docNumAwareMapper.setDocumentNumber(-1);
|
||||||
|
|
||||||
|
ir.getTermFreqVector(0, "f1", docNumAwareMapper);
|
||||||
|
assertEquals(0, docNumAwareMapper.getDocumentNumber());
|
||||||
|
docNumAwareMapper.setDocumentNumber(-1);
|
||||||
|
|
||||||
|
ir.getTermFreqVector(1, "f2", docNumAwareMapper);
|
||||||
|
assertEquals(1, docNumAwareMapper.getDocumentNumber());
|
||||||
|
docNumAwareMapper.setDocumentNumber(-1);
|
||||||
|
|
||||||
|
ir.getTermFreqVector(0, "f1", docNumAwareMapper);
|
||||||
|
assertEquals(0, docNumAwareMapper.getDocumentNumber());
|
||||||
|
|
||||||
|
ir.close();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -364,4 +390,33 @@ public class TestTermVectorsReader extends LuceneTestCase {
|
||||||
fail();
|
fail();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public static class DocNumAwareMapper extends TermVectorMapper {
|
||||||
|
|
||||||
|
public DocNumAwareMapper() {
|
||||||
|
}
|
||||||
|
|
||||||
|
private int documentNumber = -1;
|
||||||
|
|
||||||
|
public void setExpectations(String field, int numTerms, boolean storeOffsets, boolean storePositions) {
|
||||||
|
if (documentNumber == -1) {
|
||||||
|
throw new RuntimeException("Documentnumber should be set at this point!");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void map(String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
|
||||||
|
if (documentNumber == -1) {
|
||||||
|
throw new RuntimeException("Documentnumber should be set at this point!");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getDocumentNumber() {
|
||||||
|
return documentNumber;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setDocumentNumber(int documentNumber) {
|
||||||
|
this.documentNumber = documentNumber;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue