LUCENE-1038: Sets the document number of the term vector being mapped.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@591620 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Grant Ingersoll 2007-11-03 13:41:46 +00:00
parent 8508386bcb
commit 413c92f178
4 changed files with 79 additions and 7 deletions

View File

@ -142,6 +142,7 @@ New features
3. LUCENE-868: Added new Term Vector access features. New callback mechanism allows application to define how and where to read Term Vectors from disk. 3. LUCENE-868: Added new Term Vector access features. New callback mechanism allows application to define how and where to read Term Vectors from disk.
This implementation contains several extensions of the new abstract TermVectorMapper class. The new API should be back-compatible. No changes in the This implementation contains several extensions of the new abstract TermVectorMapper class. The new API should be back-compatible. No changes in the
actual storage of Term Vectors has taken place. actual storage of Term Vectors has taken place.
3.1 LUCENE-1038: Added setDocumentNumber() method to TermVectorMapper to provide information about what document is being accessed. (Karl Wetting via Grant Ingersoll)
4. LUCENE-975: Added PositionBasedTermVectorMapper that allows for position based lookup of term vector information. See item #3 above (LUCENE-868). 4. LUCENE-975: Added PositionBasedTermVectorMapper that allows for position based lookup of term vector information. See item #3 above (LUCENE-868).

View File

@ -85,4 +85,17 @@ public abstract class TermVectorMapper {
return ignoringOffsets; return ignoringOffsets;
} }
/**
* Passes down the index of the document whose term vector is currently being mapped,
* once for each top level call to a term vector reader.
*<p/>
* Default implementation IGNORES the document number. Override if your implementation needs the document number.
* <p/>
* NOTE: Document numbers are internal to Lucene and subject to change depending on indexing operations.
*
* @param documentNumber index of document currently being mapped
*/
public void setDocumentNumber(int documentNumber) {
}
} }

View File

@ -163,6 +163,7 @@ class TermVectorsReader implements Cloneable {
for (int i = 0; i <= found; i++) for (int i = 0; i <= found; i++)
position += tvd.readVLong(); position += tvd.readVLong();
mapper.setDocumentNumber(docNum);
readTermVector(field, position, mapper); readTermVector(field, position, mapper);
} else { } else {
//System.out.println("Fieldable not found"); //System.out.println("Fieldable not found");
@ -228,7 +229,7 @@ class TermVectorsReader implements Cloneable {
tvfPointers[i] = position; tvfPointers[i] = position;
} }
result = readTermVectors(fields, tvfPointers); result = readTermVectors(docNum, fields, tvfPointers);
} }
} else { } else {
//System.out.println("No tvx file"); //System.out.println("No tvx file");
@ -268,6 +269,7 @@ class TermVectorsReader implements Cloneable {
tvfPointers[i] = position; tvfPointers[i] = position;
} }
mapper.setDocumentNumber(docNumber);
readTermVectors(fields, tvfPointers, mapper); readTermVectors(fields, tvfPointers, mapper);
} }
} else { } else {
@ -276,12 +278,13 @@ class TermVectorsReader implements Cloneable {
} }
private SegmentTermVector[] readTermVectors(String fields[], long tvfPointers[]) private SegmentTermVector[] readTermVectors(int docNum, String fields[], long tvfPointers[])
throws IOException { throws IOException {
SegmentTermVector res[] = new SegmentTermVector[fields.length]; SegmentTermVector res[] = new SegmentTermVector[fields.length];
for (int i = 0; i < fields.length; i++) { for (int i = 0; i < fields.length; i++) {
ParallelArrayTermVectorMapper mapper = new ParallelArrayTermVectorMapper(); ParallelArrayTermVectorMapper mapper = new ParallelArrayTermVectorMapper();
readTermVector(fields[i], tvfPointers[i], mapper); mapper.setDocumentNumber(docNum);
readTermVector(fields[i], tvfPointers[i], mapper);
res[i] = (SegmentTermVector) mapper.materializeVector(); res[i] = (SegmentTermVector) mapper.materializeVector();
} }
return res; return res;

View File

@ -17,13 +17,13 @@ package org.apache.lucene.index;
* limitations under the License. * limitations under the License.
*/ */
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.store.MockRAMDirectory;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.Token;
import org.apache.lucene.document.Field; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.store.MockRAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
import java.io.IOException; import java.io.IOException;
import java.io.Reader; import java.io.Reader;
@ -329,6 +329,32 @@ public class TestTermVectorsReader extends LuceneTestCase {
} }
} }
// test setDocumentNumber()
IndexReader ir = IndexReader.open(dir);
DocNumAwareMapper docNumAwareMapper = new DocNumAwareMapper();
assertEquals(-1, docNumAwareMapper.getDocumentNumber());
ir.getTermFreqVector(0, docNumAwareMapper);
assertEquals(0, docNumAwareMapper.getDocumentNumber());
docNumAwareMapper.setDocumentNumber(-1);
ir.getTermFreqVector(1, docNumAwareMapper);
assertEquals(1, docNumAwareMapper.getDocumentNumber());
docNumAwareMapper.setDocumentNumber(-1);
ir.getTermFreqVector(0, "f1", docNumAwareMapper);
assertEquals(0, docNumAwareMapper.getDocumentNumber());
docNumAwareMapper.setDocumentNumber(-1);
ir.getTermFreqVector(1, "f2", docNumAwareMapper);
assertEquals(1, docNumAwareMapper.getDocumentNumber());
docNumAwareMapper.setDocumentNumber(-1);
ir.getTermFreqVector(0, "f1", docNumAwareMapper);
assertEquals(0, docNumAwareMapper.getDocumentNumber());
ir.close();
} }
@ -364,4 +390,33 @@ public class TestTermVectorsReader extends LuceneTestCase {
fail(); fail();
} }
} }
public static class DocNumAwareMapper extends TermVectorMapper {
public DocNumAwareMapper() {
}
private int documentNumber = -1;
public void setExpectations(String field, int numTerms, boolean storeOffsets, boolean storePositions) {
if (documentNumber == -1) {
throw new RuntimeException("Documentnumber should be set at this point!");
}
}
public void map(String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
if (documentNumber == -1) {
throw new RuntimeException("Documentnumber should be set at this point!");
}
}
public int getDocumentNumber() {
return documentNumber;
}
public void setDocumentNumber(int documentNumber) {
this.documentNumber = documentNumber;
}
}
} }