mirror of https://github.com/apache/lucene.git
Grant's new termvector patch (Bug #18927) applied with
some modifications. git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150566 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c6d6a390df
commit
f1667be0fc
|
@ -38,6 +38,8 @@ public final class Field implements java.io.Serializable {
|
||||||
private Object fieldsData = null;
|
private Object fieldsData = null;
|
||||||
|
|
||||||
private boolean storeTermVector = false;
|
private boolean storeTermVector = false;
|
||||||
|
private boolean storeOffsetWithTermVector = false;
|
||||||
|
private boolean storePositionWithTermVector = false;
|
||||||
private boolean isStored = false;
|
private boolean isStored = false;
|
||||||
private boolean isIndexed = true;
|
private boolean isIndexed = true;
|
||||||
private boolean isTokenized = true;
|
private boolean isTokenized = true;
|
||||||
|
@ -55,16 +57,19 @@ public final class Field implements java.io.Serializable {
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return name;
|
return name;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Store the original field value in the index in a compressed form. This is
|
/** Store the original field value in the index in a compressed form. This is
|
||||||
* useful for long documents and for binary valued fields.
|
* useful for long documents and for binary valued fields.
|
||||||
*/
|
*/
|
||||||
public static final Store COMPRESS = new Store("COMPRESS");
|
public static final Store COMPRESS = new Store("COMPRESS");
|
||||||
|
|
||||||
/** Store the original field value in the index. This is useful for short texts
|
/** Store the original field value in the index. This is useful for short texts
|
||||||
* like a document's title which should be displayed with the results. The
|
* like a document's title which should be displayed with the results. The
|
||||||
* value is stored in its original form, i.e. no analyzer is used before it is
|
* value is stored in its original form, i.e. no analyzer is used before it is
|
||||||
* stored.
|
* stored.
|
||||||
*/
|
*/
|
||||||
public static final Store YES = new Store("YES");
|
public static final Store YES = new Store("YES");
|
||||||
|
|
||||||
/** Do not store the field value in the index. */
|
/** Do not store the field value in the index. */
|
||||||
public static final Store NO = new Store("NO");
|
public static final Store NO = new Store("NO");
|
||||||
}
|
}
|
||||||
|
@ -100,15 +105,41 @@ public final class Field implements java.io.Serializable {
|
||||||
private TermVector(String name) {
|
private TermVector(String name) {
|
||||||
this.name = name;
|
this.name = name;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return name;
|
return name;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Do not store term vectors.
|
/** Do not store term vectors.
|
||||||
*/
|
*/
|
||||||
public static final TermVector NO = new TermVector("NO");
|
public static final TermVector NO = new TermVector("NO");
|
||||||
|
|
||||||
/** Store the term vectors of each document. A term vector is a list
|
/** Store the term vectors of each document. A term vector is a list
|
||||||
* of the document's terms and their number of occurrences in that document. */
|
* of the document's terms and their number of occurrences in that document. */
|
||||||
public static final TermVector YES = new TermVector("YES");
|
public static final TermVector YES = new TermVector("YES");
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Store the term vector + token position information
|
||||||
|
*
|
||||||
|
* @see #YES
|
||||||
|
*/
|
||||||
|
public static final TermVector WITH_POSITIONS = new TermVector("WITH_POSITIONS");
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Store the term vector + Token offset information
|
||||||
|
*
|
||||||
|
* @see #YES
|
||||||
|
*/
|
||||||
|
public static final TermVector WITH_OFFSETS = new TermVector("WITH_OFFSETS");
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Store the term vector + Token position and offset information
|
||||||
|
*
|
||||||
|
* @see #YES
|
||||||
|
* @see #WITH_POSITIONS
|
||||||
|
* @see #WITH_OFFSETS
|
||||||
|
*/
|
||||||
|
public static final TermVector WITH_POSITIONS_OFFSETS = new TermVector("WITH_POSITIONS_OFFSETS");
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Sets the boost factor hits on this field. This value will be
|
/** Sets the boost factor hits on this field. This value will be
|
||||||
|
@ -290,14 +321,18 @@ public final class Field implements java.io.Serializable {
|
||||||
this.name = name.intern(); // field names are interned
|
this.name = name.intern(); // field names are interned
|
||||||
this.fieldsData = value;
|
this.fieldsData = value;
|
||||||
|
|
||||||
if (store == Store.YES)
|
if (store == Store.YES){
|
||||||
this.isStored = true;
|
this.isStored = true;
|
||||||
|
this.isCompressed = false;
|
||||||
|
}
|
||||||
else if (store == Store.COMPRESS) {
|
else if (store == Store.COMPRESS) {
|
||||||
this.isStored = true;
|
this.isStored = true;
|
||||||
this.isCompressed = true;
|
this.isCompressed = true;
|
||||||
}
|
}
|
||||||
else if (store == Store.NO)
|
else if (store == Store.NO){
|
||||||
this.isStored = false;
|
this.isStored = false;
|
||||||
|
this.isCompressed = false;
|
||||||
|
}
|
||||||
else
|
else
|
||||||
throw new IllegalArgumentException("unknown store parameter " + store);
|
throw new IllegalArgumentException("unknown store parameter " + store);
|
||||||
|
|
||||||
|
@ -314,6 +349,8 @@ public final class Field implements java.io.Serializable {
|
||||||
throw new IllegalArgumentException("unknown index parameter " + index);
|
throw new IllegalArgumentException("unknown index parameter " + index);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
this.isBinary = false;
|
||||||
|
|
||||||
setStoreTermVector(termVector);
|
setStoreTermVector(termVector);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -343,11 +380,18 @@ public final class Field implements java.io.Serializable {
|
||||||
throw new NullPointerException("name cannot be null");
|
throw new NullPointerException("name cannot be null");
|
||||||
if (reader == null)
|
if (reader == null)
|
||||||
throw new NullPointerException("reader cannot be null");
|
throw new NullPointerException("reader cannot be null");
|
||||||
|
|
||||||
this.name = name.intern(); // field names are interned
|
this.name = name.intern(); // field names are interned
|
||||||
this.fieldsData = reader;
|
this.fieldsData = reader;
|
||||||
|
|
||||||
this.isStored = false;
|
this.isStored = false;
|
||||||
|
this.isCompressed = false;
|
||||||
|
|
||||||
this.isIndexed = true;
|
this.isIndexed = true;
|
||||||
this.isTokenized = true;
|
this.isTokenized = true;
|
||||||
|
|
||||||
|
this.isBinary = false;
|
||||||
|
|
||||||
setStoreTermVector(termVector);
|
setStoreTermVector(termVector);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -374,21 +418,29 @@ public final class Field implements java.io.Serializable {
|
||||||
throw new IllegalArgumentException("name cannot be null");
|
throw new IllegalArgumentException("name cannot be null");
|
||||||
if (value == null)
|
if (value == null)
|
||||||
throw new IllegalArgumentException("value cannot be null");
|
throw new IllegalArgumentException("value cannot be null");
|
||||||
if (store == Store.NO)
|
|
||||||
throw new IllegalArgumentException("binary values can't be unstored");
|
|
||||||
if (store == Store.COMPRESS)
|
|
||||||
this.isCompressed = true;
|
|
||||||
|
|
||||||
this.name = name.intern();
|
this.name = name.intern();
|
||||||
//wrap the byte[] to a ByteBuffer object
|
|
||||||
this.fieldsData = value;
|
this.fieldsData = value;
|
||||||
|
|
||||||
this.isBinary = true;
|
if (store == Store.YES){
|
||||||
this.isStored = true;
|
this.isStored = true;
|
||||||
|
this.isCompressed = false;
|
||||||
|
}
|
||||||
|
else if (store == Store.COMPRESS) {
|
||||||
|
this.isStored = true;
|
||||||
|
this.isCompressed = true;
|
||||||
|
}
|
||||||
|
else if (store == Store.NO)
|
||||||
|
throw new IllegalArgumentException("binary values can't be unstored");
|
||||||
|
else
|
||||||
|
throw new IllegalArgumentException("unknown store parameter " + store);
|
||||||
|
|
||||||
this.isIndexed = false;
|
this.isIndexed = false;
|
||||||
this.isTokenized = false;
|
this.isTokenized = false;
|
||||||
this.storeTermVector = false;
|
|
||||||
|
this.isBinary = true;
|
||||||
|
|
||||||
|
setStoreTermVector(TermVector.NO);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -422,9 +474,30 @@ public final class Field implements java.io.Serializable {
|
||||||
private void setStoreTermVector(TermVector termVector) {
|
private void setStoreTermVector(TermVector termVector) {
|
||||||
if (termVector == TermVector.NO) {
|
if (termVector == TermVector.NO) {
|
||||||
this.storeTermVector = false;
|
this.storeTermVector = false;
|
||||||
} else if (termVector == TermVector.YES) {
|
this.storePositionWithTermVector = false;
|
||||||
|
this.storeOffsetWithTermVector = false;
|
||||||
|
}
|
||||||
|
else if (termVector == TermVector.YES) {
|
||||||
this.storeTermVector = true;
|
this.storeTermVector = true;
|
||||||
} else {
|
this.storePositionWithTermVector = false;
|
||||||
|
this.storeOffsetWithTermVector = false;
|
||||||
|
}
|
||||||
|
else if (termVector == TermVector.WITH_POSITIONS) {
|
||||||
|
this.storeTermVector = true;
|
||||||
|
this.storePositionWithTermVector = true;
|
||||||
|
this.storeOffsetWithTermVector = false;
|
||||||
|
}
|
||||||
|
else if (termVector == TermVector.WITH_OFFSETS) {
|
||||||
|
this.storeTermVector = true;
|
||||||
|
this.storePositionWithTermVector = false;
|
||||||
|
this.storeOffsetWithTermVector = true;
|
||||||
|
}
|
||||||
|
else if (termVector == TermVector.WITH_POSITIONS_OFFSETS) {
|
||||||
|
this.storeTermVector = true;
|
||||||
|
this.storePositionWithTermVector = true;
|
||||||
|
this.storeOffsetWithTermVector = true;
|
||||||
|
}
|
||||||
|
else {
|
||||||
throw new IllegalArgumentException("unknown termVector parameter " + termVector);
|
throw new IllegalArgumentException("unknown termVector parameter " + termVector);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -456,6 +529,23 @@ public final class Field implements java.io.Serializable {
|
||||||
*/
|
*/
|
||||||
public final boolean isTermVectorStored() { return storeTermVector; }
|
public final boolean isTermVectorStored() { return storeTermVector; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* True iff terms are stored as term vector together with their offsets
|
||||||
|
* (start and end position in source text).
|
||||||
|
* @return true if offsets are stored with the term vector
|
||||||
|
*/
|
||||||
|
public boolean isStoreOffsetWithTermVector(){
|
||||||
|
return storeOffsetWithTermVector;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* True iff terms are stored as term vector together with their token positions.
|
||||||
|
* @return true if token positions are stored with the term vector
|
||||||
|
*/
|
||||||
|
public boolean isStorePositionWithTermVector(){
|
||||||
|
return storePositionWithTermVector;
|
||||||
|
}
|
||||||
|
|
||||||
/** True iff the value of the field is stored as binary */
|
/** True iff the value of the field is stored as binary */
|
||||||
public final boolean isBinary() { return isBinary; }
|
public final boolean isBinary() { return isBinary; }
|
||||||
|
|
||||||
|
@ -479,6 +569,16 @@ public final class Field implements java.io.Serializable {
|
||||||
result.append(",");
|
result.append(",");
|
||||||
result.append("termVector");
|
result.append("termVector");
|
||||||
}
|
}
|
||||||
|
if (storeOffsetWithTermVector) {
|
||||||
|
if (result.length() > 0)
|
||||||
|
result.append(",");
|
||||||
|
result.append("termVectorOffsets");
|
||||||
|
}
|
||||||
|
if (storePositionWithTermVector) {
|
||||||
|
if (result.length() > 0)
|
||||||
|
result.append(",");
|
||||||
|
result.append("termVectorPosition");
|
||||||
|
}
|
||||||
if (isBinary) {
|
if (isBinary) {
|
||||||
if (result.length() > 0)
|
if (result.length() > 0)
|
||||||
result.append(",");
|
result.append(",");
|
||||||
|
|
|
@ -74,6 +74,7 @@ final class DocumentWriter {
|
||||||
postingTable.clear(); // clear postingTable
|
postingTable.clear(); // clear postingTable
|
||||||
fieldLengths = new int[fieldInfos.size()]; // init fieldLengths
|
fieldLengths = new int[fieldInfos.size()]; // init fieldLengths
|
||||||
fieldPositions = new int[fieldInfos.size()]; // init fieldPositions
|
fieldPositions = new int[fieldInfos.size()]; // init fieldPositions
|
||||||
|
fieldOffsets = new int[fieldInfos.size()]; // init fieldOffsets
|
||||||
|
|
||||||
fieldBoosts = new float[fieldInfos.size()]; // init fieldBoosts
|
fieldBoosts = new float[fieldInfos.size()]; // init fieldBoosts
|
||||||
Arrays.fill(fieldBoosts, doc.getBoost());
|
Arrays.fill(fieldBoosts, doc.getBoost());
|
||||||
|
@ -100,7 +101,7 @@ final class DocumentWriter {
|
||||||
writePostings(postings, segment);
|
writePostings(postings, segment);
|
||||||
|
|
||||||
// write norms of indexed fields
|
// write norms of indexed fields
|
||||||
writeNorms(doc, segment);
|
writeNorms(segment);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -109,6 +110,7 @@ final class DocumentWriter {
|
||||||
private final Hashtable postingTable = new Hashtable();
|
private final Hashtable postingTable = new Hashtable();
|
||||||
private int[] fieldLengths;
|
private int[] fieldLengths;
|
||||||
private int[] fieldPositions;
|
private int[] fieldPositions;
|
||||||
|
private int[] fieldOffsets;
|
||||||
private float[] fieldBoosts;
|
private float[] fieldBoosts;
|
||||||
|
|
||||||
// Tokenizes the fields of a document into Postings.
|
// Tokenizes the fields of a document into Postings.
|
||||||
|
@ -122,12 +124,19 @@ final class DocumentWriter {
|
||||||
|
|
||||||
int length = fieldLengths[fieldNumber]; // length of field
|
int length = fieldLengths[fieldNumber]; // length of field
|
||||||
int position = fieldPositions[fieldNumber]; // position in field
|
int position = fieldPositions[fieldNumber]; // position in field
|
||||||
|
int offset = fieldOffsets[fieldNumber]; // offset field
|
||||||
|
|
||||||
if (field.isIndexed()) {
|
if (field.isIndexed()) {
|
||||||
if (!field.isTokenized()) { // un-tokenized field
|
if (!field.isTokenized()) { // un-tokenized field
|
||||||
addPosition(fieldName, field.stringValue(), position++);
|
String stringValue = field.stringValue();
|
||||||
|
if(field.isStoreOffsetWithTermVector())
|
||||||
|
addPosition(fieldName, stringValue, position++, new TermVectorOffsetInfo(offset, offset + stringValue.length()));
|
||||||
|
else
|
||||||
|
addPosition(fieldName, stringValue, position++, null);
|
||||||
|
offset += stringValue.length();
|
||||||
length++;
|
length++;
|
||||||
} else {
|
} else
|
||||||
|
{
|
||||||
Reader reader; // find or make Reader
|
Reader reader; // find or make Reader
|
||||||
if (field.readerValue() != null)
|
if (field.readerValue() != null)
|
||||||
reader = field.readerValue();
|
reader = field.readerValue();
|
||||||
|
@ -140,11 +149,23 @@ final class DocumentWriter {
|
||||||
// Tokenize field and add to postingTable
|
// Tokenize field and add to postingTable
|
||||||
TokenStream stream = analyzer.tokenStream(fieldName, reader);
|
TokenStream stream = analyzer.tokenStream(fieldName, reader);
|
||||||
try {
|
try {
|
||||||
|
Token lastToken = null;
|
||||||
for (Token t = stream.next(); t != null; t = stream.next()) {
|
for (Token t = stream.next(); t != null; t = stream.next()) {
|
||||||
position += (t.getPositionIncrement() - 1);
|
position += (t.getPositionIncrement() - 1);
|
||||||
addPosition(fieldName, t.termText(), position++);
|
|
||||||
if (++length > maxFieldLength) break;
|
if(field.isStoreOffsetWithTermVector())
|
||||||
|
addPosition(fieldName, t.termText(), position++, new TermVectorOffsetInfo(offset + t.startOffset(), offset + t.endOffset()));
|
||||||
|
else
|
||||||
|
addPosition(fieldName, t.termText(), position++, null);
|
||||||
|
|
||||||
|
lastToken = t;
|
||||||
|
if (++length > maxFieldLength)
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(lastToken != null)
|
||||||
|
offset += lastToken.endOffset() + 1;
|
||||||
|
|
||||||
} finally {
|
} finally {
|
||||||
stream.close();
|
stream.close();
|
||||||
}
|
}
|
||||||
|
@ -153,14 +174,16 @@ final class DocumentWriter {
|
||||||
fieldLengths[fieldNumber] = length; // save field length
|
fieldLengths[fieldNumber] = length; // save field length
|
||||||
fieldPositions[fieldNumber] = position; // save field position
|
fieldPositions[fieldNumber] = position; // save field position
|
||||||
fieldBoosts[fieldNumber] *= field.getBoost();
|
fieldBoosts[fieldNumber] *= field.getBoost();
|
||||||
|
fieldOffsets[fieldNumber] = offset;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private final Term termBuffer = new Term("", ""); // avoid consing
|
private final Term termBuffer = new Term("", ""); // avoid consing
|
||||||
|
|
||||||
private final void addPosition(String field, String text, int position) {
|
private final void addPosition(String field, String text, int position, TermVectorOffsetInfo offset) {
|
||||||
termBuffer.set(field, text);
|
termBuffer.set(field, text);
|
||||||
|
//System.out.println("Offset: " + offset);
|
||||||
Posting ti = (Posting) postingTable.get(termBuffer);
|
Posting ti = (Posting) postingTable.get(termBuffer);
|
||||||
if (ti != null) { // word seen before
|
if (ti != null) { // word seen before
|
||||||
int freq = ti.freq;
|
int freq = ti.freq;
|
||||||
|
@ -172,10 +195,23 @@ final class DocumentWriter {
|
||||||
ti.positions = newPositions;
|
ti.positions = newPositions;
|
||||||
}
|
}
|
||||||
ti.positions[freq] = position; // add new position
|
ti.positions[freq] = position; // add new position
|
||||||
|
|
||||||
|
if (offset != null) {
|
||||||
|
if (ti.offsets.length == freq){
|
||||||
|
TermVectorOffsetInfo [] newOffsets = new TermVectorOffsetInfo[freq*2];
|
||||||
|
TermVectorOffsetInfo [] offsets = ti.offsets;
|
||||||
|
for (int i = 0; i < freq; i++)
|
||||||
|
{
|
||||||
|
newOffsets[i] = offsets[i];
|
||||||
|
}
|
||||||
|
ti.offsets = newOffsets;
|
||||||
|
}
|
||||||
|
ti.offsets[freq] = offset;
|
||||||
|
}
|
||||||
ti.freq = freq + 1; // update frequency
|
ti.freq = freq + 1; // update frequency
|
||||||
} else { // word not seen before
|
} else { // word not seen before
|
||||||
Term term = new Term(field, text, false);
|
Term term = new Term(field, text, false);
|
||||||
postingTable.put(term, new Posting(term, position));
|
postingTable.put(term, new Posting(term, position, offset));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -294,12 +330,13 @@ final class DocumentWriter {
|
||||||
termVectorWriter.openDocument();
|
termVectorWriter.openDocument();
|
||||||
}
|
}
|
||||||
termVectorWriter.openField(currentField);
|
termVectorWriter.openField(currentField);
|
||||||
|
|
||||||
} else if (termVectorWriter != null) {
|
} else if (termVectorWriter != null) {
|
||||||
termVectorWriter.closeField();
|
termVectorWriter.closeField();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (termVectorWriter != null && termVectorWriter.isFieldOpen()) {
|
if (termVectorWriter != null && termVectorWriter.isFieldOpen()) {
|
||||||
termVectorWriter.addTerm(posting.term.text(), postingFreq);
|
termVectorWriter.addTerm(posting.term.text(), postingFreq, posting.positions, posting.offsets);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (termVectorWriter != null)
|
if (termVectorWriter != null)
|
||||||
|
@ -316,7 +353,7 @@ final class DocumentWriter {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private final void writeNorms(Document doc, String segment) throws IOException {
|
private final void writeNorms(String segment) throws IOException {
|
||||||
for(int n = 0; n < fieldInfos.size(); n++){
|
for(int n = 0; n < fieldInfos.size(); n++){
|
||||||
FieldInfo fi = fieldInfos.fieldInfo(n);
|
FieldInfo fi = fieldInfos.fieldInfo(n);
|
||||||
if(fi.isIndexed){
|
if(fi.isIndexed){
|
||||||
|
@ -336,11 +373,18 @@ final class Posting { // info about a Term in a doc
|
||||||
Term term; // the Term
|
Term term; // the Term
|
||||||
int freq; // its frequency in doc
|
int freq; // its frequency in doc
|
||||||
int[] positions; // positions it occurs at
|
int[] positions; // positions it occurs at
|
||||||
|
TermVectorOffsetInfo [] offsets;
|
||||||
|
|
||||||
Posting(Term t, int position) {
|
Posting(Term t, int position, TermVectorOffsetInfo offset) {
|
||||||
term = t;
|
term = t;
|
||||||
freq = 1;
|
freq = 1;
|
||||||
positions = new int[1];
|
positions = new int[1];
|
||||||
positions[0] = position;
|
positions[0] = position;
|
||||||
|
if(offset != null){
|
||||||
|
offsets = new TermVectorOffsetInfo[1];
|
||||||
|
offsets[0] = offset;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
offsets = null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,11 +23,16 @@ final class FieldInfo {
|
||||||
|
|
||||||
// true if term vector for this field should be stored
|
// true if term vector for this field should be stored
|
||||||
boolean storeTermVector;
|
boolean storeTermVector;
|
||||||
|
boolean storeOffsetWithTermVector;
|
||||||
|
boolean storePositionWithTermVector;
|
||||||
|
|
||||||
FieldInfo(String na, boolean tk, int nu, boolean storeTermVector) {
|
FieldInfo(String na, boolean tk, int nu, boolean storeTermVector,
|
||||||
|
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector) {
|
||||||
name = na;
|
name = na;
|
||||||
isIndexed = tk;
|
isIndexed = tk;
|
||||||
number = nu;
|
number = nu;
|
||||||
this.storeTermVector = storeTermVector;
|
this.storeTermVector = storeTermVector;
|
||||||
|
this.storeOffsetWithTermVector = storeOffsetWithTermVector;
|
||||||
|
this.storePositionWithTermVector = storePositionWithTermVector;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -33,6 +33,12 @@ import org.apache.lucene.store.IndexInput;
|
||||||
* accessing this object.
|
* accessing this object.
|
||||||
*/
|
*/
|
||||||
final class FieldInfos {
|
final class FieldInfos {
|
||||||
|
|
||||||
|
static final byte IS_INDEXED = 0x1;
|
||||||
|
static final byte STORE_TERMVECTOR = 0x2;
|
||||||
|
static final byte STORE_POSITIONS_WITH_TERMVECTOR = 0x4;
|
||||||
|
static final byte STORE_OFFSET_WITH_TERMVECTOR = 0x8;
|
||||||
|
|
||||||
private ArrayList byNumber = new ArrayList();
|
private ArrayList byNumber = new ArrayList();
|
||||||
private HashMap byName = new HashMap();
|
private HashMap byName = new HashMap();
|
||||||
|
|
||||||
|
@ -61,23 +67,30 @@ final class FieldInfos {
|
||||||
Enumeration fields = doc.fields();
|
Enumeration fields = doc.fields();
|
||||||
while (fields.hasMoreElements()) {
|
while (fields.hasMoreElements()) {
|
||||||
Field field = (Field) fields.nextElement();
|
Field field = (Field) fields.nextElement();
|
||||||
add(field.name(), field.isIndexed(), field.isTermVectorStored());
|
add(field.name(), field.isIndexed(), field.isTermVectorStored(), field.isStorePositionWithTermVector(),
|
||||||
|
field.isStoreOffsetWithTermVector());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
* Add fields that are indexed. Whether they have termvectors has to be specified.
|
||||||
|
*
|
||||||
* @param names The names of the fields
|
* @param names The names of the fields
|
||||||
* @param storeTermVectors Whether the fields store term vectors or not
|
* @param storeTermVectors Whether the fields store term vectors or not
|
||||||
|
* @param storePositionWithTermVector true if positions should be stored.
|
||||||
|
* @param storeOffsetWithTermVector true if offsets should be stored
|
||||||
*/
|
*/
|
||||||
public void addIndexed(Collection names, boolean storeTermVectors) {
|
public void addIndexed(Collection names, boolean storeTermVectors, boolean storePositionWithTermVector,
|
||||||
|
boolean storeOffsetWithTermVector) {
|
||||||
Iterator i = names.iterator();
|
Iterator i = names.iterator();
|
||||||
while (i.hasNext()) {
|
while (i.hasNext()) {
|
||||||
add((String)i.next(), true, storeTermVectors);
|
add((String)i.next(), true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Assumes the field is not storing term vectors
|
* Assumes the fields are not storing term vectors.
|
||||||
|
*
|
||||||
* @param names The names of the fields
|
* @param names The names of the fields
|
||||||
* @param isIndexed Whether the fields are indexed or not
|
* @param isIndexed Whether the fields are indexed or not
|
||||||
*
|
*
|
||||||
|
@ -91,28 +104,43 @@ final class FieldInfos {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Calls three parameter add with false for the storeTermVector parameter
|
* Calls 5 parameter add with false for all TermVector parameters.
|
||||||
|
*
|
||||||
* @param name The name of the Field
|
* @param name The name of the Field
|
||||||
* @param isIndexed true if the field is indexed
|
* @param isIndexed true if the field is indexed
|
||||||
* @see #add(String, boolean, boolean)
|
* @see #add(String, boolean, boolean, boolean, boolean)
|
||||||
*/
|
*/
|
||||||
public void add(String name, boolean isIndexed) {
|
public void add(String name, boolean isIndexed) {
|
||||||
add(name, isIndexed, false);
|
add(name, isIndexed, false, false, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
/** If the field is not yet known, adds it. If it is known, checks to make
|
* Calls 5 parameter add with false for term vector positions and offsets.
|
||||||
* sure that the isIndexed flag is the same as was given previously for this
|
|
||||||
* field. If not - marks it as being indexed. Same goes for storeTermVector
|
|
||||||
*
|
*
|
||||||
* @param name The name of the field
|
* @param name The name of the field
|
||||||
* @param isIndexed true if the field is indexed
|
* @param isIndexed true if the field is indexed
|
||||||
* @param storeTermVector true if the term vector should be stored
|
* @param storeTermVector true if the term vector should be stored
|
||||||
*/
|
*/
|
||||||
public void add(String name, boolean isIndexed, boolean storeTermVector){
|
public void add(String name, boolean isIndexed, boolean storeTermVector){
|
||||||
|
add(name, isIndexed, storeTermVector, false, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** If the field is not yet known, adds it. If it is known, checks to make
|
||||||
|
* sure that the isIndexed flag is the same as was given previously for this
|
||||||
|
* field. If not - marks it as being indexed. Same goes for the TermVector
|
||||||
|
* parameters.
|
||||||
|
*
|
||||||
|
* @param name The name of the field
|
||||||
|
* @param isIndexed true if the field is indexed
|
||||||
|
* @param storeTermVector true if the term vector should be stored
|
||||||
|
* @param storePositionWithTermVector true if the term vector with positions should be stored
|
||||||
|
* @param storeOffsetWithTermVector true if the term vector with offsets should be stored
|
||||||
|
*/
|
||||||
|
public void add(String name, boolean isIndexed, boolean storeTermVector,
|
||||||
|
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector) {
|
||||||
FieldInfo fi = fieldInfo(name);
|
FieldInfo fi = fieldInfo(name);
|
||||||
if (fi == null) {
|
if (fi == null) {
|
||||||
addInternal(name, isIndexed, storeTermVector);
|
addInternal(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector);
|
||||||
} else {
|
} else {
|
||||||
if (fi.isIndexed != isIndexed) {
|
if (fi.isIndexed != isIndexed) {
|
||||||
fi.isIndexed = true; // once indexed, always index
|
fi.isIndexed = true; // once indexed, always index
|
||||||
|
@ -120,13 +148,21 @@ final class FieldInfos {
|
||||||
if (fi.storeTermVector != storeTermVector) {
|
if (fi.storeTermVector != storeTermVector) {
|
||||||
fi.storeTermVector = true; // once vector, always vector
|
fi.storeTermVector = true; // once vector, always vector
|
||||||
}
|
}
|
||||||
|
if (fi.storePositionWithTermVector != storePositionWithTermVector) {
|
||||||
|
fi.storePositionWithTermVector = true; // once vector, always vector
|
||||||
|
}
|
||||||
|
if (fi.storeOffsetWithTermVector != storeOffsetWithTermVector) {
|
||||||
|
fi.storeOffsetWithTermVector = true; // once vector, always vector
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void addInternal(String name, boolean isIndexed,
|
private void addInternal(String name, boolean isIndexed,
|
||||||
boolean storeTermVector) {
|
boolean storeTermVector, boolean storePositionWithTermVector,
|
||||||
|
boolean storeOffsetWithTermVector) {
|
||||||
FieldInfo fi =
|
FieldInfo fi =
|
||||||
new FieldInfo(name, isIndexed, byNumber.size(), storeTermVector);
|
new FieldInfo(name, isIndexed, byNumber.size(), storeTermVector, storePositionWithTermVector,
|
||||||
|
storeOffsetWithTermVector);
|
||||||
byNumber.add(fi);
|
byNumber.add(fi);
|
||||||
byName.put(name, fi);
|
byName.put(name, fi);
|
||||||
}
|
}
|
||||||
|
@ -180,11 +216,11 @@ final class FieldInfos {
|
||||||
for (int i = 0; i < size(); i++) {
|
for (int i = 0; i < size(); i++) {
|
||||||
FieldInfo fi = fieldInfo(i);
|
FieldInfo fi = fieldInfo(i);
|
||||||
byte bits = 0x0;
|
byte bits = 0x0;
|
||||||
if (fi.isIndexed) bits |= 0x1;
|
if (fi.isIndexed) bits |= IS_INDEXED;
|
||||||
if (fi.storeTermVector) bits |= 0x2;
|
if (fi.storeTermVector) bits |= STORE_TERMVECTOR;
|
||||||
|
if (fi.storePositionWithTermVector) bits |= STORE_POSITIONS_WITH_TERMVECTOR;
|
||||||
|
if (fi.storeOffsetWithTermVector) bits |= STORE_OFFSET_WITH_TERMVECTOR;
|
||||||
output.writeString(fi.name);
|
output.writeString(fi.name);
|
||||||
//Was REMOVE
|
|
||||||
//output.writeByte((byte)(fi.isIndexed ? 1 : 0));
|
|
||||||
output.writeByte(bits);
|
output.writeByte(bits);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -194,9 +230,11 @@ final class FieldInfos {
|
||||||
for (int i = 0; i < size; i++) {
|
for (int i = 0; i < size; i++) {
|
||||||
String name = input.readString().intern();
|
String name = input.readString().intern();
|
||||||
byte bits = input.readByte();
|
byte bits = input.readByte();
|
||||||
boolean isIndexed = (bits & 0x1) != 0;
|
boolean isIndexed = (bits & IS_INDEXED) != 0;
|
||||||
boolean storeTermVector = (bits & 0x2) != 0;
|
boolean storeTermVector = (bits & STORE_TERMVECTOR) != 0;
|
||||||
addInternal(name, isIndexed, storeTermVector);
|
boolean storePositionsWithTermVector = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
|
||||||
|
boolean storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
|
||||||
|
addInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -16,11 +16,12 @@ package org.apache.lucene.index;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
|
||||||
import org.apache.lucene.document.Document;
|
|
||||||
|
|
||||||
/** A <code>FilterIndexReader</code> contains another IndexReader, which it
|
/** A <code>FilterIndexReader</code> contains another IndexReader, which it
|
||||||
* uses as its basic source of data, possibly transforming the data along the
|
* uses as its basic source of data, possibly transforming the data along the
|
||||||
* way or providing additional functionality. The class
|
* way or providing additional functionality. The class
|
||||||
|
@ -146,4 +147,8 @@ public class FilterIndexReader extends IndexReader {
|
||||||
public Collection getIndexedFieldNames(boolean storedTermVector) {
|
public Collection getIndexedFieldNames(boolean storedTermVector) {
|
||||||
return in.getIndexedFieldNames(storedTermVector);
|
return in.getIndexedFieldNames(storedTermVector);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Collection getIndexedFieldNames (Field.TermVector tvSpec){
|
||||||
|
return in.getIndexedFieldNames(tvSpec);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,16 +16,16 @@ package org.apache.lucene.index;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import org.apache.lucene.document.Document;
|
||||||
import java.io.File;
|
import org.apache.lucene.document.Field;
|
||||||
import java.util.Collection;
|
import org.apache.lucene.search.Similarity;
|
||||||
|
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.FSDirectory;
|
import org.apache.lucene.store.FSDirectory;
|
||||||
import org.apache.lucene.store.Lock;
|
import org.apache.lucene.store.Lock;
|
||||||
import org.apache.lucene.document.Document;
|
|
||||||
import org.apache.lucene.document.Field; // for javadoc
|
import java.io.File;
|
||||||
import org.apache.lucene.search.Similarity;
|
import java.io.IOException;
|
||||||
|
import java.util.Collection;
|
||||||
|
|
||||||
/** IndexReader is an abstract class, providing an interface for accessing an
|
/** IndexReader is an abstract class, providing an interface for accessing an
|
||||||
index. Search of an index is done entirely through this abstract interface,
|
index. Search of an index is done entirely through this abstract interface,
|
||||||
|
@ -209,23 +209,37 @@ public abstract class IndexReader {
|
||||||
return SegmentInfos.readCurrentVersion(directory);
|
return SegmentInfos.readCurrentVersion(directory);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Return an array of term frequency vectors for the specified document.
|
/**
|
||||||
|
* Return an array of term frequency vectors for the specified document.
|
||||||
* The array contains a vector for each vectorized field in the document.
|
* The array contains a vector for each vectorized field in the document.
|
||||||
* Each vector contains terms and frequencies for all terms
|
* Each vector contains terms and frequencies for all terms in a given vectorized field.
|
||||||
* in a given vectorized field.
|
* If no such fields existed, the method returns null. The term vectors that are
|
||||||
* If no such fields existed, the method returns null.
|
* returned may either be of type TermFreqVector or of type TermPositionVector if
|
||||||
|
* positions or offsets have been stored.
|
||||||
*
|
*
|
||||||
* @see Field#isTermVectorStored()
|
* @param docNumber document for which term frequency vectors are returned
|
||||||
|
* @return array of term frequency vectors. May be null if no term vectors have been
|
||||||
|
* stored for the specified document.
|
||||||
|
* @throws IOException if index cannot be accessed
|
||||||
|
* @see Field.TermVector
|
||||||
*/
|
*/
|
||||||
abstract public TermFreqVector[] getTermFreqVectors(int docNumber)
|
abstract public TermFreqVector[] getTermFreqVectors(int docNumber)
|
||||||
throws IOException;
|
throws IOException;
|
||||||
|
|
||||||
/** Return a term frequency vector for the specified document and field. The
|
|
||||||
* vector returned contains terms and frequencies for those terms in
|
/**
|
||||||
* the specified field of this document, if the field had storeTermVector
|
* Return a term frequency vector for the specified document and field. The
|
||||||
* flag set. If the flag was not set, the method returns null.
|
* returned vector contains terms and frequencies for the terms in
|
||||||
|
* the specified field of this document, if the field had the storeTermVector
|
||||||
|
* flag set. If termvectors had been stored with positions or offsets, a
|
||||||
|
* TermPositionVector is returned.
|
||||||
*
|
*
|
||||||
* @see Field#isTermVectorStored()
|
* @param docNumber document for which the term frequency vector is returned
|
||||||
|
* @param field field for which the term frequency vector is returned.
|
||||||
|
* @return term frequency vector. May be null if field does not exist in the specified
|
||||||
|
* document or term vector was not stored.
|
||||||
|
* @throws IOException if index cannot be accessed
|
||||||
|
* @see Field.TermVector
|
||||||
*/
|
*/
|
||||||
abstract public TermFreqVector getTermFreqVector(int docNumber, String field)
|
abstract public TermFreqVector getTermFreqVector(int docNumber, String field)
|
||||||
throws IOException;
|
throws IOException;
|
||||||
|
@ -547,9 +561,20 @@ public abstract class IndexReader {
|
||||||
* @param storedTermVector if true, returns only Indexed fields that have term vector info,
|
* @param storedTermVector if true, returns only Indexed fields that have term vector info,
|
||||||
* else only indexed fields without term vector info
|
* else only indexed fields without term vector info
|
||||||
* @return Collection of Strings indicating the names of the fields
|
* @return Collection of Strings indicating the names of the fields
|
||||||
|
*
|
||||||
|
* @deprecated Replaced by {@link #getIndexedFieldNames (Field.TermVector tvSpec)}
|
||||||
*/
|
*/
|
||||||
public abstract Collection getIndexedFieldNames(boolean storedTermVector);
|
public abstract Collection getIndexedFieldNames(boolean storedTermVector);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get a list of unique field names that exist in this index, are indexed, and have
|
||||||
|
* the specified term vector information.
|
||||||
|
*
|
||||||
|
* @param tvSpec specifies which term vector information should be available for the fields
|
||||||
|
* @return Collection of Strings indicating the names of the fields
|
||||||
|
*/
|
||||||
|
public abstract Collection getIndexedFieldNames(Field.TermVector tvSpec);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns <code>true</code> iff the index in the named directory is
|
* Returns <code>true</code> iff the index in the named directory is
|
||||||
* currently locked.
|
* currently locked.
|
||||||
|
@ -560,7 +585,6 @@ public abstract class IndexReader {
|
||||||
return
|
return
|
||||||
directory.makeLock(IndexWriter.WRITE_LOCK_NAME).isLocked() ||
|
directory.makeLock(IndexWriter.WRITE_LOCK_NAME).isLocked() ||
|
||||||
directory.makeLock(IndexWriter.COMMIT_LOCK_NAME).isLocked();
|
directory.makeLock(IndexWriter.COMMIT_LOCK_NAME).isLocked();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -16,16 +16,13 @@ package org.apache.lucene.index;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Collection;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.Hashtable;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
/** An IndexReader which reads multiple indexes, appending their content.
|
/** An IndexReader which reads multiple indexes, appending their content.
|
||||||
*
|
*
|
||||||
* @version $Id$
|
* @version $Id$
|
||||||
|
@ -219,11 +216,7 @@ public class MultiReader extends IndexReader {
|
||||||
for (int i = 0; i < subReaders.length; i++) {
|
for (int i = 0; i < subReaders.length; i++) {
|
||||||
IndexReader reader = subReaders[i];
|
IndexReader reader = subReaders[i];
|
||||||
Collection names = reader.getFieldNames();
|
Collection names = reader.getFieldNames();
|
||||||
// iterate through the field names and add them to the set
|
fieldSet.addAll(names);
|
||||||
for (Iterator iterator = names.iterator(); iterator.hasNext();) {
|
|
||||||
String s = (String) iterator.next();
|
|
||||||
fieldSet.add(s);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return fieldSet;
|
return fieldSet;
|
||||||
}
|
}
|
||||||
|
@ -253,6 +246,17 @@ public class MultiReader extends IndexReader {
|
||||||
return fieldSet;
|
return fieldSet;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Collection getIndexedFieldNames (Field.TermVector tvSpec){
|
||||||
|
// maintain a unique set of field names
|
||||||
|
Set fieldSet = new HashSet();
|
||||||
|
for (int i = 0; i < subReaders.length; i++) {
|
||||||
|
IndexReader reader = subReaders[i];
|
||||||
|
Collection names = reader.getIndexedFieldNames(tvSpec);
|
||||||
|
fieldSet.addAll(names);
|
||||||
|
}
|
||||||
|
return fieldSet;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
class MultiTermEnum extends TermEnum {
|
class MultiTermEnum extends TermEnum {
|
||||||
|
|
|
@ -20,6 +20,7 @@ import java.util.Vector;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.IndexOutput;
|
import org.apache.lucene.store.IndexOutput;
|
||||||
import org.apache.lucene.store.RAMOutputStream;
|
import org.apache.lucene.store.RAMOutputStream;
|
||||||
|
@ -157,8 +158,11 @@ final class SegmentMerger {
|
||||||
int docCount = 0;
|
int docCount = 0;
|
||||||
for (int i = 0; i < readers.size(); i++) {
|
for (int i = 0; i < readers.size(); i++) {
|
||||||
IndexReader reader = (IndexReader) readers.elementAt(i);
|
IndexReader reader = (IndexReader) readers.elementAt(i);
|
||||||
fieldInfos.addIndexed(reader.getIndexedFieldNames(true), true);
|
fieldInfos.addIndexed(reader.getIndexedFieldNames(Field.TermVector.WITH_POSITIONS_OFFSETS), true, true, true);
|
||||||
fieldInfos.addIndexed(reader.getIndexedFieldNames(false), false);
|
fieldInfos.addIndexed(reader.getIndexedFieldNames(Field.TermVector.WITH_POSITIONS), true, true, false);
|
||||||
|
fieldInfos.addIndexed(reader.getIndexedFieldNames(Field.TermVector.WITH_OFFSETS), true, false, true);
|
||||||
|
fieldInfos.addIndexed(reader.getIndexedFieldNames(Field.TermVector.YES), true, false, false);
|
||||||
|
fieldInfos.addIndexed(reader.getIndexedFieldNames(Field.TermVector.NO), false, false, false);
|
||||||
fieldInfos.add(reader.getFieldNames(false), false);
|
fieldInfos.add(reader.getFieldNames(false), false);
|
||||||
}
|
}
|
||||||
fieldInfos.write(directory, segment + ".fnm");
|
fieldInfos.write(directory, segment + ".fnm");
|
||||||
|
@ -195,29 +199,9 @@ final class SegmentMerger {
|
||||||
int maxDoc = reader.maxDoc();
|
int maxDoc = reader.maxDoc();
|
||||||
for (int docNum = 0; docNum < maxDoc; docNum++) {
|
for (int docNum = 0; docNum < maxDoc; docNum++) {
|
||||||
// skip deleted docs
|
// skip deleted docs
|
||||||
if (reader.isDeleted(docNum)) {
|
if (reader.isDeleted(docNum))
|
||||||
continue;
|
continue;
|
||||||
}
|
termVectorsWriter.addAllDocVectors(reader.getTermFreqVectors(docNum));
|
||||||
termVectorsWriter.openDocument();
|
|
||||||
|
|
||||||
// get all term vectors
|
|
||||||
TermFreqVector[] sourceTermVector =
|
|
||||||
reader.getTermFreqVectors(docNum);
|
|
||||||
|
|
||||||
if (sourceTermVector != null) {
|
|
||||||
for (int f = 0; f < sourceTermVector.length; f++) {
|
|
||||||
// translate field numbers
|
|
||||||
TermFreqVector termVector = sourceTermVector[f];
|
|
||||||
termVectorsWriter.openField(termVector.getField());
|
|
||||||
String [] terms = termVector.getTerms();
|
|
||||||
int [] freqs = termVector.getTermFrequencies();
|
|
||||||
|
|
||||||
for (int t = 0; t < terms.length; t++) {
|
|
||||||
termVectorsWriter.addTerm(terms[t], freqs[t]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
termVectorsWriter.closeDocument();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
|
|
|
@ -25,6 +25,7 @@ import java.util.Set;
|
||||||
import java.util.Vector;
|
import java.util.Vector;
|
||||||
|
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.store.IndexInput;
|
import org.apache.lucene.store.IndexInput;
|
||||||
import org.apache.lucene.store.IndexOutput;
|
import org.apache.lucene.store.IndexOutput;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
|
@ -191,7 +192,9 @@ class SegmentReader extends IndexReader {
|
||||||
proxStream.close();
|
proxStream.close();
|
||||||
|
|
||||||
closeNorms();
|
closeNorms();
|
||||||
if (termVectorsReader != null) termVectorsReader.close();
|
|
||||||
|
if (termVectorsReader != null)
|
||||||
|
termVectorsReader.close();
|
||||||
|
|
||||||
if (cfsReader != null)
|
if (cfsReader != null)
|
||||||
cfsReader.close();
|
cfsReader.close();
|
||||||
|
@ -342,16 +345,63 @@ class SegmentReader extends IndexReader {
|
||||||
* @return Collection of Strings indicating the names of the fields
|
* @return Collection of Strings indicating the names of the fields
|
||||||
*/
|
*/
|
||||||
public Collection getIndexedFieldNames(boolean storedTermVector) {
|
public Collection getIndexedFieldNames(boolean storedTermVector) {
|
||||||
|
if(storedTermVector){
|
||||||
|
Set fieldSet = new HashSet();
|
||||||
|
fieldSet.addAll(getIndexedFieldNames(Field.TermVector.YES));
|
||||||
|
fieldSet.addAll(getIndexedFieldNames(Field.TermVector.WITH_POSITIONS));
|
||||||
|
fieldSet.addAll(getIndexedFieldNames(Field.TermVector.WITH_OFFSETS));
|
||||||
|
fieldSet.addAll(getIndexedFieldNames(Field.TermVector.WITH_POSITIONS_OFFSETS));
|
||||||
|
return fieldSet;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return getIndexedFieldNames(Field.TermVector.NO);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Collection getIndexedFieldNames (Field.TermVector tvSpec){
|
||||||
|
boolean storedTermVector;
|
||||||
|
boolean storePositionWithTermVector;
|
||||||
|
boolean storeOffsetWithTermVector;
|
||||||
|
|
||||||
|
if(tvSpec == Field.TermVector.NO){
|
||||||
|
storedTermVector = false;
|
||||||
|
storePositionWithTermVector = false;
|
||||||
|
storeOffsetWithTermVector = false;
|
||||||
|
}
|
||||||
|
else if(tvSpec == Field.TermVector.YES){
|
||||||
|
storedTermVector = true;
|
||||||
|
storePositionWithTermVector = false;
|
||||||
|
storeOffsetWithTermVector = false;
|
||||||
|
}
|
||||||
|
else if(tvSpec == Field.TermVector.WITH_POSITIONS){
|
||||||
|
storedTermVector = true;
|
||||||
|
storePositionWithTermVector = true;
|
||||||
|
storeOffsetWithTermVector = false;
|
||||||
|
}
|
||||||
|
else if(tvSpec == Field.TermVector.WITH_OFFSETS){
|
||||||
|
storedTermVector = true;
|
||||||
|
storePositionWithTermVector = false;
|
||||||
|
storeOffsetWithTermVector = true;
|
||||||
|
}
|
||||||
|
else if(tvSpec == Field.TermVector.WITH_POSITIONS_OFFSETS){
|
||||||
|
storedTermVector = true;
|
||||||
|
storePositionWithTermVector = true;
|
||||||
|
storeOffsetWithTermVector = true;
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
throw new IllegalArgumentException("unknown termVector parameter " + tvSpec);
|
||||||
|
}
|
||||||
|
|
||||||
// maintain a unique set of field names
|
// maintain a unique set of field names
|
||||||
Set fieldSet = new HashSet();
|
Set fieldSet = new HashSet();
|
||||||
for (int i = 0; i < fieldInfos.size(); i++) {
|
for (int i = 0; i < fieldInfos.size(); i++) {
|
||||||
FieldInfo fi = fieldInfos.fieldInfo(i);
|
FieldInfo fi = fieldInfos.fieldInfo(i);
|
||||||
if (fi.isIndexed == true && fi.storeTermVector == storedTermVector){
|
if (fi.isIndexed && fi.storeTermVector == storedTermVector &&
|
||||||
|
fi.storePositionWithTermVector == storePositionWithTermVector &&
|
||||||
|
fi.storeOffsetWithTermVector == storeOffsetWithTermVector){
|
||||||
fieldSet.add(fi.name);
|
fieldSet.add(fi.name);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return fieldSet;
|
return fieldSet;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public synchronized byte[] norms(String field) throws IOException {
|
public synchronized byte[] norms(String field) throws IOException {
|
||||||
|
@ -429,11 +479,13 @@ class SegmentReader extends IndexReader {
|
||||||
* vector returned contains term numbers and frequencies for all terms in
|
* vector returned contains term numbers and frequencies for all terms in
|
||||||
* the specified field of this document, if the field had storeTermVector
|
* the specified field of this document, if the field had storeTermVector
|
||||||
* flag set. If the flag was not set, the method returns null.
|
* flag set. If the flag was not set, the method returns null.
|
||||||
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
public TermFreqVector getTermFreqVector(int docNumber, String field) {
|
public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException {
|
||||||
// Check if this field is invalid or has no stored term vector
|
// Check if this field is invalid or has no stored term vector
|
||||||
FieldInfo fi = fieldInfos.fieldInfo(field);
|
FieldInfo fi = fieldInfos.fieldInfo(field);
|
||||||
if (fi == null || !fi.storeTermVector) return null;
|
if (fi == null || !fi.storeTermVector || termVectorsReader == null)
|
||||||
|
return null;
|
||||||
|
|
||||||
return termVectorsReader.get(docNumber, field);
|
return termVectorsReader.get(docNumber, field);
|
||||||
}
|
}
|
||||||
|
@ -444,8 +496,9 @@ class SegmentReader extends IndexReader {
|
||||||
* Each vector contains term numbers and frequencies for all terms
|
* Each vector contains term numbers and frequencies for all terms
|
||||||
* in a given vectorized field.
|
* in a given vectorized field.
|
||||||
* If no such fields existed, the method returns null.
|
* If no such fields existed, the method returns null.
|
||||||
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
public TermFreqVector[] getTermFreqVectors(int docNumber) {
|
public TermFreqVector[] getTermFreqVectors(int docNumber) throws IOException {
|
||||||
if (termVectorsReader == null)
|
if (termVectorsReader == null)
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,64 @@
|
||||||
|
package org.apache.lucene.index;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copyright 2004 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class SegmentTermPositionVector extends SegmentTermVector implements TermPositionVector {
|
||||||
|
protected int[][] positions;
|
||||||
|
protected TermVectorOffsetInfo[][] offsets;
|
||||||
|
public static final int[] EMPTY_TERM_POS = new int[0];
|
||||||
|
|
||||||
|
public SegmentTermPositionVector(String field, String terms[], int termFreqs[], int[][] positions, TermVectorOffsetInfo[][] offsets) {
|
||||||
|
super(field, terms, termFreqs);
|
||||||
|
this.offsets = offsets;
|
||||||
|
this.positions = positions;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns an array of TermVectorOffsetInfo in which the term is found.
|
||||||
|
*
|
||||||
|
* @param index The position in the array to get the offsets from
|
||||||
|
* @return An array of TermVectorOffsetInfo objects, the empty array if index is out of range, or null if offsets have not been stored
|
||||||
|
* @see org.apache.lucene.analysis.Token
|
||||||
|
*/
|
||||||
|
public TermVectorOffsetInfo[] getOffsets(int index) {
|
||||||
|
TermVectorOffsetInfo[] result = TermVectorOffsetInfo.EMPTY_OFFSET_INFO;
|
||||||
|
if(offsets == null)
|
||||||
|
return null;
|
||||||
|
if (index >=0 && index < offsets.length)
|
||||||
|
{
|
||||||
|
result = offsets[index];
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns an array of positions in which the term is found.
|
||||||
|
* Terms are identified by the index at which its number appears in the
|
||||||
|
* term String array obtained from the <code>indexOf</code> method.
|
||||||
|
*/
|
||||||
|
public int[] getTermPositions(int index) {
|
||||||
|
int[] result = EMPTY_TERM_POS;
|
||||||
|
if(positions == null)
|
||||||
|
return null;
|
||||||
|
if (index >=0 && index < positions.length)
|
||||||
|
{
|
||||||
|
result = positions[index];
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,4 +1,21 @@
|
||||||
package org.apache.lucene.index;
|
package org.apache.lucene.index;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copyright 2004 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -26,11 +43,14 @@ class SegmentTermVector implements TermFreqVector {
|
||||||
StringBuffer sb = new StringBuffer();
|
StringBuffer sb = new StringBuffer();
|
||||||
sb.append('{');
|
sb.append('{');
|
||||||
sb.append(field).append(": ");
|
sb.append(field).append(": ");
|
||||||
|
if(terms != null){
|
||||||
for (int i=0; i<terms.length; i++) {
|
for (int i=0; i<terms.length; i++) {
|
||||||
if (i>0) sb.append(", ");
|
if (i>0) sb.append(", ");
|
||||||
sb.append(terms[i]).append('/').append(termFreqs[i]);
|
sb.append(terms[i]).append('/').append(termFreqs[i]);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
sb.append('}');
|
sb.append('}');
|
||||||
|
|
||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -47,6 +67,8 @@ class SegmentTermVector implements TermFreqVector {
|
||||||
}
|
}
|
||||||
|
|
||||||
public int indexOf(String termText) {
|
public int indexOf(String termText) {
|
||||||
|
if(terms == null)
|
||||||
|
return -1;
|
||||||
int res = Arrays.binarySearch(terms, termText);
|
int res = Arrays.binarySearch(terms, termText);
|
||||||
return res >= 0 ? res : -1;
|
return res >= 0 ? res : -1;
|
||||||
}
|
}
|
||||||
|
@ -60,7 +82,7 @@ class SegmentTermVector implements TermFreqVector {
|
||||||
int res[] = new int[len];
|
int res[] = new int[len];
|
||||||
|
|
||||||
for (int i=0; i < len; i++) {
|
for (int i=0; i < len; i++) {
|
||||||
res[i] = indexOf(termNumbers[i]);
|
res[i] = indexOf(termNumbers[start+ i]);
|
||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,21 @@
|
||||||
package org.apache.lucene.index;
|
package org.apache.lucene.index;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copyright 2004 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
/** Provides access to stored term vector of
|
/** Provides access to stored term vector of
|
||||||
* a document field.
|
* a document field.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -1,13 +1,42 @@
|
||||||
package org.apache.lucene.index;
|
package org.apache.lucene.index;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copyright 2004 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
/** Extends <code>TermFreqVector</code> to provide additional information about
|
/** Extends <code>TermFreqVector</code> to provide additional information about
|
||||||
* positions in which each of the terms is found.
|
* positions in which each of the terms is found. A TermPositionVector does not necessarily
|
||||||
|
* contain both positions and offsets, but at least one of these arrays exists.
|
||||||
*/
|
*/
|
||||||
public interface TermPositionVector extends TermFreqVector {
|
public interface TermPositionVector extends TermFreqVector {
|
||||||
|
|
||||||
/** Returns an array of positions in which the term is found.
|
/** Returns an array of positions in which the term is found.
|
||||||
* Terms are identified by the index at which its number appears in the
|
* Terms are identified by the index at which its number appears in the
|
||||||
* term number array obtained from <code>getTermNumbers</code> method.
|
* term String array obtained from the <code>indexOf</code> method.
|
||||||
|
* May return null if positions have not been stored.
|
||||||
*/
|
*/
|
||||||
public int[] getTermPositions(int index);
|
public int[] getTermPositions(int index);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns an array of TermVectorOffsetInfo in which the term is found.
|
||||||
|
* May return null if offsets have not been stored.
|
||||||
|
*
|
||||||
|
* @see org.apache.lucene.analysis.Token
|
||||||
|
*
|
||||||
|
* @param index The position in the array to get the offsets from
|
||||||
|
* @return An array of TermVectorOffsetInfo objects or the empty list
|
||||||
|
*/
|
||||||
|
public TermVectorOffsetInfo [] getOffsets(int index);
|
||||||
}
|
}
|
|
@ -0,0 +1,66 @@
|
||||||
|
package org.apache.lucene.index;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copyright 2004 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class TermVectorOffsetInfo {
|
||||||
|
public static final TermVectorOffsetInfo [] EMPTY_OFFSET_INFO = new TermVectorOffsetInfo[0];
|
||||||
|
private int startOffset;
|
||||||
|
private int endOffset;
|
||||||
|
|
||||||
|
public TermVectorOffsetInfo() {
|
||||||
|
}
|
||||||
|
|
||||||
|
public TermVectorOffsetInfo(int startOffset, int endOffset) {
|
||||||
|
this.endOffset = endOffset;
|
||||||
|
this.startOffset = startOffset;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getEndOffset() {
|
||||||
|
return endOffset;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setEndOffset(int endOffset) {
|
||||||
|
this.endOffset = endOffset;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getStartOffset() {
|
||||||
|
return startOffset;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setStartOffset(int startOffset) {
|
||||||
|
this.startOffset = startOffset;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
if (this == o) return true;
|
||||||
|
if (!(o instanceof TermVectorOffsetInfo)) return false;
|
||||||
|
|
||||||
|
final TermVectorOffsetInfo termVectorOffsetInfo = (TermVectorOffsetInfo) o;
|
||||||
|
|
||||||
|
if (endOffset != termVectorOffsetInfo.endOffset) return false;
|
||||||
|
if (startOffset != termVectorOffsetInfo.startOffset) return false;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int hashCode() {
|
||||||
|
int result;
|
||||||
|
result = startOffset;
|
||||||
|
result = 29 * result + endOffset;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
|
@ -34,22 +34,25 @@ class TermVectorsReader {
|
||||||
private IndexInput tvf;
|
private IndexInput tvf;
|
||||||
private int size;
|
private int size;
|
||||||
|
|
||||||
|
private int tvdFormat;
|
||||||
|
private int tvfFormat;
|
||||||
|
|
||||||
TermVectorsReader(Directory d, String segment, FieldInfos fieldInfos)
|
TermVectorsReader(Directory d, String segment, FieldInfos fieldInfos)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
if (d.fileExists(segment + TermVectorsWriter.TVX_EXTENSION)) {
|
if (d.fileExists(segment + TermVectorsWriter.TVX_EXTENSION)) {
|
||||||
tvx = d.openInput(segment + TermVectorsWriter.TVX_EXTENSION);
|
tvx = d.openInput(segment + TermVectorsWriter.TVX_EXTENSION);
|
||||||
checkValidFormat(tvx);
|
checkValidFormat(tvx);
|
||||||
tvd = d.openInput(segment + TermVectorsWriter.TVD_EXTENSION);
|
tvd = d.openInput(segment + TermVectorsWriter.TVD_EXTENSION);
|
||||||
checkValidFormat(tvd);
|
tvdFormat = checkValidFormat(tvd);
|
||||||
tvf = d.openInput(segment + TermVectorsWriter.TVF_EXTENSION);
|
tvf = d.openInput(segment + TermVectorsWriter.TVF_EXTENSION);
|
||||||
checkValidFormat(tvf);
|
tvfFormat = checkValidFormat(tvf);
|
||||||
size = (int) tvx.length() / 8;
|
size = (int) tvx.length() / 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
this.fieldInfos = fieldInfos;
|
this.fieldInfos = fieldInfos;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void checkValidFormat(IndexInput in) throws IOException
|
private int checkValidFormat(IndexInput in) throws IOException
|
||||||
{
|
{
|
||||||
int format = in.readInt();
|
int format = in.readInt();
|
||||||
if (format > TermVectorsWriter.FORMAT_VERSION)
|
if (format > TermVectorsWriter.FORMAT_VERSION)
|
||||||
|
@ -57,7 +60,7 @@ class TermVectorsReader {
|
||||||
throw new IOException("Incompatible format version: " + format + " expected "
|
throw new IOException("Incompatible format version: " + format + " expected "
|
||||||
+ TermVectorsWriter.FORMAT_VERSION + " or less");
|
+ TermVectorsWriter.FORMAT_VERSION + " or less");
|
||||||
}
|
}
|
||||||
|
return format;
|
||||||
}
|
}
|
||||||
|
|
||||||
void close() throws IOException {
|
void close() throws IOException {
|
||||||
|
@ -82,16 +85,17 @@ class TermVectorsReader {
|
||||||
* Retrieve the term vector for the given document and field
|
* Retrieve the term vector for the given document and field
|
||||||
* @param docNum The document number to retrieve the vector for
|
* @param docNum The document number to retrieve the vector for
|
||||||
* @param field The field within the document to retrieve
|
* @param field The field within the document to retrieve
|
||||||
* @return The TermFreqVector for the document and field or null
|
* @return The TermFreqVector for the document and field or null if there is no termVector for this field.
|
||||||
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
synchronized TermFreqVector get(int docNum, String field) {
|
synchronized TermFreqVector get(int docNum, String field) throws IOException {
|
||||||
// Check if no term vectors are available for this segment at all
|
// Check if no term vectors are available for this segment at all
|
||||||
int fieldNumber = fieldInfos.fieldNumber(field);
|
int fieldNumber = fieldInfos.fieldNumber(field);
|
||||||
TermFreqVector result = null;
|
TermFreqVector result = null;
|
||||||
if (tvx != null) {
|
if (tvx != null) {
|
||||||
try {
|
|
||||||
//We need to account for the FORMAT_SIZE at when seeking in the tvx
|
//We need to account for the FORMAT_SIZE at when seeking in the tvx
|
||||||
//We don't need to do this in other seeks because we already have the file pointer
|
//We don't need to do this in other seeks because we already have the
|
||||||
|
// file pointer
|
||||||
//that was written in another file
|
//that was written in another file
|
||||||
tvx.seek((docNum * 8L) + TermVectorsWriter.FORMAT_SIZE);
|
tvx.seek((docNum * 8L) + TermVectorsWriter.FORMAT_SIZE);
|
||||||
//System.out.println("TVX Pointer: " + tvx.getFilePointer());
|
//System.out.println("TVX Pointer: " + tvx.getFilePointer());
|
||||||
|
@ -106,42 +110,43 @@ class TermVectorsReader {
|
||||||
int number = 0;
|
int number = 0;
|
||||||
int found = -1;
|
int found = -1;
|
||||||
for (int i = 0; i < fieldCount; i++) {
|
for (int i = 0; i < fieldCount; i++) {
|
||||||
|
if(tvdFormat == TermVectorsWriter.FORMAT_VERSION)
|
||||||
|
number = tvd.readVInt();
|
||||||
|
else
|
||||||
number += tvd.readVInt();
|
number += tvd.readVInt();
|
||||||
if (number == fieldNumber) found = i;
|
|
||||||
|
if (number == fieldNumber)
|
||||||
|
found = i;
|
||||||
}
|
}
|
||||||
|
|
||||||
// This field, although valid in the segment, was not found in this document
|
// This field, although valid in the segment, was not found in this
|
||||||
|
// document
|
||||||
if (found != -1) {
|
if (found != -1) {
|
||||||
// Compute position in the tvf file
|
// Compute position in the tvf file
|
||||||
position = 0;
|
position = 0;
|
||||||
for (int i = 0; i <= found; i++)
|
for (int i = 0; i <= found; i++)
|
||||||
{
|
|
||||||
position += tvd.readVLong();
|
position += tvd.readVLong();
|
||||||
}
|
|
||||||
result = readTermVector(field, position);
|
result = readTermVector(field, position);
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
//System.out.println("Field not found");
|
//System.out.println("Field not found");
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
} catch (Exception e) {
|
//System.out.println("No tvx file");
|
||||||
//e.printStackTrace();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
System.out.println("No tvx file");
|
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/** Return all term vectors stored for this document or null if the could not be read in. */
|
/**
|
||||||
synchronized TermFreqVector[] get(int docNum) {
|
* Return all term vectors stored for this document or null if there are no term vectors
|
||||||
|
* for the document.
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
synchronized TermFreqVector[] get(int docNum) throws IOException {
|
||||||
TermFreqVector[] result = null;
|
TermFreqVector[] result = null;
|
||||||
// Check if no term vectors are available for this segment at all
|
// Check if no term vectors are available for this segment at all
|
||||||
if (tvx != null) {
|
if (tvx != null) {
|
||||||
try {
|
|
||||||
//We need to offset by
|
//We need to offset by
|
||||||
tvx.seek((docNum * 8L) + TermVectorsWriter.FORMAT_SIZE);
|
tvx.seek((docNum * 8L) + TermVectorsWriter.FORMAT_SIZE);
|
||||||
long position = tvx.readLong();
|
long position = tvx.readLong();
|
||||||
|
@ -155,7 +160,11 @@ class TermVectorsReader {
|
||||||
String[] fields = new String[fieldCount];
|
String[] fields = new String[fieldCount];
|
||||||
|
|
||||||
for (int i = 0; i < fieldCount; i++) {
|
for (int i = 0; i < fieldCount; i++) {
|
||||||
|
if(tvdFormat == TermVectorsWriter.FORMAT_VERSION)
|
||||||
|
number = tvd.readVInt();
|
||||||
|
else
|
||||||
number += tvd.readVInt();
|
number += tvd.readVInt();
|
||||||
|
|
||||||
fields[i] = fieldInfos.fieldName(number);
|
fields[i] = fieldInfos.fieldName(number);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -169,13 +178,8 @@ class TermVectorsReader {
|
||||||
|
|
||||||
result = readTermVectors(fields, tvfPointers);
|
result = readTermVectors(fields, tvfPointers);
|
||||||
}
|
}
|
||||||
} catch (IOException e) {
|
} else {
|
||||||
e.printStackTrace();
|
//System.out.println("No tvx file");
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
System.out.println("No tvx file");
|
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
@ -206,20 +210,41 @@ class TermVectorsReader {
|
||||||
|
|
||||||
int numTerms = tvf.readVInt();
|
int numTerms = tvf.readVInt();
|
||||||
//System.out.println("Num Terms: " + numTerms);
|
//System.out.println("Num Terms: " + numTerms);
|
||||||
// If no terms - return a constant empty termvector
|
// If no terms - return a constant empty termvector. However, this should never occur!
|
||||||
if (numTerms == 0) return new SegmentTermVector(field, null, null);
|
if (numTerms == 0)
|
||||||
|
return new SegmentTermVector(field, null, null);
|
||||||
|
|
||||||
|
boolean storePositions;
|
||||||
|
boolean storeOffsets;
|
||||||
|
|
||||||
|
if(tvfFormat == TermVectorsWriter.FORMAT_VERSION){
|
||||||
|
byte bits = tvf.readByte();
|
||||||
|
storePositions = (bits & TermVectorsWriter.STORE_POSITIONS_WITH_TERMVECTOR) != 0;
|
||||||
|
storeOffsets = (bits & TermVectorsWriter.STORE_OFFSET_WITH_TERMVECTOR) != 0;
|
||||||
|
}
|
||||||
|
else{
|
||||||
tvf.readVInt();
|
tvf.readVInt();
|
||||||
|
storePositions = false;
|
||||||
|
storeOffsets = false;
|
||||||
|
}
|
||||||
|
|
||||||
String terms[] = new String[numTerms];
|
String terms[] = new String[numTerms];
|
||||||
|
|
||||||
int termFreqs[] = new int[numTerms];
|
int termFreqs[] = new int[numTerms];
|
||||||
|
|
||||||
|
// we may not need these, but declare them
|
||||||
|
int positions[][] = null;
|
||||||
|
TermVectorOffsetInfo offsets[][] = null;
|
||||||
|
if(storePositions)
|
||||||
|
positions = new int[numTerms][];
|
||||||
|
if(storeOffsets)
|
||||||
|
offsets = new TermVectorOffsetInfo[numTerms][];
|
||||||
|
|
||||||
int start = 0;
|
int start = 0;
|
||||||
int deltaLength = 0;
|
int deltaLength = 0;
|
||||||
int totalLength = 0;
|
int totalLength = 0;
|
||||||
char [] buffer = {};
|
char [] buffer = {};
|
||||||
String previousString = "";
|
String previousString = "";
|
||||||
|
|
||||||
for (int i = 0; i < numTerms; i++) {
|
for (int i = 0; i < numTerms; i++) {
|
||||||
start = tvf.readVInt();
|
start = tvf.readVInt();
|
||||||
deltaLength = tvf.readVInt();
|
deltaLength = tvf.readVInt();
|
||||||
|
@ -233,9 +258,40 @@ class TermVectorsReader {
|
||||||
tvf.readChars(buffer, start, deltaLength);
|
tvf.readChars(buffer, start, deltaLength);
|
||||||
terms[i] = new String(buffer, 0, totalLength);
|
terms[i] = new String(buffer, 0, totalLength);
|
||||||
previousString = terms[i];
|
previousString = terms[i];
|
||||||
termFreqs[i] = tvf.readVInt();
|
int freq = tvf.readVInt();
|
||||||
|
termFreqs[i] = freq;
|
||||||
|
|
||||||
|
if (storePositions) { //read in the positions
|
||||||
|
int [] pos = new int[freq];
|
||||||
|
positions[i] = pos;
|
||||||
|
int prevPosition = 0;
|
||||||
|
for (int j = 0; j < freq; j++)
|
||||||
|
{
|
||||||
|
pos[j] = prevPosition + tvf.readVInt();
|
||||||
|
prevPosition = pos[j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (storeOffsets) {
|
||||||
|
TermVectorOffsetInfo[] offs = new TermVectorOffsetInfo[freq];
|
||||||
|
offsets[i] = offs;
|
||||||
|
int prevOffset = 0;
|
||||||
|
for (int j = 0; j < freq; j++) {
|
||||||
|
int startOffset = prevOffset + tvf.readVInt();
|
||||||
|
int endOffset = startOffset + tvf.readVInt();
|
||||||
|
offs[j] = new TermVectorOffsetInfo(startOffset, endOffset);
|
||||||
|
prevOffset = endOffset;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
SegmentTermVector tv;
|
||||||
|
if (storePositions || storeOffsets){
|
||||||
|
tv = new SegmentTermPositionVector(field, terms, termFreqs, positions, offsets);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
tv = new SegmentTermVector(field, terms, termFreqs);
|
||||||
}
|
}
|
||||||
SegmentTermVector tv = new SegmentTermVector(field, terms, termFreqs);
|
|
||||||
return tv;
|
return tv;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -50,14 +50,17 @@ import java.util.Vector;
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
final class TermVectorsWriter {
|
final class TermVectorsWriter {
|
||||||
public static final int FORMAT_VERSION = 1;
|
public static final byte STORE_POSITIONS_WITH_TERMVECTOR = 0x1;
|
||||||
|
public static final byte STORE_OFFSET_WITH_TERMVECTOR = 0x2;
|
||||||
|
|
||||||
|
public static final int FORMAT_VERSION = 2;
|
||||||
//The size in bytes that the FORMAT_VERSION will take up at the beginning of each file
|
//The size in bytes that the FORMAT_VERSION will take up at the beginning of each file
|
||||||
public static final int FORMAT_SIZE = 4;
|
public static final int FORMAT_SIZE = 4;
|
||||||
|
|
||||||
//TODO: Figure out how to write with or w/o position information and read back in
|
|
||||||
public static final String TVX_EXTENSION = ".tvx";
|
public static final String TVX_EXTENSION = ".tvx";
|
||||||
public static final String TVD_EXTENSION = ".tvd";
|
public static final String TVD_EXTENSION = ".tvd";
|
||||||
public static final String TVF_EXTENSION = ".tvf";
|
public static final String TVF_EXTENSION = ".tvf";
|
||||||
|
|
||||||
private IndexOutput tvx = null, tvd = null, tvf = null;
|
private IndexOutput tvx = null, tvd = null, tvf = null;
|
||||||
private Vector fields = null;
|
private Vector fields = null;
|
||||||
private Vector terms = null;
|
private Vector terms = null;
|
||||||
|
@ -66,13 +69,6 @@ final class TermVectorsWriter {
|
||||||
private TVField currentField = null;
|
private TVField currentField = null;
|
||||||
private long currentDocPointer = -1;
|
private long currentDocPointer = -1;
|
||||||
|
|
||||||
/** Create term vectors writer for the specified segment in specified
|
|
||||||
* directory. A new TermVectorsWriter should be created for each
|
|
||||||
* segment. The parameter <code>maxFields</code> indicates how many total
|
|
||||||
* fields are found in this document. Not all of these fields may require
|
|
||||||
* termvectors to be stored, so the number of calls to
|
|
||||||
* <code>openField</code> is less or equal to this number.
|
|
||||||
*/
|
|
||||||
public TermVectorsWriter(Directory directory, String segment,
|
public TermVectorsWriter(Directory directory, String segment,
|
||||||
FieldInfos fieldInfos)
|
FieldInfos fieldInfos)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
@ -93,7 +89,6 @@ final class TermVectorsWriter {
|
||||||
public final void openDocument()
|
public final void openDocument()
|
||||||
throws IOException {
|
throws IOException {
|
||||||
closeDocument();
|
closeDocument();
|
||||||
|
|
||||||
currentDocPointer = tvd.getFilePointer();
|
currentDocPointer = tvd.getFilePointer();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -119,12 +114,17 @@ final class TermVectorsWriter {
|
||||||
* processing of this field. If a field was previously open, it is
|
* processing of this field. If a field was previously open, it is
|
||||||
* closed automatically.
|
* closed automatically.
|
||||||
*/
|
*/
|
||||||
public final void openField(String field)
|
public final void openField(String field) throws IOException {
|
||||||
throws IOException {
|
FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
|
||||||
if (!isDocumentOpen()) throw new IllegalStateException("Cannot open field when no document is open.");
|
openField(fieldInfo.number, fieldInfo.storePositionWithTermVector, fieldInfo.storeOffsetWithTermVector);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void openField(int fieldNumber, boolean storePositionWithTermVector,
|
||||||
|
boolean storeOffsetWithTermVector) throws IOException{
|
||||||
|
if (!isDocumentOpen())
|
||||||
|
throw new IllegalStateException("Cannot open field when no document is open.");
|
||||||
closeField();
|
closeField();
|
||||||
currentField = new TVField(fieldInfos.fieldNumber(field));
|
currentField = new TVField(fieldNumber, storePositionWithTermVector, storeOffsetWithTermVector);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Finished processing current field. This should be followed by a call to
|
/** Finished processing current field. This should be followed by a call to
|
||||||
|
@ -157,57 +157,80 @@ final class TermVectorsWriter {
|
||||||
* times this term appears in this field, in this document.
|
* times this term appears in this field, in this document.
|
||||||
*/
|
*/
|
||||||
public final void addTerm(String termText, int freq) {
|
public final void addTerm(String termText, int freq) {
|
||||||
if (!isDocumentOpen()) throw new IllegalStateException("Cannot add terms when document is not open");
|
addTerm(termText, freq, null, null);
|
||||||
if (!isFieldOpen()) throw new IllegalStateException("Cannot add terms when field is not open");
|
|
||||||
|
|
||||||
addTermInternal(termText, freq);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private final void addTermInternal(String termText, int freq) {
|
public final void addTerm(String termText, int freq, int [] positions, TermVectorOffsetInfo [] offsets)
|
||||||
currentField.length += freq;
|
{
|
||||||
|
if (!isDocumentOpen())
|
||||||
|
throw new IllegalStateException("Cannot add terms when document is not open");
|
||||||
|
if (!isFieldOpen())
|
||||||
|
throw new IllegalStateException("Cannot add terms when field is not open");
|
||||||
|
|
||||||
|
addTermInternal(termText, freq, positions, offsets);
|
||||||
|
}
|
||||||
|
|
||||||
|
private final void addTermInternal(String termText, int freq, int [] positions, TermVectorOffsetInfo [] offsets) {
|
||||||
TVTerm term = new TVTerm();
|
TVTerm term = new TVTerm();
|
||||||
term.termText = termText;
|
term.termText = termText;
|
||||||
term.freq = freq;
|
term.freq = freq;
|
||||||
|
term.positions = positions;
|
||||||
|
term.offsets = offsets;
|
||||||
terms.add(term);
|
terms.add(term);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
/** Add specified vectors to the document.
|
* Add a complete document specified by all its term vectors. If document has no
|
||||||
|
* term vectors, add value for tvx.
|
||||||
|
*
|
||||||
|
* @param vectors
|
||||||
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
public final void addVectors(TermFreqVector[] vectors)
|
public final void addAllDocVectors(TermFreqVector[] vectors)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
if (!isDocumentOpen()) throw new IllegalStateException("Cannot add term vectors when document is not open");
|
openDocument();
|
||||||
if (isFieldOpen()) throw new IllegalStateException("Cannot add term vectors when field is open");
|
|
||||||
|
|
||||||
|
if (vectors != null) {
|
||||||
for (int i = 0; i < vectors.length; i++) {
|
for (int i = 0; i < vectors.length; i++) {
|
||||||
addTermFreqVector(vectors[i]);
|
boolean storePositionWithTermVector = false;
|
||||||
}
|
boolean storeOffsetWithTermVector = false;
|
||||||
}
|
|
||||||
|
|
||||||
|
try {
|
||||||
|
|
||||||
/** Add specified vector to the document. Document must be open but no field
|
TermPositionVector tpVector = (TermPositionVector) vectors[i];
|
||||||
* should be open or exception is thrown. The same document can have <code>addTerm</code>
|
|
||||||
* and <code>addVectors</code> calls mixed, however a given field must either be
|
if (tpVector.size() > 0 && tpVector.getTermPositions(0) != null)
|
||||||
* populated with <code>addTerm</code> or with <code>addVector</code>. *
|
storePositionWithTermVector = true;
|
||||||
*/
|
if (tpVector.size() > 0 && tpVector.getOffsets(0) != null)
|
||||||
public final void addTermFreqVector(TermFreqVector vector)
|
storeOffsetWithTermVector = true;
|
||||||
throws IOException {
|
|
||||||
if (!isDocumentOpen()) throw new IllegalStateException("Cannot add term vector when document is not open");
|
FieldInfo fieldInfo = fieldInfos.fieldInfo(tpVector.getField());
|
||||||
if (isFieldOpen()) throw new IllegalStateException("Cannot add term vector when field is open");
|
openField(fieldInfo.number, storePositionWithTermVector, storeOffsetWithTermVector);
|
||||||
addTermFreqVectorInternal(vector);
|
|
||||||
}
|
for (int j = 0; j < tpVector.size(); j++)
|
||||||
|
addTermInternal(tpVector.getTerms()[j], tpVector.getTermFrequencies()[j], tpVector.getTermPositions(j),
|
||||||
|
tpVector.getOffsets(j));
|
||||||
|
|
||||||
private final void addTermFreqVectorInternal(TermFreqVector vector)
|
|
||||||
throws IOException {
|
|
||||||
openField(vector.getField());
|
|
||||||
for (int i = 0; i < vector.size(); i++) {
|
|
||||||
addTermInternal(vector.getTerms()[i], vector.getTermFrequencies()[i]);
|
|
||||||
}
|
|
||||||
closeField();
|
closeField();
|
||||||
|
|
||||||
|
} catch (ClassCastException ignore) {
|
||||||
|
|
||||||
|
TermFreqVector tfVector = vectors[i];
|
||||||
|
|
||||||
|
FieldInfo fieldInfo = fieldInfos.fieldInfo(tfVector.getField());
|
||||||
|
openField(fieldInfo.number, storePositionWithTermVector, storeOffsetWithTermVector);
|
||||||
|
|
||||||
|
for (int j = 0; j < tfVector.size(); j++)
|
||||||
|
addTermInternal(tfVector.getTerms()[j], tfVector.getTermFrequencies()[j], null, null);
|
||||||
|
|
||||||
|
closeField();
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
closeDocument();
|
||||||
|
}
|
||||||
|
|
||||||
/** Close all streams. */
|
/** Close all streams. */
|
||||||
final void close() throws IOException {
|
final void close() throws IOException {
|
||||||
|
@ -245,15 +268,22 @@ final class TermVectorsWriter {
|
||||||
// remember where this field is written
|
// remember where this field is written
|
||||||
currentField.tvfPointer = tvf.getFilePointer();
|
currentField.tvfPointer = tvf.getFilePointer();
|
||||||
//System.out.println("Field Pointer: " + currentField.tvfPointer);
|
//System.out.println("Field Pointer: " + currentField.tvfPointer);
|
||||||
final int size;
|
|
||||||
|
|
||||||
tvf.writeVInt(size = terms.size());
|
final int size = terms.size();
|
||||||
tvf.writeVInt(currentField.length - size);
|
tvf.writeVInt(size);
|
||||||
|
|
||||||
|
boolean storePositions = currentField.storePositions;
|
||||||
|
boolean storeOffsets = currentField.storeOffsets;
|
||||||
|
byte bits = 0x0;
|
||||||
|
if (storePositions)
|
||||||
|
bits |= STORE_POSITIONS_WITH_TERMVECTOR;
|
||||||
|
if (storeOffsets)
|
||||||
|
bits |= STORE_OFFSET_WITH_TERMVECTOR;
|
||||||
|
tvf.writeByte(bits);
|
||||||
|
|
||||||
String lastTermText = "";
|
String lastTermText = "";
|
||||||
// write term ids and positions
|
|
||||||
for (int i = 0; i < size; i++) {
|
for (int i = 0; i < size; i++) {
|
||||||
TVTerm term = (TVTerm) terms.elementAt(i);
|
TVTerm term = (TVTerm) terms.elementAt(i);
|
||||||
//tvf.writeString(term.termText);
|
|
||||||
int start = StringHelper.stringDifference(lastTermText, term.termText);
|
int start = StringHelper.stringDifference(lastTermText, term.termText);
|
||||||
int length = term.termText.length() - start;
|
int length = term.termText.length() - start;
|
||||||
tvf.writeVInt(start); // write shared prefix length
|
tvf.writeVInt(start); // write shared prefix length
|
||||||
|
@ -261,31 +291,51 @@ final class TermVectorsWriter {
|
||||||
tvf.writeChars(term.termText, start, length); // write delta chars
|
tvf.writeChars(term.termText, start, length); // write delta chars
|
||||||
tvf.writeVInt(term.freq);
|
tvf.writeVInt(term.freq);
|
||||||
lastTermText = term.termText;
|
lastTermText = term.termText;
|
||||||
|
|
||||||
|
if(storePositions){
|
||||||
|
if(term.positions == null)
|
||||||
|
throw new IllegalStateException("Trying to write positions that are null!");
|
||||||
|
|
||||||
|
// use delta encoding for positions
|
||||||
|
int position = 0;
|
||||||
|
for (int j = 0; j < term.freq; j++){
|
||||||
|
tvf.writeVInt(term.positions[j] - position);
|
||||||
|
position = term.positions[j];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(storeOffsets){
|
||||||
|
if(term.offsets == null)
|
||||||
|
throw new IllegalStateException("Trying to write offsets that are null!");
|
||||||
|
|
||||||
|
// use delta encoding for offsets
|
||||||
|
int position = 0;
|
||||||
|
for (int j = 0; j < term.freq; j++) {
|
||||||
|
tvf.writeVInt(term.offsets[j].getStartOffset() - position);
|
||||||
|
tvf.writeVInt(term.offsets[j].getEndOffset() - term.offsets[j].getStartOffset()); //Save the diff between the two.
|
||||||
|
position = term.offsets[j].getEndOffset();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private void writeDoc() throws IOException {
|
private void writeDoc() throws IOException {
|
||||||
if (isFieldOpen()) throw new IllegalStateException("Field is still open while writing document");
|
if (isFieldOpen())
|
||||||
|
throw new IllegalStateException("Field is still open while writing document");
|
||||||
//System.out.println("Writing doc pointer: " + currentDocPointer);
|
//System.out.println("Writing doc pointer: " + currentDocPointer);
|
||||||
// write document index record
|
// write document index record
|
||||||
tvx.writeLong(currentDocPointer);
|
tvx.writeLong(currentDocPointer);
|
||||||
|
|
||||||
// write document data record
|
// write document data record
|
||||||
final int size;
|
final int size = fields.size();
|
||||||
|
|
||||||
// write the number of fields
|
// write the number of fields
|
||||||
tvd.writeVInt(size = fields.size());
|
tvd.writeVInt(size);
|
||||||
|
|
||||||
// write field numbers
|
// write field numbers
|
||||||
int lastFieldNumber = 0;
|
|
||||||
for (int i = 0; i < size; i++) {
|
for (int i = 0; i < size; i++) {
|
||||||
TVField field = (TVField) fields.elementAt(i);
|
TVField field = (TVField) fields.elementAt(i);
|
||||||
tvd.writeVInt(field.number - lastFieldNumber);
|
tvd.writeVInt(field.number);
|
||||||
|
|
||||||
lastFieldNumber = field.number;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// write field pointers
|
// write field pointers
|
||||||
|
@ -293,7 +343,6 @@ final class TermVectorsWriter {
|
||||||
for (int i = 0; i < size; i++) {
|
for (int i = 0; i < size; i++) {
|
||||||
TVField field = (TVField) fields.elementAt(i);
|
TVField field = (TVField) fields.elementAt(i);
|
||||||
tvd.writeVLong(field.tvfPointer - lastFieldPointer);
|
tvd.writeVLong(field.tvfPointer - lastFieldPointer);
|
||||||
|
|
||||||
lastFieldPointer = field.tvfPointer;
|
lastFieldPointer = field.tvfPointer;
|
||||||
}
|
}
|
||||||
//System.out.println("After writing doc pointer: " + tvx.getFilePointer());
|
//System.out.println("After writing doc pointer: " + tvx.getFilePointer());
|
||||||
|
@ -303,17 +352,20 @@ final class TermVectorsWriter {
|
||||||
private static class TVField {
|
private static class TVField {
|
||||||
int number;
|
int number;
|
||||||
long tvfPointer = 0;
|
long tvfPointer = 0;
|
||||||
int length = 0; // number of distinct term positions
|
boolean storePositions = false;
|
||||||
|
boolean storeOffsets = false;
|
||||||
TVField(int number) {
|
TVField(int number, boolean storePos, boolean storeOff) {
|
||||||
this.number = number;
|
this.number = number;
|
||||||
|
storePositions = storePos;
|
||||||
|
storeOffsets = storeOff;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class TVTerm {
|
private static class TVTerm {
|
||||||
String termText;
|
String termText;
|
||||||
int freq = 0;
|
int freq = 0;
|
||||||
//int positions[] = null;
|
int positions[] = null;
|
||||||
|
TermVectorOffsetInfo [] offsets = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -34,7 +34,7 @@ class DocHelper {
|
||||||
//Fields will be lexicographically sorted. So, the order is: field, text, two
|
//Fields will be lexicographically sorted. So, the order is: field, text, two
|
||||||
public static final int [] FIELD_2_FREQS = {3, 1, 1};
|
public static final int [] FIELD_2_FREQS = {3, 1, 1};
|
||||||
public static final String TEXT_FIELD_2_KEY = "textField2";
|
public static final String TEXT_FIELD_2_KEY = "textField2";
|
||||||
public static Field textField2 = Field.Text(TEXT_FIELD_2_KEY, FIELD_2_TEXT, true);
|
public static Field textField2 = new Field(TEXT_FIELD_2_KEY, FIELD_2_TEXT, Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
|
||||||
|
|
||||||
public static final String KEYWORD_TEXT = "Keyword";
|
public static final String KEYWORD_TEXT = "Keyword";
|
||||||
public static final String KEYWORD_FIELD_KEY = "keyField";
|
public static final String KEYWORD_FIELD_KEY = "keyField";
|
||||||
|
@ -135,7 +135,7 @@ class DocHelper {
|
||||||
Enumeration fields = doc.fields();
|
Enumeration fields = doc.fields();
|
||||||
int result = 0;
|
int result = 0;
|
||||||
while (fields.hasMoreElements()) {
|
while (fields.hasMoreElements()) {
|
||||||
fields.nextElement();
|
String name = fields.nextElement().toString();
|
||||||
result++;
|
result++;
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
|
|
|
@ -109,6 +109,7 @@ public class TestSegmentMerger extends TestCase {
|
||||||
int [] freqs = vector.getTermFrequencies();
|
int [] freqs = vector.getTermFrequencies();
|
||||||
assertTrue(freqs != null);
|
assertTrue(freqs != null);
|
||||||
//System.out.println("Freqs size: " + freqs.length);
|
//System.out.println("Freqs size: " + freqs.length);
|
||||||
|
assertTrue(vector instanceof TermPositionVector == true);
|
||||||
|
|
||||||
for (int i = 0; i < terms.length; i++) {
|
for (int i = 0; i < terms.length; i++) {
|
||||||
String term = terms[i];
|
String term = terms[i];
|
||||||
|
|
|
@ -178,7 +178,7 @@ public class TestSegmentReader extends TestCase {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testTermVectors() {
|
public void testTermVectors() throws IOException {
|
||||||
TermFreqVector result = reader.getTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY);
|
TermFreqVector result = reader.getTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY);
|
||||||
assertTrue(result != null);
|
assertTrue(result != null);
|
||||||
String [] terms = result.getTerms();
|
String [] terms = result.getTerms();
|
||||||
|
|
|
@ -11,7 +11,11 @@ public class TestTermVectorsReader extends TestCase {
|
||||||
private TermVectorsWriter writer = null;
|
private TermVectorsWriter writer = null;
|
||||||
//Must be lexicographically sorted, will do in setup, versus trying to maintain here
|
//Must be lexicographically sorted, will do in setup, versus trying to maintain here
|
||||||
private String [] testFields = {"f1", "f2", "f3"};
|
private String [] testFields = {"f1", "f2", "f3"};
|
||||||
|
private boolean [] testFieldsStorePos = {true, false, true, false};
|
||||||
|
private boolean [] testFieldsStoreOff = {true, false, false, true};
|
||||||
private String [] testTerms = {"this", "is", "a", "test"};
|
private String [] testTerms = {"this", "is", "a", "test"};
|
||||||
|
private int [][] positions = new int[testTerms.length][];
|
||||||
|
private TermVectorOffsetInfo [][] offsets = new TermVectorOffsetInfo[testTerms.length][];
|
||||||
private RAMDirectory dir = new RAMDirectory();
|
private RAMDirectory dir = new RAMDirectory();
|
||||||
private String seg = "testSegment";
|
private String seg = "testSegment";
|
||||||
private FieldInfos fieldInfos = new FieldInfos();
|
private FieldInfos fieldInfos = new FieldInfos();
|
||||||
|
@ -22,9 +26,22 @@ public class TestTermVectorsReader extends TestCase {
|
||||||
|
|
||||||
protected void setUp() {
|
protected void setUp() {
|
||||||
for (int i = 0; i < testFields.length; i++) {
|
for (int i = 0; i < testFields.length; i++) {
|
||||||
fieldInfos.add(testFields[i], true, true);
|
fieldInfos.add(testFields[i], true, true, testFieldsStorePos[i], testFieldsStoreOff[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < testTerms.length; i++)
|
||||||
|
{
|
||||||
|
positions[i] = new int[3];
|
||||||
|
for (int j = 0; j < positions[i].length; j++) {
|
||||||
|
// poditions are always sorted in increasing order
|
||||||
|
positions[i][j] = (int)(j * 10 + Math.random() * 10);
|
||||||
|
}
|
||||||
|
offsets[i] = new TermVectorOffsetInfo[3];
|
||||||
|
for (int j = 0; j < offsets[i].length; j++){
|
||||||
|
// ofsets are alway sorted in increasing order
|
||||||
|
offsets[i][j] = new TermVectorOffsetInfo(j * 10, j * 10 + testTerms[i].length());
|
||||||
|
}
|
||||||
|
}
|
||||||
try {
|
try {
|
||||||
Arrays.sort(testTerms);
|
Arrays.sort(testTerms);
|
||||||
for (int j = 0; j < 5; j++) {
|
for (int j = 0; j < 5; j++) {
|
||||||
|
@ -34,7 +51,7 @@ public class TestTermVectorsReader extends TestCase {
|
||||||
for (int k = 0; k < testFields.length; k++) {
|
for (int k = 0; k < testFields.length; k++) {
|
||||||
writer.openField(testFields[k]);
|
writer.openField(testFields[k]);
|
||||||
for (int i = 0; i < testTerms.length; i++) {
|
for (int i = 0; i < testTerms.length; i++) {
|
||||||
writer.addTerm(testTerms[i], i);
|
writer.addTerm(testTerms[i], 3, positions[i], offsets[i]);
|
||||||
}
|
}
|
||||||
writer.closeField();
|
writer.closeField();
|
||||||
}
|
}
|
||||||
|
@ -80,6 +97,103 @@ public class TestTermVectorsReader extends TestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testPositionReader() {
|
||||||
|
try {
|
||||||
|
TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
|
||||||
|
assertTrue(reader != null);
|
||||||
|
TermPositionVector vector;
|
||||||
|
String [] terms;
|
||||||
|
vector = (TermPositionVector)reader.get(0, testFields[0]);
|
||||||
|
assertTrue(vector != null);
|
||||||
|
terms = vector.getTerms();
|
||||||
|
assertTrue(terms != null);
|
||||||
|
assertTrue(terms.length == testTerms.length);
|
||||||
|
for (int i = 0; i < terms.length; i++) {
|
||||||
|
String term = terms[i];
|
||||||
|
//System.out.println("Term: " + term);
|
||||||
|
assertTrue(term.equals(testTerms[i]));
|
||||||
|
int [] positions = vector.getTermPositions(i);
|
||||||
|
assertTrue(positions != null);
|
||||||
|
assertTrue(positions.length == this.positions[i].length);
|
||||||
|
for (int j = 0; j < positions.length; j++) {
|
||||||
|
int position = positions[j];
|
||||||
|
assertTrue(position == this.positions[i][j]);
|
||||||
|
}
|
||||||
|
TermVectorOffsetInfo [] offset = vector.getOffsets(i);
|
||||||
|
assertTrue(offset != null);
|
||||||
|
assertTrue(offset.length == this.offsets[i].length);
|
||||||
|
for (int j = 0; j < offset.length; j++) {
|
||||||
|
TermVectorOffsetInfo termVectorOffsetInfo = offset[j];
|
||||||
|
assertTrue(termVectorOffsetInfo.equals(offsets[i][j]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TermFreqVector freqVector = (TermFreqVector)reader.get(0, testFields[1]); //no pos, no offset
|
||||||
|
assertTrue(freqVector != null);
|
||||||
|
assertTrue(freqVector instanceof TermPositionVector == false);
|
||||||
|
terms = freqVector.getTerms();
|
||||||
|
assertTrue(terms != null);
|
||||||
|
assertTrue(terms.length == testTerms.length);
|
||||||
|
for (int i = 0; i < terms.length; i++) {
|
||||||
|
String term = terms[i];
|
||||||
|
//System.out.println("Term: " + term);
|
||||||
|
assertTrue(term.equals(testTerms[i]));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
assertTrue(false);
|
||||||
|
}
|
||||||
|
catch (ClassCastException cce)
|
||||||
|
{
|
||||||
|
cce.printStackTrace();
|
||||||
|
assertTrue(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testOffsetReader() {
|
||||||
|
try {
|
||||||
|
TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
|
||||||
|
assertTrue(reader != null);
|
||||||
|
TermPositionVector vector = (TermPositionVector)reader.get(0, testFields[0]);
|
||||||
|
assertTrue(vector != null);
|
||||||
|
String [] terms = vector.getTerms();
|
||||||
|
assertTrue(terms != null);
|
||||||
|
assertTrue(terms.length == testTerms.length);
|
||||||
|
for (int i = 0; i < terms.length; i++) {
|
||||||
|
String term = terms[i];
|
||||||
|
//System.out.println("Term: " + term);
|
||||||
|
assertTrue(term.equals(testTerms[i]));
|
||||||
|
int [] positions = vector.getTermPositions(i);
|
||||||
|
assertTrue(positions != null);
|
||||||
|
assertTrue(positions.length == this.positions[i].length);
|
||||||
|
for (int j = 0; j < positions.length; j++) {
|
||||||
|
int position = positions[j];
|
||||||
|
assertTrue(position == this.positions[i][j]);
|
||||||
|
}
|
||||||
|
TermVectorOffsetInfo [] offset = vector.getOffsets(i);
|
||||||
|
assertTrue(offset != null);
|
||||||
|
assertTrue(offset.length == this.offsets[i].length);
|
||||||
|
for (int j = 0; j < offset.length; j++) {
|
||||||
|
TermVectorOffsetInfo termVectorOffsetInfo = offset[j];
|
||||||
|
assertTrue(termVectorOffsetInfo.equals(offsets[i][j]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
assertTrue(false);
|
||||||
|
}
|
||||||
|
catch (ClassCastException cce)
|
||||||
|
{
|
||||||
|
cce.printStackTrace();
|
||||||
|
assertTrue(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Make sure exceptions and bad params are handled appropriately
|
* Make sure exceptions and bad params are handled appropriately
|
||||||
*/
|
*/
|
||||||
|
@ -89,9 +203,9 @@ public class TestTermVectorsReader extends TestCase {
|
||||||
assertTrue(reader != null);
|
assertTrue(reader != null);
|
||||||
//Bad document number, good field number
|
//Bad document number, good field number
|
||||||
TermFreqVector vector = reader.get(50, testFields[0]);
|
TermFreqVector vector = reader.get(50, testFields[0]);
|
||||||
assertTrue(vector == null);
|
|
||||||
} catch (Exception e) {
|
|
||||||
assertTrue(false);
|
assertTrue(false);
|
||||||
|
} catch (Exception e) {
|
||||||
|
assertTrue(true);
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
|
TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
|
||||||
|
|
|
@ -44,7 +44,7 @@ public class TestTermVectorsWriter extends TestCase {
|
||||||
for (int i = 0; i < testTerms.length; i++) {
|
for (int i = 0; i < testTerms.length; i++) {
|
||||||
positions[i] = new int[5];
|
positions[i] = new int[5];
|
||||||
for (int j = 0; j < positions[i].length; j++) {
|
for (int j = 0; j < positions[i].length; j++) {
|
||||||
positions[i][j] = i * 100;
|
positions[i][j] = j * 10;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -107,7 +107,7 @@ public class TestTermVectorsWriter extends TestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void checkTermVector(TermVectorsReader reader, int docNum, String field) {
|
private void checkTermVector(TermVectorsReader reader, int docNum, String field) throws IOException {
|
||||||
TermFreqVector vector = reader.get(docNum, field);
|
TermFreqVector vector = reader.get(docNum, field);
|
||||||
assertTrue(vector != null);
|
assertTrue(vector != null);
|
||||||
String[] terms = vector.getTerms();
|
String[] terms = vector.getTerms();
|
||||||
|
|
|
@ -43,8 +43,23 @@ public class TestTermVectors extends TestCase {
|
||||||
//writer.infoStream = System.out;
|
//writer.infoStream = System.out;
|
||||||
for (int i = 0; i < 1000; i++) {
|
for (int i = 0; i < 1000; i++) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
|
Field.TermVector termVector;
|
||||||
|
int mod3 = i % 3;
|
||||||
|
int mod2 = i % 2;
|
||||||
|
if (mod2 == 0 && mod3 == 0){
|
||||||
|
termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;
|
||||||
|
}
|
||||||
|
else if (mod2 == 0){
|
||||||
|
termVector = Field.TermVector.WITH_POSITIONS;
|
||||||
|
}
|
||||||
|
else if (mod3 == 0){
|
||||||
|
termVector = Field.TermVector.WITH_OFFSETS;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
termVector = Field.TermVector.YES;
|
||||||
|
}
|
||||||
doc.add(new Field("field", English.intToEnglish(i),
|
doc.add(new Field("field", English.intToEnglish(i),
|
||||||
Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.YES));
|
Field.Store.YES, Field.Index.TOKENIZED, termVector));
|
||||||
writer.addDocument(doc);
|
writer.addDocument(doc);
|
||||||
}
|
}
|
||||||
writer.close();
|
writer.close();
|
||||||
|
@ -70,17 +85,74 @@ public class TestTermVectors extends TestCase {
|
||||||
TermFreqVector [] vector = searcher.reader.getTermFreqVectors(hits.id(i));
|
TermFreqVector [] vector = searcher.reader.getTermFreqVectors(hits.id(i));
|
||||||
assertTrue(vector != null);
|
assertTrue(vector != null);
|
||||||
assertTrue(vector.length == 1);
|
assertTrue(vector.length == 1);
|
||||||
//assertTrue();
|
|
||||||
}
|
}
|
||||||
TermFreqVector [] vector = searcher.reader.getTermFreqVectors(hits.id(50));
|
|
||||||
//System.out.println("Explain: " + searcher.explain(query, hits.id(50)));
|
|
||||||
//System.out.println("Vector: " + vector[0].toString());
|
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
assertTrue(false);
|
assertTrue(false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testTermPositionVectors() {
|
public void testTermPositionVectors() {
|
||||||
|
Query query = new TermQuery(new Term("field", "zero"));
|
||||||
|
try {
|
||||||
|
Hits hits = searcher.search(query);
|
||||||
|
assertEquals(1, hits.length());
|
||||||
|
|
||||||
|
for (int i = 0; i < hits.length(); i++)
|
||||||
|
{
|
||||||
|
TermFreqVector [] vector = searcher.reader.getTermFreqVectors(hits.id(i));
|
||||||
|
assertTrue(vector != null);
|
||||||
|
assertTrue(vector.length == 1);
|
||||||
|
|
||||||
|
boolean shouldBePosVector = (hits.id(i) % 2 == 0) ? true : false;
|
||||||
|
assertTrue((shouldBePosVector == false) || (shouldBePosVector == true && (vector[0] instanceof TermPositionVector == true)));
|
||||||
|
|
||||||
|
boolean shouldBeOffVector = (hits.id(i) % 3 == 0) ? true : false;
|
||||||
|
assertTrue((shouldBeOffVector == false) || (shouldBeOffVector == true && (vector[0] instanceof TermPositionVector == true)));
|
||||||
|
|
||||||
|
if(shouldBePosVector || shouldBeOffVector){
|
||||||
|
TermPositionVector posVec = (TermPositionVector)vector[0];
|
||||||
|
String [] terms = posVec.getTerms();
|
||||||
|
assertTrue(terms != null && terms.length > 0);
|
||||||
|
|
||||||
|
for (int j = 0; j < terms.length; j++) {
|
||||||
|
int [] positions = posVec.getTermPositions(j);
|
||||||
|
TermVectorOffsetInfo [] offsets = posVec.getOffsets(j);
|
||||||
|
|
||||||
|
if(shouldBePosVector){
|
||||||
|
assertTrue(positions != null);
|
||||||
|
assertTrue(positions.length > 0);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
assertTrue(positions == null);
|
||||||
|
|
||||||
|
if(shouldBeOffVector){
|
||||||
|
assertTrue(offsets != null);
|
||||||
|
assertTrue(offsets.length > 0);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
assertTrue(offsets == null);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
try{
|
||||||
|
TermPositionVector posVec = (TermPositionVector)vector[0];
|
||||||
|
assertTrue(false);
|
||||||
|
}
|
||||||
|
catch(ClassCastException ignore){
|
||||||
|
TermFreqVector freqVec = vector[0];
|
||||||
|
String [] terms = freqVec.getTerms();
|
||||||
|
assertTrue(terms != null && terms.length > 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
assertTrue(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testTermOffsetVectors() {
|
||||||
Query query = new TermQuery(new Term("field", "fifty"));
|
Query query = new TermQuery(new Term("field", "fifty"));
|
||||||
try {
|
try {
|
||||||
Hits hits = searcher.search(query);
|
Hits hits = searcher.search(query);
|
||||||
|
@ -91,6 +163,7 @@ public class TestTermVectors extends TestCase {
|
||||||
TermFreqVector [] vector = searcher.reader.getTermFreqVectors(hits.id(i));
|
TermFreqVector [] vector = searcher.reader.getTermFreqVectors(hits.id(i));
|
||||||
assertTrue(vector != null);
|
assertTrue(vector != null);
|
||||||
assertTrue(vector.length == 1);
|
assertTrue(vector.length == 1);
|
||||||
|
|
||||||
//assertTrue();
|
//assertTrue();
|
||||||
}
|
}
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
|
@ -164,7 +237,7 @@ public class TestTermVectors extends TestCase {
|
||||||
int [] freqs = vector.getTermFrequencies();
|
int [] freqs = vector.getTermFrequencies();
|
||||||
for (int i = 0; i < vTerms.length; i++)
|
for (int i = 0; i < vTerms.length; i++)
|
||||||
{
|
{
|
||||||
if (term.text().equals(vTerms[i]) == true)
|
if (term.text().equals(vTerms[i]))
|
||||||
{
|
{
|
||||||
assertTrue(freqs[i] == freq);
|
assertTrue(freqs[i] == freq);
|
||||||
}
|
}
|
||||||
|
@ -184,9 +257,9 @@ public class TestTermVectors extends TestCase {
|
||||||
System.out.println("Explain: " + knownSearcher.explain(query, hits.id(1)));
|
System.out.println("Explain: " + knownSearcher.explain(query, hits.id(1)));
|
||||||
System.out.println("Hit 2: " + hits.id(2) + " Score: " + hits.score(2) + " String: " + hits.doc(2).toString());
|
System.out.println("Hit 2: " + hits.id(2) + " Score: " + hits.score(2) + " String: " + hits.doc(2).toString());
|
||||||
System.out.println("Explain: " + knownSearcher.explain(query, hits.id(2)));*/
|
System.out.println("Explain: " + knownSearcher.explain(query, hits.id(2)));*/
|
||||||
assertTrue(testDoc3.toString().equals(hits.doc(0).toString()));
|
assertTrue(hits.id(0) == 2);
|
||||||
assertTrue(testDoc4.toString().equals(hits.doc(1).toString()));
|
assertTrue(hits.id(1) == 3);
|
||||||
assertTrue(testDoc1.toString().equals(hits.doc(2).toString()));
|
assertTrue(hits.id(2) == 0);
|
||||||
TermFreqVector vector = knownSearcher.reader.getTermFreqVector(hits.id(1), "field");
|
TermFreqVector vector = knownSearcher.reader.getTermFreqVector(hits.id(1), "field");
|
||||||
assertTrue(vector != null);
|
assertTrue(vector != null);
|
||||||
//System.out.println("Vector: " + vector);
|
//System.out.println("Vector: " + vector);
|
||||||
|
|
Loading…
Reference in New Issue