mirror of https://github.com/apache/lucene.git
LUCENE-2048: omit positions but keep term freq
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1145594 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
dfc5ce1cff
commit
1c646d24c9
|
@ -523,6 +523,15 @@ New Features
|
||||||
(grow on demand if you set/get/clear too-large indices). (Mike
|
(grow on demand if you set/get/clear too-large indices). (Mike
|
||||||
McCandless)
|
McCandless)
|
||||||
|
|
||||||
|
* LUCENE-2048: Added the ability to omit positions but still index
|
||||||
|
term frequencies. You can now control what is indexed into
|
||||||
|
the postings via AbstractField.setIndexOptions:
|
||||||
|
DOCS_ONLY: only documents are indexed: term frequencies and positions are omitted
|
||||||
|
DOCS_AND_FREQS: only documents and term frequencies are indexed: positions are omitted
|
||||||
|
DOCS_AND_FREQS_AND_POSITIONS: full postings: documents, frequencies, and positions
|
||||||
|
AbstractField.setOmitTermFreqAndPositions is deprecated,
|
||||||
|
you should use DOCS_ONLY instead. (Robert Muir)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
|
|
||||||
* LUCENE-3201, LUCENE-3218: CompoundFileSystem code has been consolidated
|
* LUCENE-3201, LUCENE-3218: CompoundFileSystem code has been consolidated
|
||||||
|
|
|
@ -22,6 +22,7 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.document.NumericField;
|
import org.apache.lucene.document.NumericField;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.IndexWriter;
|
import org.apache.lucene.index.IndexWriter;
|
||||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||||
import org.apache.lucene.index.IndexWriterConfig;
|
import org.apache.lucene.index.IndexWriterConfig;
|
||||||
|
@ -173,7 +174,7 @@ public class IndexFiles {
|
||||||
// the field into separate words and don't index term frequency
|
// the field into separate words and don't index term frequency
|
||||||
// or positional information:
|
// or positional information:
|
||||||
Field pathField = new Field("path", file.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
|
Field pathField = new Field("path", file.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
|
||||||
pathField.setOmitTermFreqAndPositions(true);
|
pathField.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||||
doc.add(pathField);
|
doc.add(pathField);
|
||||||
|
|
||||||
// Add the last modified date of the file a field named "modified".
|
// Add the last modified date of the file a field named "modified".
|
||||||
|
|
|
@ -87,9 +87,7 @@ public class TestNRTManager extends LuceneTestCase {
|
||||||
if (field1.getOmitNorms()) {
|
if (field1.getOmitNorms()) {
|
||||||
field2.setOmitNorms(true);
|
field2.setOmitNorms(true);
|
||||||
}
|
}
|
||||||
if (field1.getOmitTermFreqAndPositions()) {
|
field2.setIndexOptions(field1.getIndexOptions());
|
||||||
field2.setOmitTermFreqAndPositions(true);
|
|
||||||
}
|
|
||||||
doc2.add(field2);
|
doc2.add(field2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -49,7 +49,7 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||||
* <pre>
|
* <pre>
|
||||||
* Field field = new Field(name, new NumericTokenStream(precisionStep).setIntValue(value));
|
* Field field = new Field(name, new NumericTokenStream(precisionStep).setIntValue(value));
|
||||||
* field.setOmitNorms(true);
|
* field.setOmitNorms(true);
|
||||||
* field.setOmitTermFreqAndPositions(true);
|
* field.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||||
* document.add(field);
|
* document.add(field);
|
||||||
* </pre>
|
* </pre>
|
||||||
*
|
*
|
||||||
|
@ -60,7 +60,7 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||||
* NumericTokenStream stream = new NumericTokenStream(precisionStep);
|
* NumericTokenStream stream = new NumericTokenStream(precisionStep);
|
||||||
* Field field = new Field(name, stream);
|
* Field field = new Field(name, stream);
|
||||||
* field.setOmitNorms(true);
|
* field.setOmitNorms(true);
|
||||||
* field.setOmitTermFreqAndPositions(true);
|
* field.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||||
* Document document = new Document();
|
* Document document = new Document();
|
||||||
* document.add(field);
|
* document.add(field);
|
||||||
*
|
*
|
||||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.document;
|
||||||
import org.apache.lucene.search.PhraseQuery; // for javadocs
|
import org.apache.lucene.search.PhraseQuery; // for javadocs
|
||||||
import org.apache.lucene.search.spans.SpanQuery; // for javadocs
|
import org.apache.lucene.search.spans.SpanQuery; // for javadocs
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.FieldInvertState; // for javadocs
|
import org.apache.lucene.index.FieldInvertState; // for javadocs
|
||||||
import org.apache.lucene.index.values.PerDocFieldValues;
|
import org.apache.lucene.index.values.PerDocFieldValues;
|
||||||
import org.apache.lucene.index.values.ValueType;
|
import org.apache.lucene.index.values.ValueType;
|
||||||
|
@ -39,7 +40,7 @@ public abstract class AbstractField implements Fieldable {
|
||||||
protected boolean isTokenized = true;
|
protected boolean isTokenized = true;
|
||||||
protected boolean isBinary = false;
|
protected boolean isBinary = false;
|
||||||
protected boolean lazy = false;
|
protected boolean lazy = false;
|
||||||
protected boolean omitTermFreqAndPositions = false;
|
protected IndexOptions indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||||
protected float boost = 1.0f;
|
protected float boost = 1.0f;
|
||||||
// the data object for all different kind of field values
|
// the data object for all different kind of field values
|
||||||
protected Object fieldsData = null;
|
protected Object fieldsData = null;
|
||||||
|
@ -50,7 +51,6 @@ public abstract class AbstractField implements Fieldable {
|
||||||
protected int binaryOffset;
|
protected int binaryOffset;
|
||||||
protected PerDocFieldValues docValues;
|
protected PerDocFieldValues docValues;
|
||||||
|
|
||||||
|
|
||||||
protected AbstractField()
|
protected AbstractField()
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
@ -208,8 +208,8 @@ public abstract class AbstractField implements Fieldable {
|
||||||
/** True if norms are omitted for this indexed field */
|
/** True if norms are omitted for this indexed field */
|
||||||
public boolean getOmitNorms() { return omitNorms; }
|
public boolean getOmitNorms() { return omitNorms; }
|
||||||
|
|
||||||
/** @see #setOmitTermFreqAndPositions */
|
/** @see #setIndexOptions */
|
||||||
public boolean getOmitTermFreqAndPositions() { return omitTermFreqAndPositions; }
|
public IndexOptions getIndexOptions() { return indexOptions; }
|
||||||
|
|
||||||
/** Expert:
|
/** Expert:
|
||||||
*
|
*
|
||||||
|
@ -220,7 +220,7 @@ public abstract class AbstractField implements Fieldable {
|
||||||
|
|
||||||
/** Expert:
|
/** Expert:
|
||||||
*
|
*
|
||||||
* If set, omit term freq, positions and payloads from
|
* Depending on the option chosen, omit term freq, and optionally also positions and payloads, from
|
||||||
* postings for this field.
|
* postings for this field.
|
||||||
*
|
*
|
||||||
* <p><b>NOTE</b>: While this option reduces storage space
|
* <p><b>NOTE</b>: While this option reduces storage space
|
||||||
|
@ -229,7 +229,7 @@ public abstract class AbstractField implements Fieldable {
|
||||||
* PhraseQuery} or {@link SpanQuery} subclasses will
|
* PhraseQuery} or {@link SpanQuery} subclasses will
|
||||||
* silently fail to find results.
|
* silently fail to find results.
|
||||||
*/
|
*/
|
||||||
public void setOmitTermFreqAndPositions(boolean omitTermFreqAndPositions) { this.omitTermFreqAndPositions=omitTermFreqAndPositions; }
|
public void setIndexOptions(IndexOptions indexOptions) { this.indexOptions=indexOptions; }
|
||||||
|
|
||||||
public boolean isLazy() {
|
public boolean isLazy() {
|
||||||
return lazy;
|
return lazy;
|
||||||
|
@ -275,8 +275,9 @@ public abstract class AbstractField implements Fieldable {
|
||||||
if (omitNorms) {
|
if (omitNorms) {
|
||||||
result.append(",omitNorms");
|
result.append(",omitNorms");
|
||||||
}
|
}
|
||||||
if (omitTermFreqAndPositions) {
|
if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||||
result.append(",omitTermFreqAndPositions");
|
result.append(",indexOptions=");
|
||||||
|
result.append(indexOptions);
|
||||||
}
|
}
|
||||||
if (lazy){
|
if (lazy){
|
||||||
result.append(",lazy");
|
result.append(",lazy");
|
||||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.lucene.document;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.IndexWriter;
|
import org.apache.lucene.index.IndexWriter;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -389,7 +390,8 @@ public final class Field extends AbstractField implements Fieldable {
|
||||||
this.isTokenized = index.isAnalyzed();
|
this.isTokenized = index.isAnalyzed();
|
||||||
this.omitNorms = index.omitNorms();
|
this.omitNorms = index.omitNorms();
|
||||||
if (index == Index.NO) {
|
if (index == Index.NO) {
|
||||||
this.omitTermFreqAndPositions = false;
|
// note: now this reads even weirder than before
|
||||||
|
this.indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||||
}
|
}
|
||||||
|
|
||||||
this.isBinary = false;
|
this.isBinary = false;
|
||||||
|
@ -520,7 +522,7 @@ public final class Field extends AbstractField implements Fieldable {
|
||||||
isStored = true;
|
isStored = true;
|
||||||
isIndexed = false;
|
isIndexed = false;
|
||||||
isTokenized = false;
|
isTokenized = false;
|
||||||
omitTermFreqAndPositions = false;
|
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||||
omitNorms = true;
|
omitNorms = true;
|
||||||
|
|
||||||
isBinary = true;
|
isBinary = true;
|
||||||
|
|
|
@ -17,6 +17,7 @@ package org.apache.lucene.document;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.FieldInvertState; // for javadocs
|
import org.apache.lucene.index.FieldInvertState; // for javadocs
|
||||||
import org.apache.lucene.index.values.IndexDocValues;
|
import org.apache.lucene.index.values.IndexDocValues;
|
||||||
import org.apache.lucene.index.values.PerDocFieldValues;
|
import org.apache.lucene.index.values.PerDocFieldValues;
|
||||||
|
@ -194,12 +195,12 @@ public interface Fieldable {
|
||||||
*/
|
*/
|
||||||
abstract byte[] getBinaryValue(byte[] result);
|
abstract byte[] getBinaryValue(byte[] result);
|
||||||
|
|
||||||
/** @see #setOmitTermFreqAndPositions */
|
/** @see #setIndexOptions */
|
||||||
boolean getOmitTermFreqAndPositions();
|
IndexOptions getIndexOptions();
|
||||||
|
|
||||||
/** Expert:
|
/** Expert:
|
||||||
*
|
*
|
||||||
* If set, omit term freq, positions and payloads from
|
* Depending on the option chosen, omit term freq, and optionally positions and payloads, from
|
||||||
* postings for this field.
|
* postings for this field.
|
||||||
*
|
*
|
||||||
* <p><b>NOTE</b>: While this option reduces storage space
|
* <p><b>NOTE</b>: While this option reduces storage space
|
||||||
|
@ -208,7 +209,7 @@ public interface Fieldable {
|
||||||
* PhraseQuery} or {@link SpanQuery} subclasses will
|
* PhraseQuery} or {@link SpanQuery} subclasses will
|
||||||
* fail with an exception.
|
* fail with an exception.
|
||||||
*/
|
*/
|
||||||
void setOmitTermFreqAndPositions(boolean omitTermFreqAndPositions);
|
void setIndexOptions(IndexOptions indexOptions);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the {@link PerDocFieldValues}
|
* Returns the {@link PerDocFieldValues}
|
||||||
|
|
|
@ -21,6 +21,7 @@ import java.io.Reader;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.NumericTokenStream;
|
import org.apache.lucene.analysis.NumericTokenStream;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.util.NumericUtils;
|
import org.apache.lucene.util.NumericUtils;
|
||||||
import org.apache.lucene.search.NumericRangeQuery; // javadocs
|
import org.apache.lucene.search.NumericRangeQuery; // javadocs
|
||||||
import org.apache.lucene.search.NumericRangeFilter; // javadocs
|
import org.apache.lucene.search.NumericRangeFilter; // javadocs
|
||||||
|
@ -192,7 +193,7 @@ public final class NumericField extends AbstractField {
|
||||||
public NumericField(String name, int precisionStep, Field.Store store, boolean index) {
|
public NumericField(String name, int precisionStep, Field.Store store, boolean index) {
|
||||||
super(name, store, index ? Field.Index.ANALYZED_NO_NORMS : Field.Index.NO, Field.TermVector.NO);
|
super(name, store, index ? Field.Index.ANALYZED_NO_NORMS : Field.Index.NO, Field.TermVector.NO);
|
||||||
this.precisionStep = precisionStep;
|
this.precisionStep = precisionStep;
|
||||||
setOmitTermFreqAndPositions(true);
|
setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns a {@link NumericTokenStream} for indexing the numeric value. */
|
/** Returns a {@link NumericTokenStream} for indexing the numeric value. */
|
||||||
|
|
|
@ -186,8 +186,8 @@ public class CheckIndex {
|
||||||
int numFields;
|
int numFields;
|
||||||
|
|
||||||
/** True if at least one of the fields in this segment
|
/** True if at least one of the fields in this segment
|
||||||
* does not omitTermFreqAndPositions.
|
* has position data
|
||||||
* @see AbstractField#setOmitTermFreqAndPositions */
|
* @see AbstractField#setIndexOptions(org.apache.lucene.index.FieldInfo.IndexOptions) */
|
||||||
public boolean hasProx;
|
public boolean hasProx;
|
||||||
|
|
||||||
/** Map that includes certain
|
/** Map that includes certain
|
||||||
|
|
|
@ -233,7 +233,7 @@ final class DocFieldProcessor extends DocConsumer {
|
||||||
// easily add it
|
// easily add it
|
||||||
FieldInfo fi = fieldInfos.addOrUpdate(fieldName, field.isIndexed(), field.isTermVectorStored(),
|
FieldInfo fi = fieldInfos.addOrUpdate(fieldName, field.isIndexed(), field.isTermVectorStored(),
|
||||||
field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
|
field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
|
||||||
field.getOmitNorms(), false, field.getOmitTermFreqAndPositions(), field.docValuesType());
|
field.getOmitNorms(), false, field.getIndexOptions(), field.docValuesType());
|
||||||
|
|
||||||
fp = new DocFieldProcessorPerField(this, fi);
|
fp = new DocFieldProcessorPerField(this, fi);
|
||||||
fp.next = fieldHash[hashPos];
|
fp.next = fieldHash[hashPos];
|
||||||
|
@ -245,7 +245,7 @@ final class DocFieldProcessor extends DocConsumer {
|
||||||
} else {
|
} else {
|
||||||
fieldInfos.addOrUpdate(fp.fieldInfo.name, field.isIndexed(), field.isTermVectorStored(),
|
fieldInfos.addOrUpdate(fp.fieldInfo.name, field.isIndexed(), field.isTermVectorStored(),
|
||||||
field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
|
field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
|
||||||
field.getOmitNorms(), false, field.getOmitTermFreqAndPositions(), field.docValuesType());
|
field.getOmitNorms(), false, field.getIndexOptions(), field.docValuesType());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (thisFieldGen != fp.lastGen) {
|
if (thisFieldGen != fp.lastGen) {
|
||||||
|
|
|
@ -35,14 +35,27 @@ public final class FieldInfo {
|
||||||
boolean storePositionWithTermVector;
|
boolean storePositionWithTermVector;
|
||||||
|
|
||||||
public boolean omitNorms; // omit norms associated with indexed fields
|
public boolean omitNorms; // omit norms associated with indexed fields
|
||||||
public boolean omitTermFreqAndPositions;
|
public IndexOptions indexOptions;
|
||||||
|
|
||||||
public boolean storePayloads; // whether this field stores payloads together with term positions
|
public boolean storePayloads; // whether this field stores payloads together with term positions
|
||||||
private int codecId = UNASSIGNED_CODEC_ID; // set inside SegmentCodecs#build() during segment flush - this is used to identify the codec used to write this field
|
private int codecId = UNASSIGNED_CODEC_ID; // set inside SegmentCodecs#build() during segment flush - this is used to identify the codec used to write this field
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Controls how much information is stored in the postings lists.
|
||||||
|
* @lucene.experimental
|
||||||
|
*/
|
||||||
|
public static enum IndexOptions {
|
||||||
|
/** only documents are indexed: term frequencies and positions are omitted */
|
||||||
|
DOCS_ONLY,
|
||||||
|
/** only documents and term frequencies are indexed: positions are omitted */
|
||||||
|
DOCS_AND_FREQS,
|
||||||
|
/** full postings: documents, frequencies, and positions */
|
||||||
|
DOCS_AND_FREQS_AND_POSITIONS
|
||||||
|
};
|
||||||
|
|
||||||
FieldInfo(String na, boolean tk, int nu, boolean storeTermVector,
|
FieldInfo(String na, boolean tk, int nu, boolean storeTermVector,
|
||||||
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
|
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
|
||||||
boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions, ValueType docValues) {
|
boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, ValueType docValues) {
|
||||||
name = na;
|
name = na;
|
||||||
isIndexed = tk;
|
isIndexed = tk;
|
||||||
number = nu;
|
number = nu;
|
||||||
|
@ -53,16 +66,16 @@ public final class FieldInfo {
|
||||||
this.storePositionWithTermVector = storePositionWithTermVector;
|
this.storePositionWithTermVector = storePositionWithTermVector;
|
||||||
this.storePayloads = storePayloads;
|
this.storePayloads = storePayloads;
|
||||||
this.omitNorms = omitNorms;
|
this.omitNorms = omitNorms;
|
||||||
this.omitTermFreqAndPositions = omitTermFreqAndPositions;
|
this.indexOptions = indexOptions;
|
||||||
} else { // for non-indexed fields, leave defaults
|
} else { // for non-indexed fields, leave defaults
|
||||||
this.storeTermVector = false;
|
this.storeTermVector = false;
|
||||||
this.storeOffsetWithTermVector = false;
|
this.storeOffsetWithTermVector = false;
|
||||||
this.storePositionWithTermVector = false;
|
this.storePositionWithTermVector = false;
|
||||||
this.storePayloads = false;
|
this.storePayloads = false;
|
||||||
this.omitNorms = false;
|
this.omitNorms = false;
|
||||||
this.omitTermFreqAndPositions = false;
|
this.indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||||
}
|
}
|
||||||
assert !omitTermFreqAndPositions || !storePayloads;
|
assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !storePayloads;
|
||||||
}
|
}
|
||||||
|
|
||||||
void setCodecId(int codecId) {
|
void setCodecId(int codecId) {
|
||||||
|
@ -77,14 +90,14 @@ public final class FieldInfo {
|
||||||
@Override
|
@Override
|
||||||
public Object clone() {
|
public Object clone() {
|
||||||
FieldInfo clone = new FieldInfo(name, isIndexed, number, storeTermVector, storePositionWithTermVector,
|
FieldInfo clone = new FieldInfo(name, isIndexed, number, storeTermVector, storePositionWithTermVector,
|
||||||
storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions, docValues);
|
storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions, docValues);
|
||||||
clone.codecId = this.codecId;
|
clone.codecId = this.codecId;
|
||||||
return clone;
|
return clone;
|
||||||
}
|
}
|
||||||
|
|
||||||
// should only be called by FieldInfos#addOrUpdate
|
// should only be called by FieldInfos#addOrUpdate
|
||||||
void update(boolean isIndexed, boolean storeTermVector, boolean storePositionWithTermVector,
|
void update(boolean isIndexed, boolean storeTermVector, boolean storePositionWithTermVector,
|
||||||
boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions) {
|
boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, IndexOptions indexOptions) {
|
||||||
|
|
||||||
if (this.isIndexed != isIndexed) {
|
if (this.isIndexed != isIndexed) {
|
||||||
this.isIndexed = true; // once indexed, always index
|
this.isIndexed = true; // once indexed, always index
|
||||||
|
@ -105,12 +118,13 @@ public final class FieldInfo {
|
||||||
if (this.omitNorms != omitNorms) {
|
if (this.omitNorms != omitNorms) {
|
||||||
this.omitNorms = true; // if one require omitNorms at least once, it remains off for life
|
this.omitNorms = true; // if one require omitNorms at least once, it remains off for life
|
||||||
}
|
}
|
||||||
if (this.omitTermFreqAndPositions != omitTermFreqAndPositions) {
|
if (this.indexOptions != indexOptions) {
|
||||||
this.omitTermFreqAndPositions = true; // if one require omitTermFreqAndPositions at least once, it remains off for life
|
// downgrade
|
||||||
|
this.indexOptions = this.indexOptions.compareTo(indexOptions) < 0 ? this.indexOptions : indexOptions;
|
||||||
this.storePayloads = false;
|
this.storePayloads = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
assert !this.omitTermFreqAndPositions || !this.storePayloads;
|
assert this.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !this.storePayloads;
|
||||||
}
|
}
|
||||||
void setDocValues(ValueType v) {
|
void setDocValues(ValueType v) {
|
||||||
if (docValues == null) {
|
if (docValues == null) {
|
||||||
|
|
|
@ -28,6 +28,7 @@ import java.util.SortedMap;
|
||||||
import java.util.TreeMap;
|
import java.util.TreeMap;
|
||||||
import java.util.Map.Entry;
|
import java.util.Map.Entry;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.SegmentCodecs; // Required for Java 1.5 javadocs
|
import org.apache.lucene.index.SegmentCodecs; // Required for Java 1.5 javadocs
|
||||||
import org.apache.lucene.index.SegmentCodecs.SegmentCodecsBuilder;
|
import org.apache.lucene.index.SegmentCodecs.SegmentCodecsBuilder;
|
||||||
import org.apache.lucene.index.codecs.CodecProvider;
|
import org.apache.lucene.index.codecs.CodecProvider;
|
||||||
|
@ -201,13 +202,13 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
||||||
|
|
||||||
// First used in 2.9; prior to 2.9 there was no format header
|
// First used in 2.9; prior to 2.9 there was no format header
|
||||||
public static final int FORMAT_START = -2;
|
public static final int FORMAT_START = -2;
|
||||||
public static final int FORMAT_PER_FIELD_CODEC = -3;
|
// First used in 3.4: omit only positional information
|
||||||
|
public static final int FORMAT_OMIT_POSITIONS = -3;
|
||||||
// Records index values for this field
|
// per-field codec support, records index values for fields
|
||||||
public static final int FORMAT_INDEX_VALUES = -3;
|
public static final int FORMAT_FLEX = -4;
|
||||||
|
|
||||||
// whenever you add a new format, make it 1 smaller (negative version logic)!
|
// whenever you add a new format, make it 1 smaller (negative version logic)!
|
||||||
static final int FORMAT_CURRENT = FORMAT_PER_FIELD_CODEC;
|
static final int FORMAT_CURRENT = FORMAT_FLEX;
|
||||||
|
|
||||||
static final int FORMAT_MINIMUM = FORMAT_START;
|
static final int FORMAT_MINIMUM = FORMAT_START;
|
||||||
|
|
||||||
|
@ -218,8 +219,10 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
||||||
static final byte OMIT_NORMS = 0x10;
|
static final byte OMIT_NORMS = 0x10;
|
||||||
static final byte STORE_PAYLOADS = 0x20;
|
static final byte STORE_PAYLOADS = 0x20;
|
||||||
static final byte OMIT_TERM_FREQ_AND_POSITIONS = 0x40;
|
static final byte OMIT_TERM_FREQ_AND_POSITIONS = 0x40;
|
||||||
|
static final byte OMIT_POSITIONS = -128;
|
||||||
|
|
||||||
private int format;
|
private int format;
|
||||||
|
private boolean hasFreq; // only set if readonly
|
||||||
private boolean hasProx; // only set if readonly
|
private boolean hasProx; // only set if readonly
|
||||||
private boolean hasVectors; // only set if readonly
|
private boolean hasVectors; // only set if readonly
|
||||||
private long version; // internal use to track changes
|
private long version; // internal use to track changes
|
||||||
|
@ -308,6 +311,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
||||||
synchronized public Object clone() {
|
synchronized public Object clone() {
|
||||||
FieldInfos fis = new FieldInfos(globalFieldNumbers, segmentCodecsBuilder);
|
FieldInfos fis = new FieldInfos(globalFieldNumbers, segmentCodecsBuilder);
|
||||||
fis.format = format;
|
fis.format = format;
|
||||||
|
fis.hasFreq = hasFreq;
|
||||||
fis.hasProx = hasProx;
|
fis.hasProx = hasProx;
|
||||||
fis.hasVectors = hasVectors;
|
fis.hasVectors = hasVectors;
|
||||||
for (FieldInfo fi : this) {
|
for (FieldInfo fi : this) {
|
||||||
|
@ -317,14 +321,28 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
||||||
return fis;
|
return fis;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns true if any fields do not omitTermFreqAndPositions */
|
/** Returns true if any indexed field has positions */
|
||||||
public boolean hasProx() {
|
public boolean hasProx() {
|
||||||
if (isReadOnly()) {
|
if (isReadOnly()) {
|
||||||
return hasProx;
|
return hasProx;
|
||||||
}
|
}
|
||||||
// mutable FIs must check!
|
// mutable FIs must check!
|
||||||
for (FieldInfo fi : this) {
|
for (FieldInfo fi : this) {
|
||||||
if (fi.isIndexed && !fi.omitTermFreqAndPositions) {
|
if (fi.isIndexed && fi.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns true if any fields have freqs */
|
||||||
|
public boolean hasFreq() {
|
||||||
|
if (isReadOnly()) {
|
||||||
|
return hasFreq;
|
||||||
|
}
|
||||||
|
// mutable FIs must check!
|
||||||
|
for (FieldInfo fi : this) {
|
||||||
|
if (fi.isIndexed && fi.indexOptions != IndexOptions.DOCS_ONLY) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -414,7 +432,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
||||||
synchronized public void addOrUpdate(String name, boolean isIndexed, boolean storeTermVector,
|
synchronized public void addOrUpdate(String name, boolean isIndexed, boolean storeTermVector,
|
||||||
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, boolean omitNorms) {
|
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, boolean omitNorms) {
|
||||||
addOrUpdate(name, isIndexed, storeTermVector, storePositionWithTermVector,
|
addOrUpdate(name, isIndexed, storeTermVector, storePositionWithTermVector,
|
||||||
storeOffsetWithTermVector, omitNorms, false, false, null);
|
storeOffsetWithTermVector, omitNorms, false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** If the field is not yet known, adds it. If it is known, checks to make
|
/** If the field is not yet known, adds it. If it is known, checks to make
|
||||||
|
@ -429,18 +447,18 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
||||||
* @param storeOffsetWithTermVector true if the term vector with offsets should be stored
|
* @param storeOffsetWithTermVector true if the term vector with offsets should be stored
|
||||||
* @param omitNorms true if the norms for the indexed field should be omitted
|
* @param omitNorms true if the norms for the indexed field should be omitted
|
||||||
* @param storePayloads true if payloads should be stored for this field
|
* @param storePayloads true if payloads should be stored for this field
|
||||||
* @param omitTermFreqAndPositions true if term freqs should be omitted for this field
|
* @param indexOptions controls what is indexed into the postings for this field: documents only, documents and term freqs, or documents, term freqs and positions
|
||||||
*/
|
*/
|
||||||
synchronized public FieldInfo addOrUpdate(String name, boolean isIndexed, boolean storeTermVector,
|
synchronized public FieldInfo addOrUpdate(String name, boolean isIndexed, boolean storeTermVector,
|
||||||
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
|
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
|
||||||
boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions, ValueType docValues) {
|
boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, ValueType docValues) {
|
||||||
return addOrUpdateInternal(name, -1, isIndexed, storeTermVector, storePositionWithTermVector,
|
return addOrUpdateInternal(name, -1, isIndexed, storeTermVector, storePositionWithTermVector,
|
||||||
storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions, docValues);
|
storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions, docValues);
|
||||||
}
|
}
|
||||||
|
|
||||||
synchronized private FieldInfo addOrUpdateInternal(String name, int preferredFieldNumber, boolean isIndexed,
|
synchronized private FieldInfo addOrUpdateInternal(String name, int preferredFieldNumber, boolean isIndexed,
|
||||||
boolean storeTermVector, boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
|
boolean storeTermVector, boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
|
||||||
boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions, ValueType docValues) {
|
boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, ValueType docValues) {
|
||||||
if (globalFieldNumbers == null) {
|
if (globalFieldNumbers == null) {
|
||||||
throw new IllegalStateException("FieldInfos are read-only, create a new instance with a global field map to make modifications to FieldInfos");
|
throw new IllegalStateException("FieldInfos are read-only, create a new instance with a global field map to make modifications to FieldInfos");
|
||||||
}
|
}
|
||||||
|
@ -448,9 +466,9 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
||||||
FieldInfo fi = fieldInfo(name);
|
FieldInfo fi = fieldInfo(name);
|
||||||
if (fi == null) {
|
if (fi == null) {
|
||||||
final int fieldNumber = nextFieldNumber(name, preferredFieldNumber);
|
final int fieldNumber = nextFieldNumber(name, preferredFieldNumber);
|
||||||
fi = addInternal(name, fieldNumber, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions, docValues);
|
fi = addInternal(name, fieldNumber, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions, docValues);
|
||||||
} else {
|
} else {
|
||||||
fi.update(isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
|
fi.update(isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions);
|
||||||
fi.setDocValues(docValues);
|
fi.setDocValues(docValues);
|
||||||
}
|
}
|
||||||
if ((fi.isIndexed || fi.hasDocValues()) && fi.getCodecId() == FieldInfo.UNASSIGNED_CODEC_ID) {
|
if ((fi.isIndexed || fi.hasDocValues()) && fi.getCodecId() == FieldInfo.UNASSIGNED_CODEC_ID) {
|
||||||
|
@ -465,7 +483,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
||||||
return addOrUpdateInternal(fi.name, fi.number, fi.isIndexed, fi.storeTermVector,
|
return addOrUpdateInternal(fi.name, fi.number, fi.isIndexed, fi.storeTermVector,
|
||||||
fi.storePositionWithTermVector, fi.storeOffsetWithTermVector,
|
fi.storePositionWithTermVector, fi.storeOffsetWithTermVector,
|
||||||
fi.omitNorms, fi.storePayloads,
|
fi.omitNorms, fi.storePayloads,
|
||||||
fi.omitTermFreqAndPositions, fi.docValues);
|
fi.indexOptions, fi.docValues);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -473,13 +491,13 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
||||||
*/
|
*/
|
||||||
private FieldInfo addInternal(String name, int fieldNumber, boolean isIndexed,
|
private FieldInfo addInternal(String name, int fieldNumber, boolean isIndexed,
|
||||||
boolean storeTermVector, boolean storePositionWithTermVector,
|
boolean storeTermVector, boolean storePositionWithTermVector,
|
||||||
boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions, ValueType docValuesType) {
|
boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, ValueType docValuesType) {
|
||||||
// don't check modifiable here since we use that to initially build up FIs
|
// don't check modifiable here since we use that to initially build up FIs
|
||||||
if (globalFieldNumbers != null) {
|
if (globalFieldNumbers != null) {
|
||||||
globalFieldNumbers.setIfNotSet(fieldNumber, name);
|
globalFieldNumbers.setIfNotSet(fieldNumber, name);
|
||||||
}
|
}
|
||||||
final FieldInfo fi = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, storePositionWithTermVector,
|
final FieldInfo fi = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, storePositionWithTermVector,
|
||||||
storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions, docValuesType);
|
storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions, docValuesType);
|
||||||
putInternal(fi);
|
putInternal(fi);
|
||||||
return fi;
|
return fi;
|
||||||
}
|
}
|
||||||
|
@ -590,7 +608,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
||||||
output.writeVInt(FORMAT_CURRENT);
|
output.writeVInt(FORMAT_CURRENT);
|
||||||
output.writeVInt(size());
|
output.writeVInt(size());
|
||||||
for (FieldInfo fi : this) {
|
for (FieldInfo fi : this) {
|
||||||
assert !fi.omitTermFreqAndPositions || !fi.storePayloads;
|
assert fi.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !fi.storePayloads;
|
||||||
byte bits = 0x0;
|
byte bits = 0x0;
|
||||||
if (fi.isIndexed) bits |= IS_INDEXED;
|
if (fi.isIndexed) bits |= IS_INDEXED;
|
||||||
if (fi.storeTermVector) bits |= STORE_TERMVECTOR;
|
if (fi.storeTermVector) bits |= STORE_TERMVECTOR;
|
||||||
|
@ -598,7 +616,10 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
||||||
if (fi.storeOffsetWithTermVector) bits |= STORE_OFFSET_WITH_TERMVECTOR;
|
if (fi.storeOffsetWithTermVector) bits |= STORE_OFFSET_WITH_TERMVECTOR;
|
||||||
if (fi.omitNorms) bits |= OMIT_NORMS;
|
if (fi.omitNorms) bits |= OMIT_NORMS;
|
||||||
if (fi.storePayloads) bits |= STORE_PAYLOADS;
|
if (fi.storePayloads) bits |= STORE_PAYLOADS;
|
||||||
if (fi.omitTermFreqAndPositions) bits |= OMIT_TERM_FREQ_AND_POSITIONS;
|
if (fi.indexOptions == IndexOptions.DOCS_ONLY)
|
||||||
|
bits |= OMIT_TERM_FREQ_AND_POSITIONS;
|
||||||
|
else if (fi.indexOptions == IndexOptions.DOCS_AND_FREQS)
|
||||||
|
bits |= OMIT_POSITIONS;
|
||||||
output.writeString(fi.name);
|
output.writeString(fi.name);
|
||||||
output.writeInt(fi.number);
|
output.writeInt(fi.number);
|
||||||
output.writeInt(fi.getCodecId());
|
output.writeInt(fi.getCodecId());
|
||||||
|
@ -673,8 +694,8 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
||||||
for (int i = 0; i < size; i++) {
|
for (int i = 0; i < size; i++) {
|
||||||
String name = input.readString();
|
String name = input.readString();
|
||||||
// if this is a previous format codec 0 will be preflex!
|
// if this is a previous format codec 0 will be preflex!
|
||||||
final int fieldNumber = format <= FORMAT_PER_FIELD_CODEC? input.readInt():i;
|
final int fieldNumber = format <= FORMAT_FLEX? input.readInt():i;
|
||||||
final int codecId = format <= FORMAT_PER_FIELD_CODEC? input.readInt():0;
|
final int codecId = format <= FORMAT_FLEX? input.readInt():0;
|
||||||
byte bits = input.readByte();
|
byte bits = input.readByte();
|
||||||
boolean isIndexed = (bits & IS_INDEXED) != 0;
|
boolean isIndexed = (bits & IS_INDEXED) != 0;
|
||||||
boolean storeTermVector = (bits & STORE_TERMVECTOR) != 0;
|
boolean storeTermVector = (bits & STORE_TERMVECTOR) != 0;
|
||||||
|
@ -682,18 +703,30 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
||||||
boolean storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
|
boolean storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
|
||||||
boolean omitNorms = (bits & OMIT_NORMS) != 0;
|
boolean omitNorms = (bits & OMIT_NORMS) != 0;
|
||||||
boolean storePayloads = (bits & STORE_PAYLOADS) != 0;
|
boolean storePayloads = (bits & STORE_PAYLOADS) != 0;
|
||||||
boolean omitTermFreqAndPositions = (bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0;
|
final IndexOptions indexOptions;
|
||||||
|
if ((bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0) {
|
||||||
|
indexOptions = IndexOptions.DOCS_ONLY;
|
||||||
|
} else if ((bits & OMIT_POSITIONS) != 0) {
|
||||||
|
if (format <= FORMAT_OMIT_POSITIONS) {
|
||||||
|
indexOptions = IndexOptions.DOCS_AND_FREQS;
|
||||||
|
} else {
|
||||||
|
throw new CorruptIndexException("Corrupt fieldinfos, OMIT_POSITIONS set but format=" + format);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||||
|
}
|
||||||
|
|
||||||
// LUCENE-3027: past indices were able to write
|
// LUCENE-3027: past indices were able to write
|
||||||
// storePayloads=true when omitTFAP is also true,
|
// storePayloads=true when omitTFAP is also true,
|
||||||
// which is invalid. We correct that, here:
|
// which is invalid. We correct that, here:
|
||||||
if (omitTermFreqAndPositions) {
|
if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||||
storePayloads = false;
|
storePayloads = false;
|
||||||
}
|
}
|
||||||
hasVectors |= storeTermVector;
|
hasVectors |= storeTermVector;
|
||||||
hasProx |= isIndexed && !omitTermFreqAndPositions;
|
hasProx |= isIndexed && indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||||
|
hasFreq |= isIndexed && indexOptions != IndexOptions.DOCS_ONLY;
|
||||||
ValueType docValuesType = null;
|
ValueType docValuesType = null;
|
||||||
if (format <= FORMAT_INDEX_VALUES) {
|
if (format <= FORMAT_FLEX) {
|
||||||
final byte b = input.readByte();
|
final byte b = input.readByte();
|
||||||
switch(b) {
|
switch(b) {
|
||||||
case 0:
|
case 0:
|
||||||
|
@ -743,7 +776,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
||||||
throw new IllegalStateException("unhandled indexValues type " + b);
|
throw new IllegalStateException("unhandled indexValues type " + b);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
final FieldInfo addInternal = addInternal(name, fieldNumber, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions, docValuesType);
|
final FieldInfo addInternal = addInternal(name, fieldNumber, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions, docValuesType);
|
||||||
addInternal.setCodecId(codecId);
|
addInternal.setCodecId(codecId);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -771,7 +804,8 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
||||||
FieldInfo clone = (FieldInfo) (fieldInfo).clone();
|
FieldInfo clone = (FieldInfo) (fieldInfo).clone();
|
||||||
roFis.putInternal(clone);
|
roFis.putInternal(clone);
|
||||||
roFis.hasVectors |= clone.storeTermVector;
|
roFis.hasVectors |= clone.storeTermVector;
|
||||||
roFis.hasProx |= clone.isIndexed && !clone.omitTermFreqAndPositions;
|
roFis.hasProx |= clone.isIndexed && clone.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||||
|
roFis.hasFreq |= clone.isIndexed && clone.indexOptions != IndexOptions.DOCS_ONLY;
|
||||||
}
|
}
|
||||||
return roFis;
|
return roFis;
|
||||||
}
|
}
|
||||||
|
|
|
@ -340,7 +340,7 @@ public final class FieldsReader implements Cloneable, Closeable {
|
||||||
}
|
}
|
||||||
|
|
||||||
f.setOmitNorms(fi.omitNorms);
|
f.setOmitNorms(fi.omitNorms);
|
||||||
f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
|
f.setIndexOptions(fi.indexOptions);
|
||||||
doc.add(f);
|
doc.add(f);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -364,7 +364,7 @@ public final class FieldsReader implements Cloneable, Closeable {
|
||||||
termVector);
|
termVector);
|
||||||
}
|
}
|
||||||
|
|
||||||
f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
|
f.setIndexOptions(fi.indexOptions);
|
||||||
f.setOmitNorms(fi.omitNorms);
|
f.setOmitNorms(fi.omitNorms);
|
||||||
doc.add(f);
|
doc.add(f);
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,6 +22,7 @@ import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.codecs.FieldsConsumer;
|
import org.apache.lucene.index.codecs.FieldsConsumer;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.CollectionUtil;
|
import org.apache.lucene.util.CollectionUtil;
|
||||||
|
@ -79,7 +80,7 @@ final class FreqProxTermsWriter extends TermsHashConsumer {
|
||||||
|
|
||||||
// Aggregate the storePayload as seen by the same
|
// Aggregate the storePayload as seen by the same
|
||||||
// field across multiple threads
|
// field across multiple threads
|
||||||
if (!fieldInfo.omitTermFreqAndPositions) {
|
if (fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||||
fieldInfo.storePayloads |= fieldWriter.hasPayloads;
|
fieldInfo.storePayloads |= fieldWriter.hasPayloads;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -23,6 +23,7 @@ import java.util.Map;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||||
import org.apache.lucene.document.Fieldable;
|
import org.apache.lucene.document.Fieldable;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.codecs.FieldsConsumer;
|
import org.apache.lucene.index.codecs.FieldsConsumer;
|
||||||
import org.apache.lucene.index.codecs.PostingsConsumer;
|
import org.apache.lucene.index.codecs.PostingsConsumer;
|
||||||
import org.apache.lucene.index.codecs.TermStats;
|
import org.apache.lucene.index.codecs.TermStats;
|
||||||
|
@ -41,7 +42,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
||||||
final FieldInfo fieldInfo;
|
final FieldInfo fieldInfo;
|
||||||
final DocumentsWriterPerThread.DocState docState;
|
final DocumentsWriterPerThread.DocState docState;
|
||||||
final FieldInvertState fieldState;
|
final FieldInvertState fieldState;
|
||||||
boolean omitTermFreqAndPositions;
|
IndexOptions indexOptions;
|
||||||
PayloadAttribute payloadAttribute;
|
PayloadAttribute payloadAttribute;
|
||||||
|
|
||||||
public FreqProxTermsWriterPerField(TermsHashPerField termsHashPerField, FreqProxTermsWriter parent, FieldInfo fieldInfo) {
|
public FreqProxTermsWriterPerField(TermsHashPerField termsHashPerField, FreqProxTermsWriter parent, FieldInfo fieldInfo) {
|
||||||
|
@ -50,12 +51,12 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
||||||
this.fieldInfo = fieldInfo;
|
this.fieldInfo = fieldInfo;
|
||||||
docState = termsHashPerField.docState;
|
docState = termsHashPerField.docState;
|
||||||
fieldState = termsHashPerField.fieldState;
|
fieldState = termsHashPerField.fieldState;
|
||||||
omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
|
indexOptions = fieldInfo.indexOptions;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
int getStreamCount() {
|
int getStreamCount() {
|
||||||
if (fieldInfo.omitTermFreqAndPositions)
|
if (fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
|
||||||
return 1;
|
return 1;
|
||||||
else
|
else
|
||||||
return 2;
|
return 2;
|
||||||
|
@ -76,7 +77,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
||||||
void reset() {
|
void reset() {
|
||||||
// Record, up front, whether our in-RAM format will be
|
// Record, up front, whether our in-RAM format will be
|
||||||
// with or without term freqs:
|
// with or without term freqs:
|
||||||
omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
|
indexOptions = fieldInfo.indexOptions;
|
||||||
payloadAttribute = null;
|
payloadAttribute = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -126,12 +127,14 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
||||||
|
|
||||||
FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
|
FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
|
||||||
postings.lastDocIDs[termID] = docState.docID;
|
postings.lastDocIDs[termID] = docState.docID;
|
||||||
if (omitTermFreqAndPositions) {
|
if (indexOptions == IndexOptions.DOCS_ONLY) {
|
||||||
postings.lastDocCodes[termID] = docState.docID;
|
postings.lastDocCodes[termID] = docState.docID;
|
||||||
} else {
|
} else {
|
||||||
postings.lastDocCodes[termID] = docState.docID << 1;
|
postings.lastDocCodes[termID] = docState.docID << 1;
|
||||||
postings.docFreqs[termID] = 1;
|
postings.docFreqs[termID] = 1;
|
||||||
writeProx(termID, fieldState.position);
|
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||||
|
writeProx(termID, fieldState.position);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
|
fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
|
||||||
fieldState.uniqueTermCount++;
|
fieldState.uniqueTermCount++;
|
||||||
|
@ -144,9 +147,9 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
||||||
|
|
||||||
FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
|
FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
|
||||||
|
|
||||||
assert omitTermFreqAndPositions || postings.docFreqs[termID] > 0;
|
assert indexOptions == IndexOptions.DOCS_ONLY || postings.docFreqs[termID] > 0;
|
||||||
|
|
||||||
if (omitTermFreqAndPositions) {
|
if (indexOptions == IndexOptions.DOCS_ONLY) {
|
||||||
if (docState.docID != postings.lastDocIDs[termID]) {
|
if (docState.docID != postings.lastDocIDs[termID]) {
|
||||||
assert docState.docID > postings.lastDocIDs[termID];
|
assert docState.docID > postings.lastDocIDs[termID];
|
||||||
termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]);
|
termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]);
|
||||||
|
@ -172,11 +175,15 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
||||||
fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
|
fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
|
||||||
postings.lastDocCodes[termID] = (docState.docID - postings.lastDocIDs[termID]) << 1;
|
postings.lastDocCodes[termID] = (docState.docID - postings.lastDocIDs[termID]) << 1;
|
||||||
postings.lastDocIDs[termID] = docState.docID;
|
postings.lastDocIDs[termID] = docState.docID;
|
||||||
writeProx(termID, fieldState.position);
|
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||||
|
writeProx(termID, fieldState.position);
|
||||||
|
}
|
||||||
fieldState.uniqueTermCount++;
|
fieldState.uniqueTermCount++;
|
||||||
} else {
|
} else {
|
||||||
fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, ++postings.docFreqs[termID]);
|
fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, ++postings.docFreqs[termID]);
|
||||||
writeProx(termID, fieldState.position-postings.lastPositions[termID]);
|
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||||
|
writeProx(termID, fieldState.position-postings.lastPositions[termID]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -237,7 +244,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
||||||
final TermsConsumer termsConsumer = consumer.addField(fieldInfo);
|
final TermsConsumer termsConsumer = consumer.addField(fieldInfo);
|
||||||
final Comparator<BytesRef> termComp = termsConsumer.getComparator();
|
final Comparator<BytesRef> termComp = termsConsumer.getComparator();
|
||||||
|
|
||||||
final boolean currentFieldOmitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
|
final IndexOptions currentFieldIndexOptions = fieldInfo.indexOptions;
|
||||||
|
|
||||||
final Map<Term,Integer> segDeletes;
|
final Map<Term,Integer> segDeletes;
|
||||||
if (state.segDeletes != null && state.segDeletes.terms.size() > 0) {
|
if (state.segDeletes != null && state.segDeletes.terms.size() > 0) {
|
||||||
|
@ -263,7 +270,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
||||||
termsHashPerField.bytePool.setBytesRef(text, textStart);
|
termsHashPerField.bytePool.setBytesRef(text, textStart);
|
||||||
|
|
||||||
termsHashPerField.initReader(freq, termID, 0);
|
termsHashPerField.initReader(freq, termID, 0);
|
||||||
if (!fieldInfo.omitTermFreqAndPositions) {
|
if (fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||||
termsHashPerField.initReader(prox, termID, 1);
|
termsHashPerField.initReader(prox, termID, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -300,7 +307,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
||||||
if (postings.lastDocCodes[termID] != -1) {
|
if (postings.lastDocCodes[termID] != -1) {
|
||||||
// Return last doc
|
// Return last doc
|
||||||
docID = postings.lastDocIDs[termID];
|
docID = postings.lastDocIDs[termID];
|
||||||
if (!omitTermFreqAndPositions) {
|
if (indexOptions != IndexOptions.DOCS_ONLY) {
|
||||||
termFreq = postings.docFreqs[termID];
|
termFreq = postings.docFreqs[termID];
|
||||||
}
|
}
|
||||||
postings.lastDocCodes[termID] = -1;
|
postings.lastDocCodes[termID] = -1;
|
||||||
|
@ -310,7 +317,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
final int code = freq.readVInt();
|
final int code = freq.readVInt();
|
||||||
if (omitTermFreqAndPositions) {
|
if (indexOptions == IndexOptions.DOCS_ONLY) {
|
||||||
docID += code;
|
docID += code;
|
||||||
} else {
|
} else {
|
||||||
docID += code >>> 1;
|
docID += code >>> 1;
|
||||||
|
@ -351,14 +358,17 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
||||||
state.liveDocs.clear(docID);
|
state.liveDocs.clear(docID);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (currentFieldIndexOptions != IndexOptions.DOCS_ONLY) {
|
||||||
|
totTF += termDocFreq;
|
||||||
|
}
|
||||||
|
|
||||||
// Carefully copy over the prox + payload info,
|
// Carefully copy over the prox + payload info,
|
||||||
// changing the format to match Lucene's segment
|
// changing the format to match Lucene's segment
|
||||||
// format.
|
// format.
|
||||||
if (!currentFieldOmitTermFreqAndPositions) {
|
|
||||||
// omitTermFreqAndPositions == false so we do write positions &
|
if (currentFieldIndexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||||
// payload
|
// we do write positions & payload
|
||||||
int position = 0;
|
int position = 0;
|
||||||
totTF += termDocFreq;
|
|
||||||
for(int j=0;j<termDocFreq;j++) {
|
for(int j=0;j<termDocFreq;j++) {
|
||||||
final int code = prox.readVInt();
|
final int code = prox.readVInt();
|
||||||
position += code >> 1;
|
position += code >> 1;
|
||||||
|
|
|
@ -153,6 +153,8 @@ public abstract class IndexReader implements Cloneable,Closeable {
|
||||||
STORES_PAYLOADS,
|
STORES_PAYLOADS,
|
||||||
/** All fields that omit tf */
|
/** All fields that omit tf */
|
||||||
OMIT_TERM_FREQ_AND_POSITIONS,
|
OMIT_TERM_FREQ_AND_POSITIONS,
|
||||||
|
/** All fields that omit positions */
|
||||||
|
OMIT_POSITIONS,
|
||||||
/** All fields which are not indexed */
|
/** All fields which are not indexed */
|
||||||
UNINDEXED,
|
UNINDEXED,
|
||||||
/** All fields which are indexed with termvectors enabled */
|
/** All fields which are indexed with termvectors enabled */
|
||||||
|
|
|
@ -91,7 +91,7 @@ public final class SegmentInfo implements Cloneable {
|
||||||
//TODO: remove when we don't have to support old indexes anymore that had this field
|
//TODO: remove when we don't have to support old indexes anymore that had this field
|
||||||
private int hasVectors = CHECK_FIELDINFO;
|
private int hasVectors = CHECK_FIELDINFO;
|
||||||
//TODO: remove when we don't have to support old indexes anymore that had this field
|
//TODO: remove when we don't have to support old indexes anymore that had this field
|
||||||
private int hasProx = CHECK_FIELDINFO; // True if this segment has any fields with omitTermFreqAndPositions==false
|
private int hasProx = CHECK_FIELDINFO; // True if this segment has any fields with positional information
|
||||||
|
|
||||||
|
|
||||||
private FieldInfos fieldInfos;
|
private FieldInfos fieldInfos;
|
||||||
|
|
|
@ -24,6 +24,7 @@ import java.util.Collection;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.IndexReader.FieldOption;
|
import org.apache.lucene.index.IndexReader.FieldOption;
|
||||||
import org.apache.lucene.index.MergePolicy.MergeAbortedException;
|
import org.apache.lucene.index.MergePolicy.MergeAbortedException;
|
||||||
import org.apache.lucene.index.codecs.Codec;
|
import org.apache.lucene.index.codecs.Codec;
|
||||||
|
@ -158,12 +159,12 @@ final class SegmentMerger {
|
||||||
private static void addIndexed(IndexReader reader, FieldInfos fInfos,
|
private static void addIndexed(IndexReader reader, FieldInfos fInfos,
|
||||||
Collection<String> names, boolean storeTermVectors,
|
Collection<String> names, boolean storeTermVectors,
|
||||||
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
|
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
|
||||||
boolean storePayloads, boolean omitTFAndPositions)
|
boolean storePayloads, IndexOptions indexOptions)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
for (String field : names) {
|
for (String field : names) {
|
||||||
fInfos.addOrUpdate(field, true, storeTermVectors,
|
fInfos.addOrUpdate(field, true, storeTermVectors,
|
||||||
storePositionWithTermVector, storeOffsetWithTermVector, !reader
|
storePositionWithTermVector, storeOffsetWithTermVector, !reader
|
||||||
.hasNorms(field), storePayloads, omitTFAndPositions, null);
|
.hasNorms(field), storePayloads, indexOptions, null);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -223,13 +224,14 @@ final class SegmentMerger {
|
||||||
fieldInfos.add(fi);
|
fieldInfos.add(fi);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false);
|
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
|
||||||
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false);
|
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
|
||||||
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false);
|
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
|
||||||
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR), true, false, false, false, false);
|
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR), true, false, false, false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
|
||||||
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.OMIT_TERM_FREQ_AND_POSITIONS), false, false, false, false, true);
|
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.OMIT_POSITIONS), false, false, false, false, IndexOptions.DOCS_AND_FREQS);
|
||||||
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.STORES_PAYLOADS), false, false, false, true, false);
|
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.OMIT_TERM_FREQ_AND_POSITIONS), false, false, false, false, IndexOptions.DOCS_ONLY);
|
||||||
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.INDEXED), false, false, false, false, false);
|
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.STORES_PAYLOADS), false, false, false, true, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
|
||||||
|
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.INDEXED), false, false, false, false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
|
||||||
fieldInfos.addOrUpdate(reader.getFieldNames(FieldOption.UNINDEXED), false);
|
fieldInfos.addOrUpdate(reader.getFieldNames(FieldOption.UNINDEXED), false);
|
||||||
fieldInfos.addOrUpdate(reader.getFieldNames(FieldOption.DOC_VALUES), false);
|
fieldInfos.addOrUpdate(reader.getFieldNames(FieldOption.DOC_VALUES), false);
|
||||||
}
|
}
|
||||||
|
|
|
@ -29,6 +29,7 @@ import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.FieldSelector;
|
import org.apache.lucene.document.FieldSelector;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.codecs.PerDocValues;
|
import org.apache.lucene.index.codecs.PerDocValues;
|
||||||
import org.apache.lucene.store.BufferedIndexInput;
|
import org.apache.lucene.store.BufferedIndexInput;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
|
@ -513,7 +514,10 @@ public class SegmentReader extends IndexReader implements Cloneable {
|
||||||
else if (!fi.isIndexed && fieldOption == IndexReader.FieldOption.UNINDEXED) {
|
else if (!fi.isIndexed && fieldOption == IndexReader.FieldOption.UNINDEXED) {
|
||||||
fieldSet.add(fi.name);
|
fieldSet.add(fi.name);
|
||||||
}
|
}
|
||||||
else if (fi.omitTermFreqAndPositions && fieldOption == IndexReader.FieldOption.OMIT_TERM_FREQ_AND_POSITIONS) {
|
else if (fi.indexOptions == IndexOptions.DOCS_ONLY && fieldOption == IndexReader.FieldOption.OMIT_TERM_FREQ_AND_POSITIONS) {
|
||||||
|
fieldSet.add(fi.name);
|
||||||
|
}
|
||||||
|
else if (fi.indexOptions == IndexOptions.DOCS_AND_FREQS && fieldOption == IndexReader.FieldOption.OMIT_POSITIONS) {
|
||||||
fieldSet.add(fi.name);
|
fieldSet.add(fi.name);
|
||||||
}
|
}
|
||||||
else if (fi.storePayloads && fieldOption == IndexReader.FieldOption.STORES_PAYLOADS) {
|
else if (fi.storePayloads && fieldOption == IndexReader.FieldOption.STORES_PAYLOADS) {
|
||||||
|
|
|
@ -27,6 +27,7 @@ import java.util.TreeMap;
|
||||||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||||
import org.apache.lucene.index.DocsEnum;
|
import org.apache.lucene.index.DocsEnum;
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.FieldInfos;
|
import org.apache.lucene.index.FieldInfos;
|
||||||
import org.apache.lucene.index.FieldsEnum;
|
import org.apache.lucene.index.FieldsEnum;
|
||||||
import org.apache.lucene.index.IndexFileNames;
|
import org.apache.lucene.index.IndexFileNames;
|
||||||
|
@ -136,7 +137,7 @@ public class BlockTermsReader extends FieldsProducer {
|
||||||
assert numTerms >= 0;
|
assert numTerms >= 0;
|
||||||
final long termsStartPointer = in.readVLong();
|
final long termsStartPointer = in.readVLong();
|
||||||
final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
|
final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
|
||||||
final long sumTotalTermFreq = fieldInfo.omitTermFreqAndPositions ? -1 : in.readVLong();
|
final long sumTotalTermFreq = fieldInfo.indexOptions == IndexOptions.DOCS_ONLY ? -1 : in.readVLong();
|
||||||
final long sumDocFreq = in.readVLong();
|
final long sumDocFreq = in.readVLong();
|
||||||
assert !fields.containsKey(fieldInfo.name);
|
assert !fields.containsKey(fieldInfo.name);
|
||||||
fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq));
|
fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq));
|
||||||
|
@ -709,7 +710,7 @@ public class BlockTermsReader extends FieldsProducer {
|
||||||
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
|
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
|
||||||
//System.out.println("BTR.d&p this=" + this);
|
//System.out.println("BTR.d&p this=" + this);
|
||||||
decodeMetaData();
|
decodeMetaData();
|
||||||
if (fieldInfo.omitTermFreqAndPositions) {
|
if (fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||||
return null;
|
return null;
|
||||||
} else {
|
} else {
|
||||||
DocsAndPositionsEnum dpe = postingsReader.docsAndPositions(fieldInfo, state, liveDocs, reuse);
|
DocsAndPositionsEnum dpe = postingsReader.docsAndPositions(fieldInfo, state, liveDocs, reuse);
|
||||||
|
@ -867,7 +868,7 @@ public class BlockTermsReader extends FieldsProducer {
|
||||||
// just skipN here:
|
// just skipN here:
|
||||||
state.docFreq = freqReader.readVInt();
|
state.docFreq = freqReader.readVInt();
|
||||||
//System.out.println(" dF=" + state.docFreq);
|
//System.out.println(" dF=" + state.docFreq);
|
||||||
if (!fieldInfo.omitTermFreqAndPositions) {
|
if (fieldInfo.indexOptions != IndexOptions.DOCS_ONLY) {
|
||||||
state.totalTermFreq = state.docFreq + freqReader.readVLong();
|
state.totalTermFreq = state.docFreq + freqReader.readVLong();
|
||||||
//System.out.println(" totTF=" + state.totalTermFreq);
|
//System.out.println(" totTF=" + state.totalTermFreq);
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,6 +23,7 @@ import java.util.Comparator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.FieldInfos;
|
import org.apache.lucene.index.FieldInfos;
|
||||||
import org.apache.lucene.index.IndexFileNames;
|
import org.apache.lucene.index.IndexFileNames;
|
||||||
import org.apache.lucene.index.SegmentWriteState;
|
import org.apache.lucene.index.SegmentWriteState;
|
||||||
|
@ -129,7 +130,7 @@ public class BlockTermsWriter extends FieldsConsumer {
|
||||||
out.writeVInt(field.fieldInfo.number);
|
out.writeVInt(field.fieldInfo.number);
|
||||||
out.writeVLong(field.numTerms);
|
out.writeVLong(field.numTerms);
|
||||||
out.writeVLong(field.termsStartPointer);
|
out.writeVLong(field.termsStartPointer);
|
||||||
if (!field.fieldInfo.omitTermFreqAndPositions) {
|
if (field.fieldInfo.indexOptions != IndexOptions.DOCS_ONLY) {
|
||||||
out.writeVLong(field.sumTotalTermFreq);
|
out.writeVLong(field.sumTotalTermFreq);
|
||||||
}
|
}
|
||||||
out.writeVLong(field.sumDocFreq);
|
out.writeVLong(field.sumDocFreq);
|
||||||
|
@ -298,7 +299,7 @@ public class BlockTermsWriter extends FieldsConsumer {
|
||||||
final TermStats stats = pendingTerms[termCount].stats;
|
final TermStats stats = pendingTerms[termCount].stats;
|
||||||
assert stats != null;
|
assert stats != null;
|
||||||
bytesWriter.writeVInt(stats.docFreq);
|
bytesWriter.writeVInt(stats.docFreq);
|
||||||
if (!fieldInfo.omitTermFreqAndPositions) {
|
if (fieldInfo.indexOptions != IndexOptions.DOCS_ONLY) {
|
||||||
bytesWriter.writeVLong(stats.totalTermFreq-stats.docFreq);
|
bytesWriter.writeVLong(stats.totalTermFreq-stats.docFreq);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,6 +21,7 @@ import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||||
import org.apache.lucene.index.DocsEnum;
|
import org.apache.lucene.index.DocsEnum;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.search.DocIdSetIterator;
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
|
||||||
|
@ -60,16 +61,17 @@ public abstract class PostingsConsumer {
|
||||||
int df = 0;
|
int df = 0;
|
||||||
long totTF = 0;
|
long totTF = 0;
|
||||||
|
|
||||||
if (mergeState.fieldInfo.omitTermFreqAndPositions) {
|
if (mergeState.fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||||
while(true) {
|
while(true) {
|
||||||
final int doc = postings.nextDoc();
|
final int doc = postings.nextDoc();
|
||||||
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
|
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
this.startDoc(doc, postings.freq());
|
final int freq = postings.freq();
|
||||||
|
this.startDoc(doc, freq);
|
||||||
this.finishDoc();
|
this.finishDoc();
|
||||||
df++;
|
df++;
|
||||||
totTF++;
|
totTF += freq;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
final DocsAndPositionsEnum postingsEnum = (DocsAndPositionsEnum) postings;
|
final DocsAndPositionsEnum postingsEnum = (DocsAndPositionsEnum) postings;
|
||||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.lucene.index.codecs;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.TermsEnum;
|
import org.apache.lucene.index.TermsEnum;
|
||||||
import org.apache.lucene.index.MultiDocsEnum;
|
import org.apache.lucene.index.MultiDocsEnum;
|
||||||
import org.apache.lucene.index.MultiDocsAndPositionsEnum;
|
import org.apache.lucene.index.MultiDocsAndPositionsEnum;
|
||||||
|
@ -59,7 +60,7 @@ public abstract class TermsConsumer {
|
||||||
long sumDocFreq = 0;
|
long sumDocFreq = 0;
|
||||||
long sumDFsinceLastAbortCheck = 0;
|
long sumDFsinceLastAbortCheck = 0;
|
||||||
|
|
||||||
if (mergeState.fieldInfo.omitTermFreqAndPositions) {
|
if (mergeState.fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||||
if (docsEnum == null) {
|
if (docsEnum == null) {
|
||||||
docsEnum = new MappingMultiDocsEnum();
|
docsEnum = new MappingMultiDocsEnum();
|
||||||
}
|
}
|
||||||
|
@ -75,6 +76,7 @@ public abstract class TermsConsumer {
|
||||||
final TermStats stats = postingsConsumer.merge(mergeState, docsEnum);
|
final TermStats stats = postingsConsumer.merge(mergeState, docsEnum);
|
||||||
if (stats.docFreq > 0) {
|
if (stats.docFreq > 0) {
|
||||||
finishTerm(term, stats);
|
finishTerm(term, stats);
|
||||||
|
sumTotalTermFreq += stats.totalTermFreq;
|
||||||
sumDFsinceLastAbortCheck += stats.docFreq;
|
sumDFsinceLastAbortCheck += stats.docFreq;
|
||||||
sumDocFreq += stats.docFreq;
|
sumDocFreq += stats.docFreq;
|
||||||
if (sumDFsinceLastAbortCheck > 60000) {
|
if (sumDFsinceLastAbortCheck > 60000) {
|
||||||
|
|
|
@ -27,6 +27,7 @@ import java.util.TreeMap;
|
||||||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||||
import org.apache.lucene.index.DocsEnum;
|
import org.apache.lucene.index.DocsEnum;
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.FieldInfos;
|
import org.apache.lucene.index.FieldInfos;
|
||||||
import org.apache.lucene.index.FieldsEnum;
|
import org.apache.lucene.index.FieldsEnum;
|
||||||
import org.apache.lucene.index.IndexFileNames;
|
import org.apache.lucene.index.IndexFileNames;
|
||||||
|
@ -118,7 +119,7 @@ public class MemoryCodec extends Codec {
|
||||||
lastDocID = docID;
|
lastDocID = docID;
|
||||||
docCount++;
|
docCount++;
|
||||||
|
|
||||||
if (field.omitTermFreqAndPositions) {
|
if (field.indexOptions == IndexOptions.DOCS_ONLY) {
|
||||||
buffer.writeVInt(delta);
|
buffer.writeVInt(delta);
|
||||||
} else if (termDocFreq == 1) {
|
} else if (termDocFreq == 1) {
|
||||||
buffer.writeVInt((delta<<1) | 1);
|
buffer.writeVInt((delta<<1) | 1);
|
||||||
|
@ -192,7 +193,7 @@ public class MemoryCodec extends Codec {
|
||||||
assert buffer2.getFilePointer() == 0;
|
assert buffer2.getFilePointer() == 0;
|
||||||
|
|
||||||
buffer2.writeVInt(stats.docFreq);
|
buffer2.writeVInt(stats.docFreq);
|
||||||
if (!field.omitTermFreqAndPositions) {
|
if (field.indexOptions != IndexOptions.DOCS_ONLY) {
|
||||||
buffer2.writeVLong(stats.totalTermFreq-stats.docFreq);
|
buffer2.writeVLong(stats.totalTermFreq-stats.docFreq);
|
||||||
}
|
}
|
||||||
int pos = (int) buffer2.getFilePointer();
|
int pos = (int) buffer2.getFilePointer();
|
||||||
|
@ -223,7 +224,7 @@ public class MemoryCodec extends Codec {
|
||||||
if (termCount > 0) {
|
if (termCount > 0) {
|
||||||
out.writeVInt(termCount);
|
out.writeVInt(termCount);
|
||||||
out.writeVInt(field.number);
|
out.writeVInt(field.number);
|
||||||
if (!field.omitTermFreqAndPositions) {
|
if (field.indexOptions != IndexOptions.DOCS_ONLY) {
|
||||||
out.writeVLong(sumTotalTermFreq);
|
out.writeVLong(sumTotalTermFreq);
|
||||||
}
|
}
|
||||||
out.writeVLong(sumDocFreq);
|
out.writeVLong(sumDocFreq);
|
||||||
|
@ -266,7 +267,7 @@ public class MemoryCodec extends Codec {
|
||||||
}
|
}
|
||||||
|
|
||||||
private final static class FSTDocsEnum extends DocsEnum {
|
private final static class FSTDocsEnum extends DocsEnum {
|
||||||
private final boolean omitTFAP;
|
private final IndexOptions indexOptions;
|
||||||
private final boolean storePayloads;
|
private final boolean storePayloads;
|
||||||
private byte[] buffer = new byte[16];
|
private byte[] buffer = new byte[16];
|
||||||
private final ByteArrayDataInput in = new ByteArrayDataInput(buffer);
|
private final ByteArrayDataInput in = new ByteArrayDataInput(buffer);
|
||||||
|
@ -278,13 +279,13 @@ public class MemoryCodec extends Codec {
|
||||||
private int payloadLen;
|
private int payloadLen;
|
||||||
private int numDocs;
|
private int numDocs;
|
||||||
|
|
||||||
public FSTDocsEnum(boolean omitTFAP, boolean storePayloads) {
|
public FSTDocsEnum(IndexOptions indexOptions, boolean storePayloads) {
|
||||||
this.omitTFAP = omitTFAP;
|
this.indexOptions = indexOptions;
|
||||||
this.storePayloads = storePayloads;
|
this.storePayloads = storePayloads;
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean canReuse(boolean omitTFAP, boolean storePayloads) {
|
public boolean canReuse(IndexOptions indexOptions, boolean storePayloads) {
|
||||||
return omitTFAP == this.omitTFAP && storePayloads == this.storePayloads;
|
return indexOptions == this.indexOptions && storePayloads == this.storePayloads;
|
||||||
}
|
}
|
||||||
|
|
||||||
public FSTDocsEnum reset(BytesRef bufferIn, Bits liveDocs, int numDocs) {
|
public FSTDocsEnum reset(BytesRef bufferIn, Bits liveDocs, int numDocs) {
|
||||||
|
@ -313,7 +314,7 @@ public class MemoryCodec extends Codec {
|
||||||
return docID = NO_MORE_DOCS;
|
return docID = NO_MORE_DOCS;
|
||||||
}
|
}
|
||||||
docUpto++;
|
docUpto++;
|
||||||
if (omitTFAP) {
|
if (indexOptions == IndexOptions.DOCS_ONLY) {
|
||||||
docID += in.readVInt();
|
docID += in.readVInt();
|
||||||
freq = 1;
|
freq = 1;
|
||||||
} else {
|
} else {
|
||||||
|
@ -327,16 +328,18 @@ public class MemoryCodec extends Codec {
|
||||||
assert freq > 0;
|
assert freq > 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skip positions
|
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||||
for(int posUpto=0;posUpto<freq;posUpto++) {
|
// Skip positions
|
||||||
if (!storePayloads) {
|
for(int posUpto=0;posUpto<freq;posUpto++) {
|
||||||
in.readVInt();
|
if (!storePayloads) {
|
||||||
} else {
|
in.readVInt();
|
||||||
final int posCode = in.readVInt();
|
} else {
|
||||||
if ((posCode & 1) != 0) {
|
final int posCode = in.readVInt();
|
||||||
payloadLen = in.readVInt();
|
if ((posCode & 1) != 0) {
|
||||||
|
payloadLen = in.readVInt();
|
||||||
|
}
|
||||||
|
in.skipBytes(payloadLen);
|
||||||
}
|
}
|
||||||
in.skipBytes(payloadLen);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -432,7 +435,7 @@ public class MemoryCodec extends Codec {
|
||||||
return docID = NO_MORE_DOCS;
|
return docID = NO_MORE_DOCS;
|
||||||
}
|
}
|
||||||
docUpto++;
|
docUpto++;
|
||||||
|
|
||||||
final int code = in.readVInt();
|
final int code = in.readVInt();
|
||||||
docID += code >>> 1;
|
docID += code >>> 1;
|
||||||
if ((code & 1) != 0) {
|
if ((code & 1) != 0) {
|
||||||
|
@ -454,8 +457,8 @@ public class MemoryCodec extends Codec {
|
||||||
if (!storePayloads) {
|
if (!storePayloads) {
|
||||||
in.readVInt();
|
in.readVInt();
|
||||||
} else {
|
} else {
|
||||||
final int codeSkip = in.readVInt();
|
final int skipCode = in.readVInt();
|
||||||
if ((codeSkip & 1) != 0) {
|
if ((skipCode & 1) != 0) {
|
||||||
payloadLength = in.readVInt();
|
payloadLength = in.readVInt();
|
||||||
if (VERBOSE) System.out.println(" new payloadLen=" + payloadLength);
|
if (VERBOSE) System.out.println(" new payloadLen=" + payloadLength);
|
||||||
}
|
}
|
||||||
|
@ -548,7 +551,7 @@ public class MemoryCodec extends Codec {
|
||||||
if (!didDecode) {
|
if (!didDecode) {
|
||||||
buffer.reset(current.output.bytes, 0, current.output.length);
|
buffer.reset(current.output.bytes, 0, current.output.length);
|
||||||
docFreq = buffer.readVInt();
|
docFreq = buffer.readVInt();
|
||||||
if (!field.omitTermFreqAndPositions) {
|
if (field.indexOptions != IndexOptions.DOCS_ONLY) {
|
||||||
totalTermFreq = docFreq + buffer.readVLong();
|
totalTermFreq = docFreq + buffer.readVLong();
|
||||||
} else {
|
} else {
|
||||||
totalTermFreq = 0;
|
totalTermFreq = 0;
|
||||||
|
@ -598,11 +601,11 @@ public class MemoryCodec extends Codec {
|
||||||
decodeMetaData();
|
decodeMetaData();
|
||||||
FSTDocsEnum docsEnum;
|
FSTDocsEnum docsEnum;
|
||||||
if (reuse == null || !(reuse instanceof FSTDocsEnum)) {
|
if (reuse == null || !(reuse instanceof FSTDocsEnum)) {
|
||||||
docsEnum = new FSTDocsEnum(field.omitTermFreqAndPositions, field.storePayloads);
|
docsEnum = new FSTDocsEnum(field.indexOptions, field.storePayloads);
|
||||||
} else {
|
} else {
|
||||||
docsEnum = (FSTDocsEnum) reuse;
|
docsEnum = (FSTDocsEnum) reuse;
|
||||||
if (!docsEnum.canReuse(field.omitTermFreqAndPositions, field.storePayloads)) {
|
if (!docsEnum.canReuse(field.indexOptions, field.storePayloads)) {
|
||||||
docsEnum = new FSTDocsEnum(field.omitTermFreqAndPositions, field.storePayloads);
|
docsEnum = new FSTDocsEnum(field.indexOptions, field.storePayloads);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return docsEnum.reset(current.output, liveDocs, docFreq);
|
return docsEnum.reset(current.output, liveDocs, docFreq);
|
||||||
|
@ -610,7 +613,7 @@ public class MemoryCodec extends Codec {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
|
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
|
||||||
if (field.omitTermFreqAndPositions) {
|
if (field.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
decodeMetaData();
|
decodeMetaData();
|
||||||
|
@ -686,7 +689,7 @@ public class MemoryCodec extends Codec {
|
||||||
public TermsReader(FieldInfos fieldInfos, IndexInput in) throws IOException {
|
public TermsReader(FieldInfos fieldInfos, IndexInput in) throws IOException {
|
||||||
final int fieldNumber = in.readVInt();
|
final int fieldNumber = in.readVInt();
|
||||||
field = fieldInfos.fieldInfo(fieldNumber);
|
field = fieldInfos.fieldInfo(fieldNumber);
|
||||||
if (!field.omitTermFreqAndPositions) {
|
if (field.indexOptions != IndexOptions.DOCS_ONLY) {
|
||||||
sumTotalTermFreq = in.readVLong();
|
sumTotalTermFreq = in.readVLong();
|
||||||
} else {
|
} else {
|
||||||
sumTotalTermFreq = 0;
|
sumTotalTermFreq = 0;
|
||||||
|
|
|
@ -25,9 +25,11 @@ import java.util.Iterator;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.TreeMap;
|
import java.util.TreeMap;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.CorruptIndexException;
|
||||||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||||
import org.apache.lucene.index.DocsEnum;
|
import org.apache.lucene.index.DocsEnum;
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.FieldInfos;
|
import org.apache.lucene.index.FieldInfos;
|
||||||
import org.apache.lucene.index.FieldsEnum;
|
import org.apache.lucene.index.FieldsEnum;
|
||||||
import org.apache.lucene.index.IndexFileNames;
|
import org.apache.lucene.index.IndexFileNames;
|
||||||
|
@ -99,7 +101,7 @@ public class PreFlexFields extends FieldsProducer {
|
||||||
if (fi.isIndexed) {
|
if (fi.isIndexed) {
|
||||||
fields.put(fi.name, fi);
|
fields.put(fi.name, fi);
|
||||||
preTerms.put(fi.name, new PreTerms(fi));
|
preTerms.put(fi.name, new PreTerms(fi));
|
||||||
if (!fi.omitTermFreqAndPositions) {
|
if (fi.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||||
anyProx = true;
|
anyProx = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -973,7 +975,7 @@ public class PreFlexFields extends FieldsProducer {
|
||||||
@Override
|
@Override
|
||||||
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
|
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
|
||||||
PreDocsAndPositionsEnum docsPosEnum;
|
PreDocsAndPositionsEnum docsPosEnum;
|
||||||
if (fieldInfo.omitTermFreqAndPositions) {
|
if (fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||||
return null;
|
return null;
|
||||||
} else if (reuse == null || !(reuse instanceof PreDocsAndPositionsEnum)) {
|
} else if (reuse == null || !(reuse instanceof PreDocsAndPositionsEnum)) {
|
||||||
docsPosEnum = new PreDocsAndPositionsEnum();
|
docsPosEnum = new PreDocsAndPositionsEnum();
|
||||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.lucene.index.codecs.preflex;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.FieldInfos;
|
import org.apache.lucene.index.FieldInfos;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.codecs.standard.DefaultSkipListReader;
|
import org.apache.lucene.index.codecs.standard.DefaultSkipListReader;
|
||||||
|
@ -51,7 +52,7 @@ public class SegmentTermDocs {
|
||||||
private boolean haveSkipped;
|
private boolean haveSkipped;
|
||||||
|
|
||||||
protected boolean currentFieldStoresPayloads;
|
protected boolean currentFieldStoresPayloads;
|
||||||
protected boolean currentFieldOmitTermFreqAndPositions;
|
protected IndexOptions indexOptions;
|
||||||
|
|
||||||
public SegmentTermDocs(IndexInput freqStream, TermInfosReader tis, FieldInfos fieldInfos) {
|
public SegmentTermDocs(IndexInput freqStream, TermInfosReader tis, FieldInfos fieldInfos) {
|
||||||
this.freqStream = (IndexInput) freqStream.clone();
|
this.freqStream = (IndexInput) freqStream.clone();
|
||||||
|
@ -89,7 +90,7 @@ public class SegmentTermDocs {
|
||||||
void seek(TermInfo ti, Term term) throws IOException {
|
void seek(TermInfo ti, Term term) throws IOException {
|
||||||
count = 0;
|
count = 0;
|
||||||
FieldInfo fi = fieldInfos.fieldInfo(term.field());
|
FieldInfo fi = fieldInfos.fieldInfo(term.field());
|
||||||
currentFieldOmitTermFreqAndPositions = (fi != null) ? fi.omitTermFreqAndPositions : false;
|
this.indexOptions = (fi != null) ? fi.indexOptions : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||||
currentFieldStoresPayloads = (fi != null) ? fi.storePayloads : false;
|
currentFieldStoresPayloads = (fi != null) ? fi.storePayloads : false;
|
||||||
if (ti == null) {
|
if (ti == null) {
|
||||||
df = 0;
|
df = 0;
|
||||||
|
@ -122,7 +123,7 @@ public class SegmentTermDocs {
|
||||||
return false;
|
return false;
|
||||||
final int docCode = freqStream.readVInt();
|
final int docCode = freqStream.readVInt();
|
||||||
|
|
||||||
if (currentFieldOmitTermFreqAndPositions) {
|
if (indexOptions == IndexOptions.DOCS_ONLY) {
|
||||||
doc += docCode;
|
doc += docCode;
|
||||||
freq = 1;
|
freq = 1;
|
||||||
} else {
|
} else {
|
||||||
|
@ -149,7 +150,7 @@ public class SegmentTermDocs {
|
||||||
public int read(final int[] docs, final int[] freqs)
|
public int read(final int[] docs, final int[] freqs)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
final int length = docs.length;
|
final int length = docs.length;
|
||||||
if (currentFieldOmitTermFreqAndPositions) {
|
if (indexOptions == IndexOptions.DOCS_ONLY) {
|
||||||
return readNoTf(docs, freqs, length);
|
return readNoTf(docs, freqs, length);
|
||||||
} else {
|
} else {
|
||||||
int i = 0;
|
int i = 0;
|
||||||
|
|
|
@ -21,6 +21,7 @@ import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.lucene.index.FieldInfos;
|
import org.apache.lucene.index.FieldInfos;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.store.IndexInput;
|
import org.apache.lucene.store.IndexInput;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -77,8 +78,8 @@ extends SegmentTermDocs {
|
||||||
}
|
}
|
||||||
|
|
||||||
public final int nextPosition() throws IOException {
|
public final int nextPosition() throws IOException {
|
||||||
if (currentFieldOmitTermFreqAndPositions)
|
if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
|
||||||
// This field does not store term freq, positions, payloads
|
// This field does not store positions, payloads
|
||||||
return 0;
|
return 0;
|
||||||
// perform lazy skips if necessary
|
// perform lazy skips if necessary
|
||||||
lazySkip();
|
lazySkip();
|
||||||
|
@ -140,7 +141,7 @@ extends SegmentTermDocs {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void skipPositions(int n) throws IOException {
|
private void skipPositions(int n) throws IOException {
|
||||||
assert !currentFieldOmitTermFreqAndPositions;
|
assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||||
for (int f = n; f > 0; f--) { // skip unread positions
|
for (int f = n; f > 0; f--) { // skip unread positions
|
||||||
readDeltaPosition();
|
readDeltaPosition();
|
||||||
skipPayload();
|
skipPayload();
|
||||||
|
|
|
@ -22,6 +22,7 @@ import java.io.IOException;
|
||||||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||||
import org.apache.lucene.index.DocsEnum;
|
import org.apache.lucene.index.DocsEnum;
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.TermState;
|
import org.apache.lucene.index.TermState;
|
||||||
import org.apache.lucene.index.codecs.PostingsReaderBase;
|
import org.apache.lucene.index.codecs.PostingsReaderBase;
|
||||||
import org.apache.lucene.index.codecs.BlockTermState;
|
import org.apache.lucene.index.codecs.BlockTermState;
|
||||||
|
@ -134,8 +135,8 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
|
||||||
//System.out.println("PR nextTerm");
|
//System.out.println("PR nextTerm");
|
||||||
PulsingTermState termState = (PulsingTermState) _termState;
|
PulsingTermState termState = (PulsingTermState) _termState;
|
||||||
|
|
||||||
// total TF, but in the omitTFAP case its computed based on docFreq.
|
// if we have positions, its total TF, otherwise its computed based on docFreq.
|
||||||
long count = fieldInfo.omitTermFreqAndPositions ? termState.docFreq : termState.totalTermFreq;
|
long count = fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS ? termState.totalTermFreq : termState.docFreq;
|
||||||
//System.out.println(" count=" + count + " threshold=" + maxPositions);
|
//System.out.println(" count=" + count + " threshold=" + maxPositions);
|
||||||
|
|
||||||
if (count <= maxPositions) {
|
if (count <= maxPositions) {
|
||||||
|
@ -193,7 +194,7 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
|
||||||
// TODO: -- not great that we can't always reuse
|
// TODO: -- not great that we can't always reuse
|
||||||
@Override
|
@Override
|
||||||
public DocsAndPositionsEnum docsAndPositions(FieldInfo field, BlockTermState _termState, Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
|
public DocsAndPositionsEnum docsAndPositions(FieldInfo field, BlockTermState _termState, Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
|
||||||
if (field.omitTermFreqAndPositions) {
|
if (field.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
//System.out.println("D&P: field=" + field.name);
|
//System.out.println("D&P: field=" + field.name);
|
||||||
|
@ -223,7 +224,7 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
|
||||||
|
|
||||||
private static class PulsingDocsEnum extends DocsEnum {
|
private static class PulsingDocsEnum extends DocsEnum {
|
||||||
private final ByteArrayDataInput postings = new ByteArrayDataInput();
|
private final ByteArrayDataInput postings = new ByteArrayDataInput();
|
||||||
private final boolean omitTF;
|
private final IndexOptions indexOptions;
|
||||||
private final boolean storePayloads;
|
private final boolean storePayloads;
|
||||||
private Bits liveDocs;
|
private Bits liveDocs;
|
||||||
private int docID;
|
private int docID;
|
||||||
|
@ -231,7 +232,7 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
|
||||||
private int payloadLength;
|
private int payloadLength;
|
||||||
|
|
||||||
public PulsingDocsEnum(FieldInfo fieldInfo) {
|
public PulsingDocsEnum(FieldInfo fieldInfo) {
|
||||||
omitTF = fieldInfo.omitTermFreqAndPositions;
|
indexOptions = fieldInfo.indexOptions;
|
||||||
storePayloads = fieldInfo.storePayloads;
|
storePayloads = fieldInfo.storePayloads;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -249,7 +250,7 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean canReuse(FieldInfo fieldInfo) {
|
boolean canReuse(FieldInfo fieldInfo) {
|
||||||
return omitTF == fieldInfo.omitTermFreqAndPositions && storePayloads == fieldInfo.storePayloads;
|
return indexOptions == fieldInfo.indexOptions && storePayloads == fieldInfo.storePayloads;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -262,7 +263,7 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
|
||||||
}
|
}
|
||||||
|
|
||||||
final int code = postings.readVInt();
|
final int code = postings.readVInt();
|
||||||
if (omitTF) {
|
if (indexOptions == IndexOptions.DOCS_ONLY) {
|
||||||
docID += code;
|
docID += code;
|
||||||
} else {
|
} else {
|
||||||
docID += code >>> 1; // shift off low bit
|
docID += code >>> 1; // shift off low bit
|
||||||
|
@ -272,22 +273,24 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
|
||||||
freq = postings.readVInt(); // else read freq
|
freq = postings.readVInt(); // else read freq
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skip positions
|
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||||
if (storePayloads) {
|
// Skip positions
|
||||||
for(int pos=0;pos<freq;pos++) {
|
if (storePayloads) {
|
||||||
final int posCode = postings.readVInt();
|
for(int pos=0;pos<freq;pos++) {
|
||||||
if ((posCode & 1) != 0) {
|
final int posCode = postings.readVInt();
|
||||||
payloadLength = postings.readVInt();
|
if ((posCode & 1) != 0) {
|
||||||
|
payloadLength = postings.readVInt();
|
||||||
|
}
|
||||||
|
if (payloadLength != 0) {
|
||||||
|
postings.skipBytes(payloadLength);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (payloadLength != 0) {
|
} else {
|
||||||
postings.skipBytes(payloadLength);
|
for(int pos=0;pos<freq;pos++) {
|
||||||
|
// TODO: skipVInt
|
||||||
|
postings.readVInt();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
for(int pos=0;pos<freq;pos++) {
|
|
||||||
// TODO: skipVInt
|
|
||||||
postings.readVInt();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.lucene.index.codecs.pulsing;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.codecs.PostingsWriterBase;
|
import org.apache.lucene.index.codecs.PostingsWriterBase;
|
||||||
import org.apache.lucene.index.codecs.TermStats;
|
import org.apache.lucene.index.codecs.TermStats;
|
||||||
import org.apache.lucene.store.IndexOutput;
|
import org.apache.lucene.store.IndexOutput;
|
||||||
|
@ -46,7 +47,7 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase {
|
||||||
|
|
||||||
private IndexOutput termsOut;
|
private IndexOutput termsOut;
|
||||||
|
|
||||||
private boolean omitTF;
|
private IndexOptions indexOptions;
|
||||||
private boolean storePayloads;
|
private boolean storePayloads;
|
||||||
|
|
||||||
// one entry per position
|
// one entry per position
|
||||||
|
@ -102,7 +103,7 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase {
|
||||||
// our parent calls setField whenever the field changes
|
// our parent calls setField whenever the field changes
|
||||||
@Override
|
@Override
|
||||||
public void setField(FieldInfo fieldInfo) {
|
public void setField(FieldInfo fieldInfo) {
|
||||||
omitTF = fieldInfo.omitTermFreqAndPositions;
|
this.indexOptions = fieldInfo.indexOptions;
|
||||||
//System.out.println("PW field=" + fieldInfo.name + " omitTF=" + omitTF);
|
//System.out.println("PW field=" + fieldInfo.name + " omitTF=" + omitTF);
|
||||||
storePayloads = fieldInfo.storePayloads;
|
storePayloads = fieldInfo.storePayloads;
|
||||||
wrappedPostingsWriter.setField(fieldInfo);
|
wrappedPostingsWriter.setField(fieldInfo);
|
||||||
|
@ -123,8 +124,11 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase {
|
||||||
assert pendingCount < pending.length;
|
assert pendingCount < pending.length;
|
||||||
currentDoc = pending[pendingCount];
|
currentDoc = pending[pendingCount];
|
||||||
currentDoc.docID = docID;
|
currentDoc.docID = docID;
|
||||||
if (omitTF) {
|
if (indexOptions == IndexOptions.DOCS_ONLY) {
|
||||||
pendingCount++;
|
pendingCount++;
|
||||||
|
} else if (indexOptions == IndexOptions.DOCS_AND_FREQS) {
|
||||||
|
pendingCount++;
|
||||||
|
currentDoc.termFreq = termDocFreq;
|
||||||
} else {
|
} else {
|
||||||
currentDoc.termFreq = termDocFreq;
|
currentDoc.termFreq = termDocFreq;
|
||||||
}
|
}
|
||||||
|
@ -196,7 +200,7 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase {
|
||||||
// given codec wants to store other interesting
|
// given codec wants to store other interesting
|
||||||
// stuff, it could use this pulsing codec to do so
|
// stuff, it could use this pulsing codec to do so
|
||||||
|
|
||||||
if (!omitTF) {
|
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||||
int lastDocID = 0;
|
int lastDocID = 0;
|
||||||
int pendingIDX = 0;
|
int pendingIDX = 0;
|
||||||
int lastPayloadLength = -1;
|
int lastPayloadLength = -1;
|
||||||
|
@ -239,7 +243,20 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else if (indexOptions == IndexOptions.DOCS_AND_FREQS) {
|
||||||
|
int lastDocID = 0;
|
||||||
|
for(int posIDX=0;posIDX<pendingCount;posIDX++) {
|
||||||
|
final Position doc = pending[posIDX];
|
||||||
|
final int delta = doc.docID - lastDocID;
|
||||||
|
if (doc.termFreq == 1) {
|
||||||
|
buffer.writeVInt((delta<<1)|1);
|
||||||
|
} else {
|
||||||
|
buffer.writeVInt(delta<<1);
|
||||||
|
buffer.writeVInt(doc.termFreq);
|
||||||
|
}
|
||||||
|
lastDocID = doc.docID;
|
||||||
|
}
|
||||||
|
} else if (indexOptions == IndexOptions.DOCS_ONLY) {
|
||||||
int lastDocID = 0;
|
int lastDocID = 0;
|
||||||
for(int posIDX=0;posIDX<pendingCount;posIDX++) {
|
for(int posIDX=0;posIDX<pendingCount;posIDX++) {
|
||||||
final Position doc = pending[posIDX];
|
final Position doc = pending[posIDX];
|
||||||
|
@ -282,7 +299,7 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase {
|
||||||
wrappedPostingsWriter.startTerm();
|
wrappedPostingsWriter.startTerm();
|
||||||
|
|
||||||
// Flush all buffered docs
|
// Flush all buffered docs
|
||||||
if (!omitTF) {
|
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||||
Position doc = null;
|
Position doc = null;
|
||||||
for(Position pos : pending) {
|
for(Position pos : pending) {
|
||||||
if (doc == null) {
|
if (doc == null) {
|
||||||
|
@ -303,7 +320,7 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase {
|
||||||
//wrappedPostingsWriter.finishDoc();
|
//wrappedPostingsWriter.finishDoc();
|
||||||
} else {
|
} else {
|
||||||
for(Position doc : pending) {
|
for(Position doc : pending) {
|
||||||
wrappedPostingsWriter.startDoc(doc.docID, 0);
|
wrappedPostingsWriter.startDoc(doc.docID, indexOptions == IndexOptions.DOCS_ONLY ? 0 : doc.termFreq);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pendingCount = -1;
|
pendingCount = -1;
|
||||||
|
|
|
@ -23,6 +23,7 @@ import java.util.Collection;
|
||||||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||||
import org.apache.lucene.index.DocsEnum;
|
import org.apache.lucene.index.DocsEnum;
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.IndexFileNames;
|
import org.apache.lucene.index.IndexFileNames;
|
||||||
import org.apache.lucene.index.SegmentInfo;
|
import org.apache.lucene.index.SegmentInfo;
|
||||||
import org.apache.lucene.index.TermState;
|
import org.apache.lucene.index.TermState;
|
||||||
|
@ -68,14 +69,17 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
|
||||||
|
|
||||||
skipIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.SKIP_EXTENSION), context);
|
skipIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.SKIP_EXTENSION), context);
|
||||||
|
|
||||||
|
if (segmentInfo.getFieldInfos().hasFreq()) {
|
||||||
|
freqIn = intFactory.openInput(dir, IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.FREQ_EXTENSION), context);
|
||||||
|
} else {
|
||||||
|
freqIn = null;
|
||||||
|
}
|
||||||
if (segmentInfo.getHasProx()) {
|
if (segmentInfo.getHasProx()) {
|
||||||
freqIn = intFactory.openInput(dir, IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.FREQ_EXTENSION), context);
|
|
||||||
posIn = intFactory.openInput(dir, IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.POS_EXTENSION), context);
|
posIn = intFactory.openInput(dir, IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.POS_EXTENSION), context);
|
||||||
payloadIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.PAYLOAD_EXTENSION), context);
|
payloadIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.PAYLOAD_EXTENSION), context);
|
||||||
} else {
|
} else {
|
||||||
posIn = null;
|
posIn = null;
|
||||||
payloadIn = null;
|
payloadIn = null;
|
||||||
freqIn = null;
|
|
||||||
}
|
}
|
||||||
success = true;
|
success = true;
|
||||||
} finally {
|
} finally {
|
||||||
|
@ -89,8 +93,11 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
|
||||||
files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.DOC_EXTENSION));
|
files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.DOC_EXTENSION));
|
||||||
files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.SKIP_EXTENSION));
|
files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.SKIP_EXTENSION));
|
||||||
|
|
||||||
if (segmentInfo.getHasProx()) {
|
if (segmentInfo.getFieldInfos().hasFreq()) {
|
||||||
files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.FREQ_EXTENSION));
|
files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.FREQ_EXTENSION));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (segmentInfo.getHasProx()) {
|
||||||
files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.POS_EXTENSION));
|
files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.POS_EXTENSION));
|
||||||
files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.PAYLOAD_EXTENSION));
|
files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.PAYLOAD_EXTENSION));
|
||||||
}
|
}
|
||||||
|
@ -229,8 +236,11 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
|
||||||
final boolean isFirstTerm = termState.termCount == 0;
|
final boolean isFirstTerm = termState.termCount == 0;
|
||||||
termState.docIndex.read(termState.bytesReader, isFirstTerm);
|
termState.docIndex.read(termState.bytesReader, isFirstTerm);
|
||||||
//System.out.println(" docIndex=" + termState.docIndex);
|
//System.out.println(" docIndex=" + termState.docIndex);
|
||||||
if (!fieldInfo.omitTermFreqAndPositions) {
|
if (fieldInfo.indexOptions != IndexOptions.DOCS_ONLY) {
|
||||||
termState.freqIndex.read(termState.bytesReader, isFirstTerm);
|
termState.freqIndex.read(termState.bytesReader, isFirstTerm);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||||
//System.out.println(" freqIndex=" + termState.freqIndex);
|
//System.out.println(" freqIndex=" + termState.freqIndex);
|
||||||
termState.posIndex.read(termState.bytesReader, isFirstTerm);
|
termState.posIndex.read(termState.bytesReader, isFirstTerm);
|
||||||
//System.out.println(" posIndex=" + termState.posIndex);
|
//System.out.println(" posIndex=" + termState.posIndex);
|
||||||
|
@ -277,7 +287,7 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState _termState, Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
|
public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState _termState, Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
|
||||||
assert !fieldInfo.omitTermFreqAndPositions;
|
assert fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||||
final SepTermState termState = (SepTermState) _termState;
|
final SepTermState termState = (SepTermState) _termState;
|
||||||
SepDocsAndPositionsEnum postingsEnum;
|
SepDocsAndPositionsEnum postingsEnum;
|
||||||
if (reuse == null || !(reuse instanceof SepDocsAndPositionsEnum)) {
|
if (reuse == null || !(reuse instanceof SepDocsAndPositionsEnum)) {
|
||||||
|
@ -304,6 +314,7 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
|
||||||
|
|
||||||
// TODO: -- should we do omitTF with 2 different enum classes?
|
// TODO: -- should we do omitTF with 2 different enum classes?
|
||||||
private boolean omitTF;
|
private boolean omitTF;
|
||||||
|
private IndexOptions indexOptions;
|
||||||
private boolean storePayloads;
|
private boolean storePayloads;
|
||||||
private Bits liveDocs;
|
private Bits liveDocs;
|
||||||
private final IntIndexInput.Reader docReader;
|
private final IntIndexInput.Reader docReader;
|
||||||
|
@ -340,7 +351,8 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
|
||||||
|
|
||||||
SepDocsEnum init(FieldInfo fieldInfo, SepTermState termState, Bits liveDocs) throws IOException {
|
SepDocsEnum init(FieldInfo fieldInfo, SepTermState termState, Bits liveDocs) throws IOException {
|
||||||
this.liveDocs = liveDocs;
|
this.liveDocs = liveDocs;
|
||||||
omitTF = fieldInfo.omitTermFreqAndPositions;
|
this.indexOptions = fieldInfo.indexOptions;
|
||||||
|
omitTF = indexOptions == IndexOptions.DOCS_ONLY;
|
||||||
storePayloads = fieldInfo.storePayloads;
|
storePayloads = fieldInfo.storePayloads;
|
||||||
|
|
||||||
// TODO: can't we only do this if consumer
|
// TODO: can't we only do this if consumer
|
||||||
|
@ -456,7 +468,7 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
|
||||||
0,
|
0,
|
||||||
docFreq,
|
docFreq,
|
||||||
storePayloads);
|
storePayloads);
|
||||||
skipper.setOmitTF(omitTF);
|
skipper.setIndexOptions(indexOptions);
|
||||||
|
|
||||||
skipped = true;
|
skipped = true;
|
||||||
}
|
}
|
||||||
|
@ -633,7 +645,7 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
|
||||||
payloadFP,
|
payloadFP,
|
||||||
docFreq,
|
docFreq,
|
||||||
storePayloads);
|
storePayloads);
|
||||||
|
skipper.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
|
||||||
skipped = true;
|
skipped = true;
|
||||||
}
|
}
|
||||||
final int newCount = skipper.skipTo(target);
|
final int newCount = skipper.skipTo(target);
|
||||||
|
|
|
@ -23,6 +23,7 @@ import java.util.Set;
|
||||||
import org.apache.lucene.index.CorruptIndexException;
|
import org.apache.lucene.index.CorruptIndexException;
|
||||||
import org.apache.lucene.index.DocsEnum;
|
import org.apache.lucene.index.DocsEnum;
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.IndexFileNames;
|
import org.apache.lucene.index.IndexFileNames;
|
||||||
import org.apache.lucene.index.SegmentWriteState;
|
import org.apache.lucene.index.SegmentWriteState;
|
||||||
import org.apache.lucene.index.codecs.PostingsWriterBase;
|
import org.apache.lucene.index.codecs.PostingsWriterBase;
|
||||||
|
@ -86,7 +87,7 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
|
||||||
final int totalNumDocs;
|
final int totalNumDocs;
|
||||||
|
|
||||||
boolean storePayloads;
|
boolean storePayloads;
|
||||||
boolean omitTF;
|
IndexOptions indexOptions;
|
||||||
|
|
||||||
long lastSkipFP;
|
long lastSkipFP;
|
||||||
|
|
||||||
|
@ -121,11 +122,13 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
|
||||||
docOut = factory.createOutput(state.directory, docFileName, state.context);
|
docOut = factory.createOutput(state.directory, docFileName, state.context);
|
||||||
docIndex = docOut.index();
|
docIndex = docOut.index();
|
||||||
|
|
||||||
if (state.fieldInfos.hasProx()) {
|
if (state.fieldInfos.hasFreq()) {
|
||||||
final String frqFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, FREQ_EXTENSION);
|
final String frqFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, FREQ_EXTENSION);
|
||||||
freqOut = factory.createOutput(state.directory, frqFileName, state.context);
|
freqOut = factory.createOutput(state.directory, frqFileName, state.context);
|
||||||
freqIndex = freqOut.index();
|
freqIndex = freqOut.index();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (state.fieldInfos.hasProx()) {
|
||||||
final String posFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, POS_EXTENSION);
|
final String posFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, POS_EXTENSION);
|
||||||
posOut = factory.createOutput(state.directory, posFileName, state.context);
|
posOut = factory.createOutput(state.directory, posFileName, state.context);
|
||||||
posIndex = posOut.index();
|
posIndex = posOut.index();
|
||||||
|
@ -168,12 +171,17 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
|
||||||
@Override
|
@Override
|
||||||
public void startTerm() throws IOException {
|
public void startTerm() throws IOException {
|
||||||
docIndex.mark();
|
docIndex.mark();
|
||||||
if (!omitTF) {
|
|
||||||
|
if (indexOptions != IndexOptions.DOCS_ONLY) {
|
||||||
freqIndex.mark();
|
freqIndex.mark();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||||
posIndex.mark();
|
posIndex.mark();
|
||||||
payloadStart = payloadOut.getFilePointer();
|
payloadStart = payloadOut.getFilePointer();
|
||||||
lastPayloadLength = -1;
|
lastPayloadLength = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
skipListWriter.resetSkip(docIndex, freqIndex, posIndex);
|
skipListWriter.resetSkip(docIndex, freqIndex, posIndex);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -182,9 +190,9 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
|
||||||
@Override
|
@Override
|
||||||
public void setField(FieldInfo fieldInfo) {
|
public void setField(FieldInfo fieldInfo) {
|
||||||
this.fieldInfo = fieldInfo;
|
this.fieldInfo = fieldInfo;
|
||||||
omitTF = fieldInfo.omitTermFreqAndPositions;
|
this.indexOptions = fieldInfo.indexOptions;
|
||||||
skipListWriter.setOmitTF(omitTF);
|
skipListWriter.setIndexOptions(indexOptions);
|
||||||
storePayloads = !omitTF && fieldInfo.storePayloads;
|
storePayloads = indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS && fieldInfo.storePayloads;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Adds a new doc in this term. If this returns null
|
/** Adds a new doc in this term. If this returns null
|
||||||
|
@ -209,7 +217,7 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
|
||||||
|
|
||||||
lastDocID = docID;
|
lastDocID = docID;
|
||||||
docOut.write(delta);
|
docOut.write(delta);
|
||||||
if (!omitTF) {
|
if (indexOptions != IndexOptions.DOCS_ONLY) {
|
||||||
//System.out.println(" sepw startDoc: write freq=" + termDocFreq);
|
//System.out.println(" sepw startDoc: write freq=" + termDocFreq);
|
||||||
freqOut.write(termDocFreq);
|
freqOut.write(termDocFreq);
|
||||||
}
|
}
|
||||||
|
@ -227,7 +235,7 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
|
||||||
/** Add a new position & payload */
|
/** Add a new position & payload */
|
||||||
@Override
|
@Override
|
||||||
public void addPosition(int position, BytesRef payload) throws IOException {
|
public void addPosition(int position, BytesRef payload) throws IOException {
|
||||||
assert !omitTF;
|
assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||||
|
|
||||||
final int delta = position - lastPosition;
|
final int delta = position - lastPosition;
|
||||||
assert delta >= 0: "position=" + position + " lastPosition=" + lastPosition; // not quite right (if pos=0 is repeated twice we don't catch it)
|
assert delta >= 0: "position=" + position + " lastPosition=" + lastPosition; // not quite right (if pos=0 is repeated twice we don't catch it)
|
||||||
|
@ -274,10 +282,12 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
|
||||||
docIndex.write(indexBytesWriter, isFirstTerm);
|
docIndex.write(indexBytesWriter, isFirstTerm);
|
||||||
//System.out.println(" docIndex=" + docIndex);
|
//System.out.println(" docIndex=" + docIndex);
|
||||||
|
|
||||||
if (!omitTF) {
|
if (indexOptions != IndexOptions.DOCS_ONLY) {
|
||||||
freqIndex.write(indexBytesWriter, isFirstTerm);
|
freqIndex.write(indexBytesWriter, isFirstTerm);
|
||||||
//System.out.println(" freqIndex=" + freqIndex);
|
//System.out.println(" freqIndex=" + freqIndex);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||||
posIndex.write(indexBytesWriter, isFirstTerm);
|
posIndex.write(indexBytesWriter, isFirstTerm);
|
||||||
//System.out.println(" posIndex=" + posIndex);
|
//System.out.println(" posIndex=" + posIndex);
|
||||||
if (storePayloads) {
|
if (storePayloads) {
|
||||||
|
|
|
@ -21,6 +21,7 @@ import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
||||||
import org.apache.lucene.store.IndexInput;
|
import org.apache.lucene.store.IndexInput;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.codecs.MultiLevelSkipListReader;
|
import org.apache.lucene.index.codecs.MultiLevelSkipListReader;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -87,10 +88,10 @@ class SepSkipListReader extends MultiLevelSkipListReader {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean omitTF;
|
IndexOptions indexOptions;
|
||||||
|
|
||||||
void setOmitTF(boolean v) {
|
void setIndexOptions(IndexOptions v) {
|
||||||
omitTF = v;
|
indexOptions = v;
|
||||||
}
|
}
|
||||||
|
|
||||||
void init(long skipPointer,
|
void init(long skipPointer,
|
||||||
|
@ -177,7 +178,7 @@ class SepSkipListReader extends MultiLevelSkipListReader {
|
||||||
@Override
|
@Override
|
||||||
protected int readSkipData(int level, IndexInput skipStream) throws IOException {
|
protected int readSkipData(int level, IndexInput skipStream) throws IOException {
|
||||||
int delta;
|
int delta;
|
||||||
assert !omitTF || !currentFieldStoresPayloads;
|
assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !currentFieldStoresPayloads;
|
||||||
if (currentFieldStoresPayloads) {
|
if (currentFieldStoresPayloads) {
|
||||||
// the current field stores payloads.
|
// the current field stores payloads.
|
||||||
// if the doc delta is odd then we have
|
// if the doc delta is odd then we have
|
||||||
|
@ -192,11 +193,11 @@ class SepSkipListReader extends MultiLevelSkipListReader {
|
||||||
} else {
|
} else {
|
||||||
delta = skipStream.readVInt();
|
delta = skipStream.readVInt();
|
||||||
}
|
}
|
||||||
if (!omitTF) {
|
if (indexOptions != IndexOptions.DOCS_ONLY) {
|
||||||
freqIndex[level].read(skipStream, false);
|
freqIndex[level].read(skipStream, false);
|
||||||
}
|
}
|
||||||
docIndex[level].read(skipStream, false);
|
docIndex[level].read(skipStream, false);
|
||||||
if (!omitTF) {
|
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||||
posIndex[level].read(skipStream, false);
|
posIndex[level].read(skipStream, false);
|
||||||
if (currentFieldStoresPayloads) {
|
if (currentFieldStoresPayloads) {
|
||||||
payloadPointer[level] += skipStream.readVInt();
|
payloadPointer[level] += skipStream.readVInt();
|
||||||
|
|
|
@ -21,6 +21,7 @@ import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
||||||
import org.apache.lucene.store.IndexOutput;
|
import org.apache.lucene.store.IndexOutput;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.codecs.MultiLevelSkipListWriter;
|
import org.apache.lucene.index.codecs.MultiLevelSkipListWriter;
|
||||||
|
|
||||||
// TODO: -- skip data should somehow be more local to the
|
// TODO: -- skip data should somehow be more local to the
|
||||||
|
@ -84,10 +85,10 @@ class SepSkipListWriter extends MultiLevelSkipListWriter {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean omitTF;
|
IndexOptions indexOptions;
|
||||||
|
|
||||||
void setOmitTF(boolean v) {
|
void setIndexOptions(IndexOptions v) {
|
||||||
omitTF = v;
|
indexOptions = v;
|
||||||
}
|
}
|
||||||
|
|
||||||
void setPosOutput(IntIndexOutput posOutput) throws IOException {
|
void setPosOutput(IntIndexOutput posOutput) throws IOException {
|
||||||
|
@ -159,7 +160,7 @@ class SepSkipListWriter extends MultiLevelSkipListWriter {
|
||||||
// current payload length equals the length at the previous
|
// current payload length equals the length at the previous
|
||||||
// skip point
|
// skip point
|
||||||
|
|
||||||
assert !omitTF || !curStorePayloads;
|
assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !curStorePayloads;
|
||||||
|
|
||||||
if (curStorePayloads) {
|
if (curStorePayloads) {
|
||||||
int delta = curDoc - lastSkipDoc[level];
|
int delta = curDoc - lastSkipDoc[level];
|
||||||
|
@ -179,13 +180,13 @@ class SepSkipListWriter extends MultiLevelSkipListWriter {
|
||||||
skipBuffer.writeVInt(curDoc - lastSkipDoc[level]);
|
skipBuffer.writeVInt(curDoc - lastSkipDoc[level]);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!omitTF) {
|
if (indexOptions != IndexOptions.DOCS_ONLY) {
|
||||||
freqIndex[level].mark();
|
freqIndex[level].mark();
|
||||||
freqIndex[level].write(skipBuffer, false);
|
freqIndex[level].write(skipBuffer, false);
|
||||||
}
|
}
|
||||||
docIndex[level].mark();
|
docIndex[level].mark();
|
||||||
docIndex[level].write(skipBuffer, false);
|
docIndex[level].write(skipBuffer, false);
|
||||||
if (!omitTF) {
|
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||||
posIndex[level].mark();
|
posIndex[level].mark();
|
||||||
posIndex[level].write(skipBuffer, false);
|
posIndex[level].write(skipBuffer, false);
|
||||||
if (curStorePayloads) {
|
if (curStorePayloads) {
|
||||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.index.codecs.simpletext;
|
||||||
|
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.index.codecs.FieldsProducer;
|
import org.apache.lucene.index.codecs.FieldsProducer;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.SegmentReadState;
|
import org.apache.lucene.index.SegmentReadState;
|
||||||
import org.apache.lucene.index.FieldsEnum;
|
import org.apache.lucene.index.FieldsEnum;
|
||||||
import org.apache.lucene.index.Terms;
|
import org.apache.lucene.index.Terms;
|
||||||
|
@ -53,6 +54,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
||||||
final static BytesRef FIELD = SimpleTextFieldsWriter.FIELD;
|
final static BytesRef FIELD = SimpleTextFieldsWriter.FIELD;
|
||||||
final static BytesRef TERM = SimpleTextFieldsWriter.TERM;
|
final static BytesRef TERM = SimpleTextFieldsWriter.TERM;
|
||||||
final static BytesRef DOC = SimpleTextFieldsWriter.DOC;
|
final static BytesRef DOC = SimpleTextFieldsWriter.DOC;
|
||||||
|
final static BytesRef FREQ = SimpleTextFieldsWriter.FREQ;
|
||||||
final static BytesRef POS = SimpleTextFieldsWriter.POS;
|
final static BytesRef POS = SimpleTextFieldsWriter.POS;
|
||||||
final static BytesRef PAYLOAD = SimpleTextFieldsWriter.PAYLOAD;
|
final static BytesRef PAYLOAD = SimpleTextFieldsWriter.PAYLOAD;
|
||||||
|
|
||||||
|
@ -114,16 +116,16 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
||||||
|
|
||||||
private class SimpleTextTermsEnum extends TermsEnum {
|
private class SimpleTextTermsEnum extends TermsEnum {
|
||||||
private final IndexInput in;
|
private final IndexInput in;
|
||||||
private final boolean omitTF;
|
private final IndexOptions indexOptions;
|
||||||
private int docFreq;
|
private int docFreq;
|
||||||
private long totalTermFreq;
|
private long totalTermFreq;
|
||||||
private long docsStart;
|
private long docsStart;
|
||||||
private boolean ended;
|
private boolean ended;
|
||||||
private final BytesRefFSTEnum<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> fstEnum;
|
private final BytesRefFSTEnum<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> fstEnum;
|
||||||
|
|
||||||
public SimpleTextTermsEnum(FST<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> fst, boolean omitTF) throws IOException {
|
public SimpleTextTermsEnum(FST<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> fst, IndexOptions indexOptions) throws IOException {
|
||||||
this.in = (IndexInput) SimpleTextFieldsReader.this.in.clone();
|
this.in = (IndexInput) SimpleTextFieldsReader.this.in.clone();
|
||||||
this.omitTF = omitTF;
|
this.indexOptions = indexOptions;
|
||||||
fstEnum = new BytesRefFSTEnum<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>>(fst);
|
fstEnum = new BytesRefFSTEnum<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>>(fst);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -218,12 +220,12 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
||||||
} else {
|
} else {
|
||||||
docsEnum = new SimpleTextDocsEnum();
|
docsEnum = new SimpleTextDocsEnum();
|
||||||
}
|
}
|
||||||
return docsEnum.reset(docsStart, liveDocs, omitTF);
|
return docsEnum.reset(docsStart, liveDocs, indexOptions == IndexOptions.DOCS_ONLY);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
|
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
|
||||||
if (omitTF) {
|
if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -303,8 +305,11 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
||||||
docID = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
|
docID = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
|
||||||
termFreq = 0;
|
termFreq = 0;
|
||||||
first = false;
|
first = false;
|
||||||
|
} else if (scratch.startsWith(FREQ)) {
|
||||||
|
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+FREQ.length, scratch.length-FREQ.length, scratchUTF16);
|
||||||
|
termFreq = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
|
||||||
} else if (scratch.startsWith(POS)) {
|
} else if (scratch.startsWith(POS)) {
|
||||||
termFreq++;
|
// skip termFreq++;
|
||||||
} else if (scratch.startsWith(PAYLOAD)) {
|
} else if (scratch.startsWith(PAYLOAD)) {
|
||||||
// skip
|
// skip
|
||||||
} else {
|
} else {
|
||||||
|
@ -384,10 +389,13 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
||||||
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+DOC.length, scratch.length-DOC.length, scratchUTF16);
|
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+DOC.length, scratch.length-DOC.length, scratchUTF16);
|
||||||
docID = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
|
docID = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
|
||||||
tf = 0;
|
tf = 0;
|
||||||
posStart = in.getFilePointer();
|
|
||||||
first = false;
|
first = false;
|
||||||
|
} else if (scratch.startsWith(FREQ)) {
|
||||||
|
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+FREQ.length, scratch.length-FREQ.length, scratchUTF16);
|
||||||
|
tf = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
|
||||||
|
posStart = in.getFilePointer();
|
||||||
} else if (scratch.startsWith(POS)) {
|
} else if (scratch.startsWith(POS)) {
|
||||||
tf++;
|
// skip
|
||||||
} else if (scratch.startsWith(PAYLOAD)) {
|
} else if (scratch.startsWith(PAYLOAD)) {
|
||||||
// skip
|
// skip
|
||||||
} else {
|
} else {
|
||||||
|
@ -461,7 +469,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
||||||
|
|
||||||
private class SimpleTextTerms extends Terms {
|
private class SimpleTextTerms extends Terms {
|
||||||
private final long termsStart;
|
private final long termsStart;
|
||||||
private final boolean omitTF;
|
private final IndexOptions indexOptions;
|
||||||
private long sumTotalTermFreq;
|
private long sumTotalTermFreq;
|
||||||
private long sumDocFreq;
|
private long sumDocFreq;
|
||||||
private FST<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> fst;
|
private FST<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> fst;
|
||||||
|
@ -470,7 +478,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
||||||
|
|
||||||
public SimpleTextTerms(String field, long termsStart) throws IOException {
|
public SimpleTextTerms(String field, long termsStart) throws IOException {
|
||||||
this.termsStart = termsStart;
|
this.termsStart = termsStart;
|
||||||
omitTF = fieldInfos.fieldInfo(field).omitTermFreqAndPositions;
|
indexOptions = fieldInfos.fieldInfo(field).indexOptions;
|
||||||
loadTerms();
|
loadTerms();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -533,7 +541,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
||||||
@Override
|
@Override
|
||||||
public TermsEnum iterator() throws IOException {
|
public TermsEnum iterator() throws IOException {
|
||||||
if (fst != null) {
|
if (fst != null) {
|
||||||
return new SimpleTextTermsEnum(fst, omitTF);
|
return new SimpleTextTermsEnum(fst, indexOptions);
|
||||||
} else {
|
} else {
|
||||||
return TermsEnum.EMPTY;
|
return TermsEnum.EMPTY;
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,6 +24,7 @@ import org.apache.lucene.index.codecs.TermsConsumer;
|
||||||
import org.apache.lucene.index.codecs.PostingsConsumer;
|
import org.apache.lucene.index.codecs.PostingsConsumer;
|
||||||
import org.apache.lucene.index.codecs.TermStats;
|
import org.apache.lucene.index.codecs.TermStats;
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.SegmentWriteState;
|
import org.apache.lucene.index.SegmentWriteState;
|
||||||
import org.apache.lucene.store.IndexOutput;
|
import org.apache.lucene.store.IndexOutput;
|
||||||
|
|
||||||
|
@ -41,6 +42,7 @@ class SimpleTextFieldsWriter extends FieldsConsumer {
|
||||||
final static BytesRef FIELD = new BytesRef("field ");
|
final static BytesRef FIELD = new BytesRef("field ");
|
||||||
final static BytesRef TERM = new BytesRef(" term ");
|
final static BytesRef TERM = new BytesRef(" term ");
|
||||||
final static BytesRef DOC = new BytesRef(" doc ");
|
final static BytesRef DOC = new BytesRef(" doc ");
|
||||||
|
final static BytesRef FREQ = new BytesRef(" freq ");
|
||||||
final static BytesRef POS = new BytesRef(" pos ");
|
final static BytesRef POS = new BytesRef(" pos ");
|
||||||
final static BytesRef PAYLOAD = new BytesRef(" payload ");
|
final static BytesRef PAYLOAD = new BytesRef(" payload ");
|
||||||
|
|
||||||
|
@ -73,11 +75,15 @@ class SimpleTextFieldsWriter extends FieldsConsumer {
|
||||||
write(FIELD);
|
write(FIELD);
|
||||||
write(field.name);
|
write(field.name);
|
||||||
out.writeByte(NEWLINE);
|
out.writeByte(NEWLINE);
|
||||||
return new SimpleTextTermsWriter();
|
return new SimpleTextTermsWriter(field);
|
||||||
}
|
}
|
||||||
|
|
||||||
private class SimpleTextTermsWriter extends TermsConsumer {
|
private class SimpleTextTermsWriter extends TermsConsumer {
|
||||||
private final SimpleTextPostingsWriter postingsWriter = new SimpleTextPostingsWriter();
|
private final SimpleTextPostingsWriter postingsWriter;
|
||||||
|
|
||||||
|
public SimpleTextTermsWriter(FieldInfo field) {
|
||||||
|
postingsWriter = new SimpleTextPostingsWriter(field);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public PostingsConsumer startTerm(BytesRef term) throws IOException {
|
public PostingsConsumer startTerm(BytesRef term) throws IOException {
|
||||||
|
@ -101,7 +107,12 @@ class SimpleTextFieldsWriter extends FieldsConsumer {
|
||||||
private class SimpleTextPostingsWriter extends PostingsConsumer {
|
private class SimpleTextPostingsWriter extends PostingsConsumer {
|
||||||
private BytesRef term;
|
private BytesRef term;
|
||||||
private boolean wroteTerm;
|
private boolean wroteTerm;
|
||||||
|
private IndexOptions indexOptions;
|
||||||
|
|
||||||
|
public SimpleTextPostingsWriter(FieldInfo field) {
|
||||||
|
this.indexOptions = field.indexOptions;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void startDoc(int docID, int termDocFreq) throws IOException {
|
public void startDoc(int docID, int termDocFreq) throws IOException {
|
||||||
if (!wroteTerm) {
|
if (!wroteTerm) {
|
||||||
|
@ -115,7 +126,14 @@ class SimpleTextFieldsWriter extends FieldsConsumer {
|
||||||
write(DOC);
|
write(DOC);
|
||||||
write(Integer.toString(docID));
|
write(Integer.toString(docID));
|
||||||
newline();
|
newline();
|
||||||
|
if (indexOptions != IndexOptions.DOCS_ONLY) {
|
||||||
|
write(FREQ);
|
||||||
|
write(Integer.toString(termDocFreq));
|
||||||
|
newline();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
public PostingsConsumer reset(BytesRef term) {
|
public PostingsConsumer reset(BytesRef term) {
|
||||||
this.term = term;
|
this.term = term;
|
||||||
|
|
|
@ -23,6 +23,7 @@ import java.util.Collection;
|
||||||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||||
import org.apache.lucene.index.DocsEnum;
|
import org.apache.lucene.index.DocsEnum;
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.IndexFileNames;
|
import org.apache.lucene.index.IndexFileNames;
|
||||||
import org.apache.lucene.index.SegmentInfo;
|
import org.apache.lucene.index.SegmentInfo;
|
||||||
import org.apache.lucene.index.TermState;
|
import org.apache.lucene.index.TermState;
|
||||||
|
@ -190,7 +191,7 @@ public class StandardPostingsReader extends PostingsReaderBase {
|
||||||
// undefined
|
// undefined
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!fieldInfo.omitTermFreqAndPositions) {
|
if (fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||||
if (isFirstTerm) {
|
if (isFirstTerm) {
|
||||||
termState.proxOffset = termState.bytesReader.readVLong();
|
termState.proxOffset = termState.bytesReader.readVLong();
|
||||||
} else {
|
} else {
|
||||||
|
@ -219,7 +220,7 @@ public class StandardPostingsReader extends PostingsReaderBase {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
|
public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
|
||||||
if (fieldInfo.omitTermFreqAndPositions) {
|
if (fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -282,7 +283,7 @@ public class StandardPostingsReader extends PostingsReaderBase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public SegmentDocsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits liveDocs) throws IOException {
|
public SegmentDocsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits liveDocs) throws IOException {
|
||||||
omitTF = fieldInfo.omitTermFreqAndPositions;
|
omitTF = fieldInfo.indexOptions == IndexOptions.DOCS_ONLY;
|
||||||
if (omitTF) {
|
if (omitTF) {
|
||||||
freq = 1;
|
freq = 1;
|
||||||
}
|
}
|
||||||
|
@ -455,7 +456,7 @@ public class StandardPostingsReader extends PostingsReaderBase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public SegmentDocsAndPositionsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits liveDocs) throws IOException {
|
public SegmentDocsAndPositionsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits liveDocs) throws IOException {
|
||||||
assert !fieldInfo.omitTermFreqAndPositions;
|
assert fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||||
assert !fieldInfo.storePayloads;
|
assert !fieldInfo.storePayloads;
|
||||||
|
|
||||||
this.liveDocs = liveDocs;
|
this.liveDocs = liveDocs;
|
||||||
|
@ -649,7 +650,7 @@ public class StandardPostingsReader extends PostingsReaderBase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public SegmentDocsAndPositionsAndPayloadsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits liveDocs) throws IOException {
|
public SegmentDocsAndPositionsAndPayloadsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits liveDocs) throws IOException {
|
||||||
assert !fieldInfo.omitTermFreqAndPositions;
|
assert fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||||
assert fieldInfo.storePayloads;
|
assert fieldInfo.storePayloads;
|
||||||
if (payload == null) {
|
if (payload == null) {
|
||||||
payload = new BytesRef();
|
payload = new BytesRef();
|
||||||
|
|
|
@ -25,6 +25,7 @@ import java.io.IOException;
|
||||||
import org.apache.lucene.index.CorruptIndexException;
|
import org.apache.lucene.index.CorruptIndexException;
|
||||||
import org.apache.lucene.index.DocsEnum;
|
import org.apache.lucene.index.DocsEnum;
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.IndexFileNames;
|
import org.apache.lucene.index.IndexFileNames;
|
||||||
import org.apache.lucene.index.SegmentWriteState;
|
import org.apache.lucene.index.SegmentWriteState;
|
||||||
import org.apache.lucene.index.codecs.PostingsWriterBase;
|
import org.apache.lucene.index.codecs.PostingsWriterBase;
|
||||||
|
@ -66,7 +67,7 @@ public final class StandardPostingsWriter extends PostingsWriterBase {
|
||||||
final int totalNumDocs;
|
final int totalNumDocs;
|
||||||
IndexOutput termsOut;
|
IndexOutput termsOut;
|
||||||
|
|
||||||
boolean omitTermFreqAndPositions;
|
IndexOptions indexOptions;
|
||||||
boolean storePayloads;
|
boolean storePayloads;
|
||||||
// Starts a new term
|
// Starts a new term
|
||||||
long lastFreqStart;
|
long lastFreqStart;
|
||||||
|
@ -144,7 +145,7 @@ public final class StandardPostingsWriter extends PostingsWriterBase {
|
||||||
public void setField(FieldInfo fieldInfo) {
|
public void setField(FieldInfo fieldInfo) {
|
||||||
//System.out.println("SPW: setField");
|
//System.out.println("SPW: setField");
|
||||||
this.fieldInfo = fieldInfo;
|
this.fieldInfo = fieldInfo;
|
||||||
omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
|
indexOptions = fieldInfo.indexOptions;
|
||||||
storePayloads = fieldInfo.storePayloads;
|
storePayloads = fieldInfo.storePayloads;
|
||||||
//System.out.println(" set init blockFreqStart=" + freqStart);
|
//System.out.println(" set init blockFreqStart=" + freqStart);
|
||||||
//System.out.println(" set init blockProxStart=" + proxStart);
|
//System.out.println(" set init blockProxStart=" + proxStart);
|
||||||
|
@ -173,7 +174,7 @@ public final class StandardPostingsWriter extends PostingsWriterBase {
|
||||||
assert docID < totalNumDocs: "docID=" + docID + " totalNumDocs=" + totalNumDocs;
|
assert docID < totalNumDocs: "docID=" + docID + " totalNumDocs=" + totalNumDocs;
|
||||||
|
|
||||||
lastDocID = docID;
|
lastDocID = docID;
|
||||||
if (omitTermFreqAndPositions) {
|
if (indexOptions == IndexOptions.DOCS_ONLY) {
|
||||||
freqOut.writeVInt(delta);
|
freqOut.writeVInt(delta);
|
||||||
} else if (1 == termDocFreq) {
|
} else if (1 == termDocFreq) {
|
||||||
freqOut.writeVInt((delta<<1) | 1);
|
freqOut.writeVInt((delta<<1) | 1);
|
||||||
|
@ -189,7 +190,7 @@ public final class StandardPostingsWriter extends PostingsWriterBase {
|
||||||
@Override
|
@Override
|
||||||
public void addPosition(int position, BytesRef payload) throws IOException {
|
public void addPosition(int position, BytesRef payload) throws IOException {
|
||||||
//System.out.println("StandardW: addPos pos=" + position + " payload=" + (payload == null ? "null" : (payload.length + " bytes")) + " proxFP=" + proxOut.getFilePointer());
|
//System.out.println("StandardW: addPos pos=" + position + " payload=" + (payload == null ? "null" : (payload.length + " bytes")) + " proxFP=" + proxOut.getFilePointer());
|
||||||
assert !omitTermFreqAndPositions: "omitTermFreqAndPositions is true";
|
assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS: "invalid indexOptions: " + indexOptions;
|
||||||
assert proxOut != null;
|
assert proxOut != null;
|
||||||
|
|
||||||
final int delta = position - lastPosition;
|
final int delta = position - lastPosition;
|
||||||
|
@ -246,7 +247,7 @@ public final class StandardPostingsWriter extends PostingsWriterBase {
|
||||||
bytesWriter.writeVInt((int) (skipListWriter.writeSkip(freqOut)-freqStart));
|
bytesWriter.writeVInt((int) (skipListWriter.writeSkip(freqOut)-freqStart));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!omitTermFreqAndPositions) {
|
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||||
//System.out.println(" proxFP=" + proxStart);
|
//System.out.println(" proxFP=" + proxStart);
|
||||||
if (isFirstTerm) {
|
if (isFirstTerm) {
|
||||||
bytesWriter.writeVLong(proxStart);
|
bytesWriter.writeVLong(proxStart);
|
||||||
|
|
|
@ -195,7 +195,7 @@ public class MultiPhraseQuery extends Query {
|
||||||
if (postingsEnum == null) {
|
if (postingsEnum == null) {
|
||||||
if (reader.termDocsEnum(liveDocs, term.field(), term.bytes()) != null) {
|
if (reader.termDocsEnum(liveDocs, term.field(), term.bytes()) != null) {
|
||||||
// term does exist, but has no positions
|
// term does exist, but has no positions
|
||||||
throw new IllegalStateException("field \"" + term.field() + "\" was indexed with Field.omitTermFreqAndPositions=true; cannot run PhraseQuery (term=" + term.text() + ")");
|
throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")");
|
||||||
} else {
|
} else {
|
||||||
// term does not exist
|
// term does not exist
|
||||||
return null;
|
return null;
|
||||||
|
@ -443,7 +443,7 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum {
|
||||||
} else {
|
} else {
|
||||||
if (indexReader.termDocsEnum(liveDocs, terms[i].field(), terms[i].bytes()) != null) {
|
if (indexReader.termDocsEnum(liveDocs, terms[i].field(), terms[i].bytes()) != null) {
|
||||||
// term does exist, but has no positions
|
// term does exist, but has no positions
|
||||||
throw new IllegalStateException("field \"" + terms[i].field() + "\" was indexed with Field.omitTermFreqAndPositions=true; cannot run PhraseQuery (term=" + terms[i].text() + ")");
|
throw new IllegalStateException("field \"" + terms[i].field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + terms[i].text() + ")");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -229,7 +229,7 @@ public class PhraseQuery extends Query {
|
||||||
if (postingsEnum == null) {
|
if (postingsEnum == null) {
|
||||||
assert (reader.termDocsEnum(liveDocs, t.field(), t.bytes(), state) != null) : "termstate found but no term exists in reader";
|
assert (reader.termDocsEnum(liveDocs, t.field(), t.bytes(), state) != null) : "termstate found but no term exists in reader";
|
||||||
// term does exist, but has no positions
|
// term does exist, but has no positions
|
||||||
throw new IllegalStateException("field \"" + t.field() + "\" was indexed with Field.omitTermFreqAndPositions=true; cannot run PhraseQuery (term=" + t.text() + ")");
|
throw new IllegalStateException("field \"" + t.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + t.text() + ")");
|
||||||
}
|
}
|
||||||
// get the docFreq without seeking
|
// get the docFreq without seeking
|
||||||
TermsEnum te = reader.fields().terms(field).getThreadTermsEnum();
|
TermsEnum te = reader.fields().terms(field).getThreadTermsEnum();
|
||||||
|
|
|
@ -92,7 +92,7 @@ public class SpanTermQuery extends SpanQuery {
|
||||||
} else {
|
} else {
|
||||||
if (reader.termDocsEnum(reader.getLiveDocs(), term.field(), term.bytes()) != null) {
|
if (reader.termDocsEnum(reader.getLiveDocs(), term.field(), term.bytes()) != null) {
|
||||||
// term does exist, but has no positions
|
// term does exist, but has no positions
|
||||||
throw new IllegalStateException("field \"" + term.field() + "\" was indexed with Field.omitTermFreqAndPositions=true; cannot run SpanTermQuery (term=" + term.text() + ")");
|
throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run SpanTermQuery (term=" + term.text() + ")");
|
||||||
} else {
|
} else {
|
||||||
// term does not exist
|
// term does not exist
|
||||||
return TermSpans.EMPTY_TERM_SPANS;
|
return TermSpans.EMPTY_TERM_SPANS;
|
||||||
|
|
|
@ -99,6 +99,10 @@
|
||||||
to stored fields file, previously they were stored in
|
to stored fields file, previously they were stored in
|
||||||
text format only.
|
text format only.
|
||||||
</p>
|
</p>
|
||||||
|
<p>
|
||||||
|
In version 3.4, fields can omit position data while
|
||||||
|
still indexing term frequencies.
|
||||||
|
</p>
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
<section id="Definitions"><title>Definitions</title>
|
<section id="Definitions"><title>Definitions</title>
|
||||||
|
@ -276,7 +280,7 @@
|
||||||
<p>Term Frequency
|
<p>Term Frequency
|
||||||
data. For each term in the dictionary, the numbers of all the
|
data. For each term in the dictionary, the numbers of all the
|
||||||
documents that contain that term, and the frequency of the term in
|
documents that contain that term, and the frequency of the term in
|
||||||
that document if omitTf is false.
|
that document, unless frequencies are omitted (IndexOptions.DOCS_ONLY).
|
||||||
</p>
|
</p>
|
||||||
</li>
|
</li>
|
||||||
|
|
||||||
|
@ -284,8 +288,7 @@
|
||||||
<p>Term Proximity
|
<p>Term Proximity
|
||||||
data. For each term in the dictionary, the positions that the term
|
data. For each term in the dictionary, the positions that the term
|
||||||
occurs in each document. Note that this will
|
occurs in each document. Note that this will
|
||||||
not exist if all fields in all documents set
|
not exist if all fields in all documents omit position data.
|
||||||
omitTf to true.
|
|
||||||
</p>
|
</p>
|
||||||
</li>
|
</li>
|
||||||
|
|
||||||
|
@ -1080,7 +1083,7 @@
|
||||||
|
|
||||||
<p>
|
<p>
|
||||||
HasProx is 1 if any fields in this segment have
|
HasProx is 1 if any fields in this segment have
|
||||||
omitTf set to false; else, it's 0.
|
position data (IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); else, it's 0.
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
<p>
|
<p>
|
||||||
|
@ -1217,11 +1220,13 @@
|
||||||
<li>If the fourth lowest-order bit is set (0x08), term offsets are stored with the term vectors.</li>
|
<li>If the fourth lowest-order bit is set (0x08), term offsets are stored with the term vectors.</li>
|
||||||
<li>If the fifth lowest-order bit is set (0x10), norms are omitted for the indexed field.</li>
|
<li>If the fifth lowest-order bit is set (0x10), norms are omitted for the indexed field.</li>
|
||||||
<li>If the sixth lowest-order bit is set (0x20), payloads are stored for the indexed field.</li>
|
<li>If the sixth lowest-order bit is set (0x20), payloads are stored for the indexed field.</li>
|
||||||
|
<li>If the seventh lowest-order bit is set (0x40), term frequencies and positions are omitted for the indexed field.</li>
|
||||||
|
<li>If the eighth lowest-order bit is set (0x80), positions are omitted for the indexed field.</li>
|
||||||
</ul>
|
</ul>
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
<p>
|
<p>
|
||||||
FNMVersion (added in 2.9) is always -2.
|
FNMVersion (added in 2.9) is -2 for indexes from 2.9 - 3.3. It is -3 for indexes in Lucene 3.4+.
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
<p>
|
<p>
|
||||||
|
@ -1419,7 +1424,7 @@
|
||||||
file. In particular, it is the difference between the position of
|
file. In particular, it is the difference between the position of
|
||||||
this term's data in that file and the position of the previous
|
this term's data in that file and the position of the previous
|
||||||
term's data (or zero, for the first term in the file). For fields
|
term's data (or zero, for the first term in the file). For fields
|
||||||
with omitTf true, this will be 0 since
|
that omit position data, this will be 0 since
|
||||||
prox information is not stored.
|
prox information is not stored.
|
||||||
</p>
|
</p>
|
||||||
<p>SkipDelta determines the position of this
|
<p>SkipDelta determines the position of this
|
||||||
|
@ -1494,7 +1499,7 @@
|
||||||
<p>
|
<p>
|
||||||
The .frq file contains the lists of documents
|
The .frq file contains the lists of documents
|
||||||
which contain each term, along with the frequency of the term in that
|
which contain each term, along with the frequency of the term in that
|
||||||
document (if omitTf is false).
|
document (except when frequencies are omitted: IndexOptions.DOCS_ONLY).
|
||||||
</p>
|
</p>
|
||||||
<p>FreqFile (.frq) -->
|
<p>FreqFile (.frq) -->
|
||||||
<TermFreqs, SkipData>
|
<TermFreqs, SkipData>
|
||||||
|
@ -1531,26 +1536,26 @@
|
||||||
<p>TermFreq
|
<p>TermFreq
|
||||||
entries are ordered by increasing document number.
|
entries are ordered by increasing document number.
|
||||||
</p>
|
</p>
|
||||||
<p>DocDelta: if omitTf is false, this determines both
|
<p>DocDelta: if frequencies are indexed, this determines both
|
||||||
the document number and the frequency. In
|
the document number and the frequency. In
|
||||||
particular, DocDelta/2 is the difference between
|
particular, DocDelta/2 is the difference between
|
||||||
this document number and the previous document
|
this document number and the previous document
|
||||||
number (or zero when this is the first document in
|
number (or zero when this is the first document in
|
||||||
a TermFreqs). When DocDelta is odd, the frequency
|
a TermFreqs). When DocDelta is odd, the frequency
|
||||||
is one. When DocDelta is even, the frequency is
|
is one. When DocDelta is even, the frequency is
|
||||||
read as another VInt. If omitTf is true, DocDelta
|
read as another VInt. If frequencies are omitted, DocDelta
|
||||||
contains the gap (not multiplied by 2) between
|
contains the gap (not multiplied by 2) between
|
||||||
document numbers and no frequency information is
|
document numbers and no frequency information is
|
||||||
stored.
|
stored.
|
||||||
</p>
|
</p>
|
||||||
<p>For example, the TermFreqs for a term which occurs
|
<p>For example, the TermFreqs for a term which occurs
|
||||||
once in document seven and three times in document
|
once in document seven and three times in document
|
||||||
eleven, with omitTf false, would be the following
|
eleven, with frequencies indexed, would be the following
|
||||||
sequence of VInts:
|
sequence of VInts:
|
||||||
</p>
|
</p>
|
||||||
<p>15, 8, 3
|
<p>15, 8, 3
|
||||||
</p>
|
</p>
|
||||||
<p> If omitTf were true it would be this sequence
|
<p> If frequencies were omitted (IndexOptions.DOCS_ONLY) it would be this sequence
|
||||||
of VInts instead:
|
of VInts instead:
|
||||||
</p>
|
</p>
|
||||||
<p>
|
<p>
|
||||||
|
@ -1621,9 +1626,9 @@
|
||||||
<p>
|
<p>
|
||||||
The .prx file contains the lists of positions that
|
The .prx file contains the lists of positions that
|
||||||
each term occurs at within documents. Note that
|
each term occurs at within documents. Note that
|
||||||
fields with omitTf true do not store
|
fields omitting positional data do not store
|
||||||
anything into this file, and if all fields in the
|
anything into this file, and if all fields in the
|
||||||
index have omitTf true then the .prx file will not
|
index omit positional data then the .prx file will not
|
||||||
exist.
|
exist.
|
||||||
</p>
|
</p>
|
||||||
<p>ProxFile (.prx) -->
|
<p>ProxFile (.prx) -->
|
||||||
|
|
|
@ -29,6 +29,7 @@ import org.apache.lucene.analysis.MockTokenizer;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.document.Fieldable;
|
import org.apache.lucene.document.Fieldable;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.search.SimilarityProvider;
|
import org.apache.lucene.search.SimilarityProvider;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
@ -67,7 +68,7 @@ class DocHelper {
|
||||||
public static Field noTFField = new Field(NO_TF_KEY, NO_TF_TEXT,
|
public static Field noTFField = new Field(NO_TF_KEY, NO_TF_TEXT,
|
||||||
Field.Store.YES, Field.Index.ANALYZED);
|
Field.Store.YES, Field.Index.ANALYZED);
|
||||||
static {
|
static {
|
||||||
noTFField.setOmitTermFreqAndPositions(true);
|
noTFField.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static final String UNINDEXED_FIELD_TEXT = "unindexed field text";
|
public static final String UNINDEXED_FIELD_TEXT = "unindexed field text";
|
||||||
|
@ -173,7 +174,7 @@ class DocHelper {
|
||||||
if (f.isStored()) add(stored,f);
|
if (f.isStored()) add(stored,f);
|
||||||
else add(unstored,f);
|
else add(unstored,f);
|
||||||
if (f.getOmitNorms()) add(noNorms,f);
|
if (f.getOmitNorms()) add(noNorms,f);
|
||||||
if (f.getOmitTermFreqAndPositions()) add(noTf,f);
|
if (f.getIndexOptions() == IndexOptions.DOCS_ONLY) add(noTf,f);
|
||||||
if (f.isLazy()) add(lazy, f);
|
if (f.isLazy()) add(lazy, f);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,6 +22,7 @@ import java.util.Comparator;
|
||||||
|
|
||||||
import org.apache.lucene.index.CorruptIndexException;
|
import org.apache.lucene.index.CorruptIndexException;
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.IndexFileNames;
|
import org.apache.lucene.index.IndexFileNames;
|
||||||
import org.apache.lucene.index.SegmentWriteState;
|
import org.apache.lucene.index.SegmentWriteState;
|
||||||
import org.apache.lucene.index.codecs.FieldsConsumer;
|
import org.apache.lucene.index.codecs.FieldsConsumer;
|
||||||
|
@ -90,7 +91,7 @@ class PreFlexFieldsWriter extends FieldsConsumer {
|
||||||
|
|
||||||
public PreFlexTermsWriter(FieldInfo fieldInfo) {
|
public PreFlexTermsWriter(FieldInfo fieldInfo) {
|
||||||
this.fieldInfo = fieldInfo;
|
this.fieldInfo = fieldInfo;
|
||||||
omitTF = fieldInfo.omitTermFreqAndPositions;
|
omitTF = fieldInfo.indexOptions == IndexOptions.DOCS_ONLY;
|
||||||
storePayloads = fieldInfo.storePayloads;
|
storePayloads = fieldInfo.storePayloads;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -422,7 +422,7 @@ public class _TestUtil {
|
||||||
List<Fieldable> fields = doc.getFields();
|
List<Fieldable> fields = doc.getFields();
|
||||||
for (Fieldable field : fields) {
|
for (Fieldable field : fields) {
|
||||||
fieldInfos.addOrUpdate(field.name(), field.isIndexed(), field.isTermVectorStored(), field.isStorePositionWithTermVector(),
|
fieldInfos.addOrUpdate(field.name(), field.isIndexed(), field.isTermVectorStored(), field.isStorePositionWithTermVector(),
|
||||||
field.isStoreOffsetWithTermVector(), field.getOmitNorms(), false, field.getOmitTermFreqAndPositions(), field.docValuesType());
|
field.isStoreOffsetWithTermVector(), field.getOmitNorms(), false, field.getIndexOptions(), field.docValuesType());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -507,7 +507,7 @@ public class _TestUtil {
|
||||||
field1.isStored() ? Field.Store.YES : Field.Store.NO,
|
field1.isStored() ? Field.Store.YES : Field.Store.NO,
|
||||||
field1.isIndexed() ? (field1.isTokenized() ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED) : Field.Index.NO);
|
field1.isIndexed() ? (field1.isTokenized() ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED) : Field.Index.NO);
|
||||||
field2.setOmitNorms(field1.getOmitNorms());
|
field2.setOmitNorms(field1.getOmitNorms());
|
||||||
field2.setOmitTermFreqAndPositions(field1.getOmitTermFreqAndPositions());
|
field2.setIndexOptions(field1.getIndexOptions());
|
||||||
doc2.add(field2);
|
doc2.add(field2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -24,6 +24,7 @@ import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.codecs.CodecProvider;
|
import org.apache.lucene.index.codecs.CodecProvider;
|
||||||
import org.apache.lucene.store.MockDirectoryWrapper;
|
import org.apache.lucene.store.MockDirectoryWrapper;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
@ -62,7 +63,7 @@ public class Test2BPostings extends LuceneTestCase {
|
||||||
|
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
Field field = new Field("field", new MyTokenStream());
|
Field field = new Field("field", new MyTokenStream());
|
||||||
field.setOmitTermFreqAndPositions(true);
|
field.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||||
field.setOmitNorms(true);
|
field.setOmitNorms(true);
|
||||||
doc.add(field);
|
doc.add(field);
|
||||||
|
|
||||||
|
|
|
@ -23,6 +23,7 @@ import org.apache.lucene.search.*;
|
||||||
import org.apache.lucene.analysis.*;
|
import org.apache.lucene.analysis.*;
|
||||||
import org.apache.lucene.analysis.tokenattributes.*;
|
import org.apache.lucene.analysis.tokenattributes.*;
|
||||||
import org.apache.lucene.document.*;
|
import org.apache.lucene.document.*;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.codecs.CodecProvider;
|
import org.apache.lucene.index.codecs.CodecProvider;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
@ -177,7 +178,7 @@ public class Test2BTerms extends LuceneTestCase {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
final MyTokenStream ts = new MyTokenStream(random, TERMS_PER_DOC);
|
final MyTokenStream ts = new MyTokenStream(random, TERMS_PER_DOC);
|
||||||
Field field = new Field("field", ts);
|
Field field = new Field("field", ts);
|
||||||
field.setOmitTermFreqAndPositions(true);
|
field.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||||
field.setOmitNorms(true);
|
field.setOmitNorms(true);
|
||||||
doc.add(field);
|
doc.add(field);
|
||||||
//w.setInfoStream(System.out);
|
//w.setInfoStream(System.out);
|
||||||
|
|
|
@ -31,6 +31,7 @@ import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.document.Fieldable;
|
import org.apache.lucene.document.Fieldable;
|
||||||
import org.apache.lucene.document.NumericField;
|
import org.apache.lucene.document.NumericField;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||||
import org.apache.lucene.search.DefaultSimilarity;
|
import org.apache.lucene.search.DefaultSimilarity;
|
||||||
import org.apache.lucene.search.DocIdSetIterator;
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
|
@ -606,10 +607,10 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
|
||||||
private void addNoProxDoc(IndexWriter writer) throws IOException {
|
private void addNoProxDoc(IndexWriter writer) throws IOException {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
Field f = new Field("content3", "aaa", Field.Store.YES, Field.Index.ANALYZED);
|
Field f = new Field("content3", "aaa", Field.Store.YES, Field.Index.ANALYZED);
|
||||||
f.setOmitTermFreqAndPositions(true);
|
f.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||||
doc.add(f);
|
doc.add(f);
|
||||||
f = new Field("content4", "aaa", Field.Store.YES, Field.Index.NO);
|
f = new Field("content4", "aaa", Field.Store.YES, Field.Index.NO);
|
||||||
f.setOmitTermFreqAndPositions(true);
|
f.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||||
doc.add(f);
|
doc.add(f);
|
||||||
writer.addDocument(doc);
|
writer.addDocument(doc);
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,6 +25,7 @@ import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.document.Field.Store;
|
import org.apache.lucene.document.Field.Store;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.codecs.CodecProvider;
|
import org.apache.lucene.index.codecs.CodecProvider;
|
||||||
import org.apache.lucene.index.codecs.FieldsConsumer;
|
import org.apache.lucene.index.codecs.FieldsConsumer;
|
||||||
import org.apache.lucene.index.codecs.FieldsProducer;
|
import org.apache.lucene.index.codecs.FieldsProducer;
|
||||||
|
@ -84,7 +85,8 @@ public class TestCodecs extends LuceneTestCase {
|
||||||
this.storePayloads = storePayloads;
|
this.storePayloads = storePayloads;
|
||||||
fieldInfos.addOrUpdate(name, true);
|
fieldInfos.addOrUpdate(name, true);
|
||||||
fieldInfo = fieldInfos.fieldInfo(name);
|
fieldInfo = fieldInfos.fieldInfo(name);
|
||||||
fieldInfo.omitTermFreqAndPositions = omitTF;
|
// TODO: change this test to use all three
|
||||||
|
fieldInfo.indexOptions = omitTF ? IndexOptions.DOCS_ONLY : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||||
fieldInfo.storePayloads = storePayloads;
|
fieldInfo.storePayloads = storePayloads;
|
||||||
this.terms = terms;
|
this.terms = terms;
|
||||||
for(int i=0;i<terms.length;i++)
|
for(int i=0;i<terms.length;i++)
|
||||||
|
|
|
@ -34,6 +34,7 @@ import org.apache.lucene.document.Field.Index;
|
||||||
import org.apache.lucene.document.Field.Store;
|
import org.apache.lucene.document.Field.Store;
|
||||||
import org.apache.lucene.document.Field.TermVector;
|
import org.apache.lucene.document.Field.TermVector;
|
||||||
import org.apache.lucene.document.Fieldable;
|
import org.apache.lucene.document.Fieldable;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.IOContext;
|
import org.apache.lucene.store.IOContext;
|
||||||
import org.apache.lucene.store.IOContext.Context;
|
import org.apache.lucene.store.IOContext.Context;
|
||||||
|
@ -303,7 +304,7 @@ public class TestDocumentWriter extends LuceneTestCase {
|
||||||
doc.add(newField("f1", "v2", Store.YES, Index.NO));
|
doc.add(newField("f1", "v2", Store.YES, Index.NO));
|
||||||
// f2 has no TF
|
// f2 has no TF
|
||||||
Field f = newField("f2", "v1", Store.NO, Index.ANALYZED);
|
Field f = newField("f2", "v1", Store.NO, Index.ANALYZED);
|
||||||
f.setOmitTermFreqAndPositions(true);
|
f.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||||
doc.add(f);
|
doc.add(f);
|
||||||
doc.add(newField("f2", "v2", Store.YES, Index.NO));
|
doc.add(newField("f2", "v2", Store.YES, Index.NO));
|
||||||
|
|
||||||
|
@ -319,10 +320,10 @@ public class TestDocumentWriter extends LuceneTestCase {
|
||||||
FieldInfos fi = reader.fieldInfos();
|
FieldInfos fi = reader.fieldInfos();
|
||||||
// f1
|
// f1
|
||||||
assertFalse("f1 should have no norms", reader.hasNorms("f1"));
|
assertFalse("f1 should have no norms", reader.hasNorms("f1"));
|
||||||
assertFalse("omitTermFreqAndPositions field bit should not be set for f1", fi.fieldInfo("f1").omitTermFreqAndPositions);
|
assertEquals("omitTermFreqAndPositions field bit should not be set for f1", IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, fi.fieldInfo("f1").indexOptions);
|
||||||
// f2
|
// f2
|
||||||
assertTrue("f2 should have norms", reader.hasNorms("f2"));
|
assertTrue("f2 should have norms", reader.hasNorms("f2"));
|
||||||
assertTrue("omitTermFreqAndPositions field bit should be set for f2", fi.fieldInfo("f2").omitTermFreqAndPositions);
|
assertEquals("omitTermFreqAndPositions field bit should be set for f2", IndexOptions.DOCS_ONLY, fi.fieldInfo("f2").indexOptions);
|
||||||
reader.close();
|
reader.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.lucene.index;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
import org.apache.lucene.util._TestUtil;
|
import org.apache.lucene.util._TestUtil;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.IndexOutput;
|
import org.apache.lucene.store.IndexOutput;
|
||||||
|
|
||||||
|
@ -137,7 +138,7 @@ public class TestFieldInfos extends LuceneTestCase {
|
||||||
try {
|
try {
|
||||||
readOnly.addOrUpdate("bogus", random.nextBoolean(), random.nextBoolean(),
|
readOnly.addOrUpdate("bogus", random.nextBoolean(), random.nextBoolean(),
|
||||||
random.nextBoolean(), random.nextBoolean(), random.nextBoolean(),
|
random.nextBoolean(), random.nextBoolean(), random.nextBoolean(),
|
||||||
random.nextBoolean(), random.nextBoolean(), null);
|
random.nextBoolean(), random.nextBoolean() ? IndexOptions.DOCS_ONLY : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, null);
|
||||||
fail("instance should be read only");
|
fail("instance should be read only");
|
||||||
} catch (IllegalStateException e) {
|
} catch (IllegalStateException e) {
|
||||||
// expected
|
// expected
|
||||||
|
|
|
@ -30,6 +30,7 @@ import org.apache.lucene.document.FieldSelectorResult;
|
||||||
import org.apache.lucene.document.Fieldable;
|
import org.apache.lucene.document.Fieldable;
|
||||||
import org.apache.lucene.document.LoadFirstFieldSelector;
|
import org.apache.lucene.document.LoadFirstFieldSelector;
|
||||||
import org.apache.lucene.document.SetBasedFieldSelector;
|
import org.apache.lucene.document.SetBasedFieldSelector;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||||
import org.apache.lucene.search.FieldCache;
|
import org.apache.lucene.search.FieldCache;
|
||||||
import org.apache.lucene.store.AlreadyClosedException;
|
import org.apache.lucene.store.AlreadyClosedException;
|
||||||
|
@ -91,7 +92,7 @@ public class TestFieldsReader extends LuceneTestCase {
|
||||||
assertTrue(field.isStoreOffsetWithTermVector() == true);
|
assertTrue(field.isStoreOffsetWithTermVector() == true);
|
||||||
assertTrue(field.isStorePositionWithTermVector() == true);
|
assertTrue(field.isStorePositionWithTermVector() == true);
|
||||||
assertTrue(field.getOmitNorms() == false);
|
assertTrue(field.getOmitNorms() == false);
|
||||||
assertTrue(field.getOmitTermFreqAndPositions() == false);
|
assertTrue(field.getIndexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
|
||||||
|
|
||||||
field = doc.getField(DocHelper.TEXT_FIELD_3_KEY);
|
field = doc.getField(DocHelper.TEXT_FIELD_3_KEY);
|
||||||
assertTrue(field != null);
|
assertTrue(field != null);
|
||||||
|
@ -99,7 +100,7 @@ public class TestFieldsReader extends LuceneTestCase {
|
||||||
assertTrue(field.isStoreOffsetWithTermVector() == false);
|
assertTrue(field.isStoreOffsetWithTermVector() == false);
|
||||||
assertTrue(field.isStorePositionWithTermVector() == false);
|
assertTrue(field.isStorePositionWithTermVector() == false);
|
||||||
assertTrue(field.getOmitNorms() == true);
|
assertTrue(field.getOmitNorms() == true);
|
||||||
assertTrue(field.getOmitTermFreqAndPositions() == false);
|
assertTrue(field.getIndexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
|
||||||
|
|
||||||
field = doc.getField(DocHelper.NO_TF_KEY);
|
field = doc.getField(DocHelper.NO_TF_KEY);
|
||||||
assertTrue(field != null);
|
assertTrue(field != null);
|
||||||
|
@ -107,7 +108,7 @@ public class TestFieldsReader extends LuceneTestCase {
|
||||||
assertTrue(field.isStoreOffsetWithTermVector() == false);
|
assertTrue(field.isStoreOffsetWithTermVector() == false);
|
||||||
assertTrue(field.isStorePositionWithTermVector() == false);
|
assertTrue(field.isStorePositionWithTermVector() == false);
|
||||||
assertTrue(field.getOmitNorms() == false);
|
assertTrue(field.getOmitNorms() == false);
|
||||||
assertTrue(field.getOmitTermFreqAndPositions() == true);
|
assertTrue(field.getIndexOptions() == IndexOptions.DOCS_ONLY);
|
||||||
reader.close();
|
reader.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -26,6 +26,7 @@ import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
|
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.codecs.CodecProvider;
|
import org.apache.lucene.index.codecs.CodecProvider;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
@ -63,8 +64,8 @@ public class TestLongPostings extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testLongPostings() throws Exception {
|
public void testLongPostings() throws Exception {
|
||||||
assumeFalse("Too slow with SimpleText codec", CodecProvider.getDefault().getFieldCodec("field").equals("SimpleText"));
|
assumeFalse("Too slow with SimpleText codec at night", TEST_NIGHTLY && CodecProvider.getDefault().getFieldCodec("field").equals("SimpleText"));
|
||||||
assumeFalse("Too slow with Memory codec", CodecProvider.getDefault().getFieldCodec("field").equals("Memory"));
|
assumeFalse("Too slow with Memory codec at night", TEST_NIGHTLY && CodecProvider.getDefault().getFieldCodec("field").equals("Memory"));
|
||||||
|
|
||||||
// Don't use _TestUtil.getTempDir so that we own the
|
// Don't use _TestUtil.getTempDir so that we own the
|
||||||
// randomness (ie same seed will point to same dir):
|
// randomness (ie same seed will point to same dir):
|
||||||
|
@ -250,4 +251,187 @@ public class TestLongPostings extends LuceneTestCase {
|
||||||
r.close();
|
r.close();
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// a weaker form of testLongPostings that doesn't check positions
|
||||||
|
public void testLongPostingsNoPositions() throws Exception {
|
||||||
|
doTestLongPostingsNoPositions(IndexOptions.DOCS_ONLY);
|
||||||
|
doTestLongPostingsNoPositions(IndexOptions.DOCS_AND_FREQS);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void doTestLongPostingsNoPositions(IndexOptions options) throws Exception {
|
||||||
|
assumeFalse("Too slow with SimpleText codec at night", TEST_NIGHTLY && CodecProvider.getDefault().getFieldCodec("field").equals("SimpleText"));
|
||||||
|
assumeFalse("Too slow with Memory codec at night", TEST_NIGHTLY && CodecProvider.getDefault().getFieldCodec("field").equals("Memory"));
|
||||||
|
// Don't use _TestUtil.getTempDir so that we own the
|
||||||
|
// randomness (ie same seed will point to same dir):
|
||||||
|
Directory dir = newFSDirectory(_TestUtil.getTempDir("longpostings" + "." + random.nextLong()));
|
||||||
|
|
||||||
|
final int NUM_DOCS = atLeast(2000);
|
||||||
|
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println("TEST: NUM_DOCS=" + NUM_DOCS);
|
||||||
|
}
|
||||||
|
|
||||||
|
final String s1 = getRandomTerm(null);
|
||||||
|
final String s2 = getRandomTerm(s1);
|
||||||
|
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println("\nTEST: s1=" + s1 + " s2=" + s2);
|
||||||
|
/*
|
||||||
|
for(int idx=0;idx<s1.length();idx++) {
|
||||||
|
System.out.println(" s1 ch=0x" + Integer.toHexString(s1.charAt(idx)));
|
||||||
|
}
|
||||||
|
for(int idx=0;idx<s2.length();idx++) {
|
||||||
|
System.out.println(" s2 ch=0x" + Integer.toHexString(s2.charAt(idx)));
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
|
||||||
|
final FixedBitSet isS1 = new FixedBitSet(NUM_DOCS);
|
||||||
|
for(int idx=0;idx<NUM_DOCS;idx++) {
|
||||||
|
if (random.nextBoolean()) {
|
||||||
|
isS1.set(idx);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
final IndexReader r;
|
||||||
|
if (true) {
|
||||||
|
final IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
|
||||||
|
.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
|
||||||
|
.setMergePolicy(newLogMergePolicy());
|
||||||
|
iwc.setRAMBufferSizeMB(16.0 + 16.0 * random.nextDouble());
|
||||||
|
iwc.setMaxBufferedDocs(-1);
|
||||||
|
final RandomIndexWriter riw = new RandomIndexWriter(random, dir, iwc);
|
||||||
|
|
||||||
|
for(int idx=0;idx<NUM_DOCS;idx++) {
|
||||||
|
final Document doc = new Document();
|
||||||
|
String s = isS1.get(idx) ? s1 : s2;
|
||||||
|
final Field f = newField("field", s, Field.Index.ANALYZED);
|
||||||
|
f.setIndexOptions(options);
|
||||||
|
final int count = _TestUtil.nextInt(random, 1, 4);
|
||||||
|
for(int ct=0;ct<count;ct++) {
|
||||||
|
doc.add(f);
|
||||||
|
}
|
||||||
|
riw.addDocument(doc);
|
||||||
|
}
|
||||||
|
|
||||||
|
r = riw.getReader();
|
||||||
|
riw.close();
|
||||||
|
} else {
|
||||||
|
r = IndexReader.open(dir);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println("TEST: terms");
|
||||||
|
TermEnum termEnum = r.terms();
|
||||||
|
while(termEnum.next()) {
|
||||||
|
System.out.println(" term=" + termEnum.term() + " len=" + termEnum.term().text().length());
|
||||||
|
assertTrue(termEnum.docFreq() > 0);
|
||||||
|
System.out.println(" s1?=" + (termEnum.term().text().equals(s1)) + " s1len=" + s1.length());
|
||||||
|
System.out.println(" s2?=" + (termEnum.term().text().equals(s2)) + " s2len=" + s2.length());
|
||||||
|
final String s = termEnum.term().text();
|
||||||
|
for(int idx=0;idx<s.length();idx++) {
|
||||||
|
System.out.println(" ch=0x" + Integer.toHexString(s.charAt(idx)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
assertEquals(NUM_DOCS, r.numDocs());
|
||||||
|
assertTrue(r.docFreq(new Term("field", s1)) > 0);
|
||||||
|
assertTrue(r.docFreq(new Term("field", s2)) > 0);
|
||||||
|
|
||||||
|
int num = atLeast(1000);
|
||||||
|
for(int iter=0;iter<num;iter++) {
|
||||||
|
|
||||||
|
final String term;
|
||||||
|
final boolean doS1;
|
||||||
|
if (random.nextBoolean()) {
|
||||||
|
term = s1;
|
||||||
|
doS1 = true;
|
||||||
|
} else {
|
||||||
|
term = s2;
|
||||||
|
doS1 = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println("\nTEST: iter=" + iter + " doS1=" + doS1);
|
||||||
|
}
|
||||||
|
|
||||||
|
final DocsEnum postings = MultiFields.getTermDocsEnum(r, null, "field", new BytesRef(term));
|
||||||
|
|
||||||
|
int docID = -1;
|
||||||
|
while(docID < DocsEnum.NO_MORE_DOCS) {
|
||||||
|
final int what = random.nextInt(3);
|
||||||
|
if (what == 0) {
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println("TEST: docID=" + docID + "; do next()");
|
||||||
|
}
|
||||||
|
// nextDoc
|
||||||
|
int expected = docID+1;
|
||||||
|
while(true) {
|
||||||
|
if (expected == NUM_DOCS) {
|
||||||
|
expected = Integer.MAX_VALUE;
|
||||||
|
break;
|
||||||
|
} else if (isS1.get(expected) == doS1) {
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
expected++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
docID = postings.nextDoc();
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println(" got docID=" + docID);
|
||||||
|
}
|
||||||
|
assertEquals(expected, docID);
|
||||||
|
if (docID == DocsEnum.NO_MORE_DOCS) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (random.nextInt(6) == 3) {
|
||||||
|
final int freq = postings.freq();
|
||||||
|
assertTrue(freq >=1 && freq <= 4);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// advance
|
||||||
|
final int targetDocID;
|
||||||
|
if (docID == -1) {
|
||||||
|
targetDocID = random.nextInt(NUM_DOCS+1);
|
||||||
|
} else {
|
||||||
|
targetDocID = docID + _TestUtil.nextInt(random, 1, NUM_DOCS - docID);
|
||||||
|
}
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println("TEST: docID=" + docID + "; do advance(" + targetDocID + ")");
|
||||||
|
}
|
||||||
|
int expected = targetDocID;
|
||||||
|
while(true) {
|
||||||
|
if (expected == NUM_DOCS) {
|
||||||
|
expected = Integer.MAX_VALUE;
|
||||||
|
break;
|
||||||
|
} else if (isS1.get(expected) == doS1) {
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
expected++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
docID = postings.advance(targetDocID);
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println(" got docID=" + docID);
|
||||||
|
}
|
||||||
|
assertEquals(expected, docID);
|
||||||
|
if (docID == DocsEnum.NO_MORE_DOCS) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (random.nextInt(6) == 3) {
|
||||||
|
final int freq = postings.freq();
|
||||||
|
assertTrue(freq >=1 && freq <= 4);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
r.close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,232 @@
|
||||||
|
package org.apache.lucene.index;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @lucene.experimental
|
||||||
|
*/
|
||||||
|
public class TestOmitPositions extends LuceneTestCase {
|
||||||
|
|
||||||
|
public void testBasic() throws Exception {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
RandomIndexWriter w = new RandomIndexWriter(random, dir);
|
||||||
|
Document doc = new Document();
|
||||||
|
Field f = newField("foo", "this is a test test", Field.Index.ANALYZED);
|
||||||
|
f.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
|
||||||
|
doc.add(f);
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
w.addDocument(doc);
|
||||||
|
}
|
||||||
|
|
||||||
|
IndexReader reader = w.getReader();
|
||||||
|
w.close();
|
||||||
|
|
||||||
|
assertNull(MultiFields.getTermPositionsEnum(reader, null, "foo", new BytesRef("test")));
|
||||||
|
|
||||||
|
DocsEnum de = MultiFields.getTermDocsEnum(reader, null, "foo", new BytesRef("test"));
|
||||||
|
while (de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
|
||||||
|
assertEquals(2, de.freq());
|
||||||
|
}
|
||||||
|
|
||||||
|
reader.close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Tests whether the DocumentWriter correctly enable the
|
||||||
|
// omitTermFreqAndPositions bit in the FieldInfo
|
||||||
|
public void testPositions() throws Exception {
|
||||||
|
Directory ram = newDirectory();
|
||||||
|
Analyzer analyzer = new MockAnalyzer(random);
|
||||||
|
IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer));
|
||||||
|
Document d = new Document();
|
||||||
|
|
||||||
|
// f1,f2,f3: docs only
|
||||||
|
Field f1 = newField("f1", "This field has docs only", Field.Store.NO, Field.Index.ANALYZED);
|
||||||
|
f1.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||||
|
d.add(f1);
|
||||||
|
|
||||||
|
Field f2 = newField("f2", "This field has docs only", Field.Store.NO, Field.Index.ANALYZED);
|
||||||
|
f2.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||||
|
d.add(f2);
|
||||||
|
|
||||||
|
Field f3 = newField("f3", "This field has docs only", Field.Store.NO, Field.Index.ANALYZED);
|
||||||
|
f3.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||||
|
d.add(f3);
|
||||||
|
|
||||||
|
// f4,f5,f6 docs and freqs
|
||||||
|
Field f4 = newField("f4", "This field has docs and freqs", Field.Store.NO, Field.Index.ANALYZED);
|
||||||
|
f4.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
|
||||||
|
d.add(f4);
|
||||||
|
|
||||||
|
Field f5 = newField("f5", "This field has docs and freqs", Field.Store.NO, Field.Index.ANALYZED);
|
||||||
|
f5.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
|
||||||
|
d.add(f5);
|
||||||
|
|
||||||
|
Field f6 = newField("f6", "This field has docs and freqs", Field.Store.NO, Field.Index.ANALYZED);
|
||||||
|
f6.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
|
||||||
|
d.add(f6);
|
||||||
|
|
||||||
|
// f7,f8,f9 docs/freqs/positions
|
||||||
|
Field f7 = newField("f7", "This field has docs and freqs and positions", Field.Store.NO, Field.Index.ANALYZED);
|
||||||
|
f7.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
|
||||||
|
d.add(f7);
|
||||||
|
|
||||||
|
Field f8 = newField("f8", "This field has docs and freqs and positions", Field.Store.NO, Field.Index.ANALYZED);
|
||||||
|
f8.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
|
||||||
|
d.add(f8);
|
||||||
|
|
||||||
|
Field f9 = newField("f9", "This field has docs and freqs and positions", Field.Store.NO, Field.Index.ANALYZED);
|
||||||
|
f9.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
|
||||||
|
d.add(f9);
|
||||||
|
|
||||||
|
writer.addDocument(d);
|
||||||
|
writer.optimize();
|
||||||
|
|
||||||
|
// now we add another document which has docs-only for f1, f4, f7, docs/freqs for f2, f5, f8,
|
||||||
|
// and docs/freqs/positions for f3, f6, f9
|
||||||
|
d = new Document();
|
||||||
|
|
||||||
|
// f1,f4,f7: docs only
|
||||||
|
f1 = newField("f1", "This field has docs only", Field.Store.NO, Field.Index.ANALYZED);
|
||||||
|
f1.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||||
|
d.add(f1);
|
||||||
|
|
||||||
|
f4 = newField("f4", "This field has docs only", Field.Store.NO, Field.Index.ANALYZED);
|
||||||
|
f4.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||||
|
d.add(f4);
|
||||||
|
|
||||||
|
f7 = newField("f7", "This field has docs only", Field.Store.NO, Field.Index.ANALYZED);
|
||||||
|
f7.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||||
|
d.add(f7);
|
||||||
|
|
||||||
|
// f2, f5, f8: docs and freqs
|
||||||
|
f2 = newField("f2", "This field has docs and freqs", Field.Store.NO, Field.Index.ANALYZED);
|
||||||
|
f2.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
|
||||||
|
d.add(f2);
|
||||||
|
|
||||||
|
f5 = newField("f5", "This field has docs and freqs", Field.Store.NO, Field.Index.ANALYZED);
|
||||||
|
f5.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
|
||||||
|
d.add(f5);
|
||||||
|
|
||||||
|
f8 = newField("f8", "This field has docs and freqs", Field.Store.NO, Field.Index.ANALYZED);
|
||||||
|
f8.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
|
||||||
|
d.add(f8);
|
||||||
|
|
||||||
|
// f3, f6, f9: docs and freqs and positions
|
||||||
|
f3 = newField("f3", "This field has docs and freqs and positions", Field.Store.NO, Field.Index.ANALYZED);
|
||||||
|
f3.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
|
||||||
|
d.add(f3);
|
||||||
|
|
||||||
|
f6 = newField("f6", "This field has docs and freqs and positions", Field.Store.NO, Field.Index.ANALYZED);
|
||||||
|
f6.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
|
||||||
|
d.add(f6);
|
||||||
|
|
||||||
|
f9 = newField("f9", "This field has docs and freqs and positions", Field.Store.NO, Field.Index.ANALYZED);
|
||||||
|
f9.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
|
||||||
|
d.add(f9);
|
||||||
|
|
||||||
|
writer.addDocument(d);
|
||||||
|
|
||||||
|
// force merge
|
||||||
|
writer.optimize();
|
||||||
|
// flush
|
||||||
|
writer.close();
|
||||||
|
|
||||||
|
SegmentReader reader = getOnlySegmentReader(IndexReader.open(ram, false));
|
||||||
|
FieldInfos fi = reader.fieldInfos();
|
||||||
|
// docs + docs = docs
|
||||||
|
assertEquals(IndexOptions.DOCS_ONLY, fi.fieldInfo("f1").indexOptions);
|
||||||
|
// docs + docs/freqs = docs
|
||||||
|
assertEquals(IndexOptions.DOCS_ONLY, fi.fieldInfo("f2").indexOptions);
|
||||||
|
// docs + docs/freqs/pos = docs
|
||||||
|
assertEquals(IndexOptions.DOCS_ONLY, fi.fieldInfo("f3").indexOptions);
|
||||||
|
// docs/freqs + docs = docs
|
||||||
|
assertEquals(IndexOptions.DOCS_ONLY, fi.fieldInfo("f4").indexOptions);
|
||||||
|
// docs/freqs + docs/freqs = docs/freqs
|
||||||
|
assertEquals(IndexOptions.DOCS_AND_FREQS, fi.fieldInfo("f5").indexOptions);
|
||||||
|
// docs/freqs + docs/freqs/pos = docs/freqs
|
||||||
|
assertEquals(IndexOptions.DOCS_AND_FREQS, fi.fieldInfo("f6").indexOptions);
|
||||||
|
// docs/freqs/pos + docs = docs
|
||||||
|
assertEquals(IndexOptions.DOCS_ONLY, fi.fieldInfo("f7").indexOptions);
|
||||||
|
// docs/freqs/pos + docs/freqs = docs/freqs
|
||||||
|
assertEquals(IndexOptions.DOCS_AND_FREQS, fi.fieldInfo("f8").indexOptions);
|
||||||
|
// docs/freqs/pos + docs/freqs/pos = docs/freqs/pos
|
||||||
|
assertEquals(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, fi.fieldInfo("f9").indexOptions);
|
||||||
|
|
||||||
|
reader.close();
|
||||||
|
ram.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void assertNoPrx(Directory dir) throws Throwable {
|
||||||
|
final String[] files = dir.listAll();
|
||||||
|
for(int i=0;i<files.length;i++) {
|
||||||
|
assertFalse(files[i].endsWith(".prx"));
|
||||||
|
assertFalse(files[i].endsWith(".pos"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verifies no *.prx exists when all fields omit term positions:
|
||||||
|
public void testNoPrxFile() throws Throwable {
|
||||||
|
Directory ram = newDirectory();
|
||||||
|
Analyzer analyzer = new MockAnalyzer(random);
|
||||||
|
IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig(
|
||||||
|
TEST_VERSION_CURRENT, analyzer).setMaxBufferedDocs(3).setMergePolicy(newLogMergePolicy()));
|
||||||
|
LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy();
|
||||||
|
lmp.setMergeFactor(2);
|
||||||
|
lmp.setUseCompoundFile(false);
|
||||||
|
Document d = new Document();
|
||||||
|
|
||||||
|
Field f1 = newField("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
|
||||||
|
f1.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
|
||||||
|
d.add(f1);
|
||||||
|
|
||||||
|
for(int i=0;i<30;i++)
|
||||||
|
writer.addDocument(d);
|
||||||
|
|
||||||
|
writer.commit();
|
||||||
|
|
||||||
|
assertNoPrx(ram);
|
||||||
|
|
||||||
|
// now add some documents with positions, and check there is no prox after optimization
|
||||||
|
d = new Document();
|
||||||
|
f1 = newField("f1", "This field has positions", Field.Store.NO, Field.Index.ANALYZED);
|
||||||
|
d.add(f1);
|
||||||
|
|
||||||
|
for(int i=0;i<30;i++)
|
||||||
|
writer.addDocument(d);
|
||||||
|
|
||||||
|
// force merge
|
||||||
|
writer.optimize();
|
||||||
|
// flush
|
||||||
|
writer.close();
|
||||||
|
|
||||||
|
assertNoPrx(ram);
|
||||||
|
ram.close();
|
||||||
|
}
|
||||||
|
}
|
|
@ -26,6 +26,7 @@ import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.MockAnalyzer;
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||||
import org.apache.lucene.search.*;
|
import org.apache.lucene.search.*;
|
||||||
import org.apache.lucene.search.BooleanClause.Occur;
|
import org.apache.lucene.search.BooleanClause.Occur;
|
||||||
|
@ -65,7 +66,7 @@ public class TestOmitTf extends LuceneTestCase {
|
||||||
|
|
||||||
// this field will NOT have Tf
|
// this field will NOT have Tf
|
||||||
Field f2 = newField("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED);
|
Field f2 = newField("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED);
|
||||||
f2.setOmitTermFreqAndPositions(true);
|
f2.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||||
d.add(f2);
|
d.add(f2);
|
||||||
|
|
||||||
writer.addDocument(d);
|
writer.addDocument(d);
|
||||||
|
@ -75,10 +76,10 @@ public class TestOmitTf extends LuceneTestCase {
|
||||||
d = new Document();
|
d = new Document();
|
||||||
|
|
||||||
// Reverse
|
// Reverse
|
||||||
f1.setOmitTermFreqAndPositions(true);
|
f1.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||||
d.add(f1);
|
d.add(f1);
|
||||||
|
|
||||||
f2.setOmitTermFreqAndPositions(false);
|
f2.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
|
||||||
d.add(f2);
|
d.add(f2);
|
||||||
|
|
||||||
writer.addDocument(d);
|
writer.addDocument(d);
|
||||||
|
@ -90,8 +91,8 @@ public class TestOmitTf extends LuceneTestCase {
|
||||||
|
|
||||||
SegmentReader reader = getOnlySegmentReader(IndexReader.open(ram, false));
|
SegmentReader reader = getOnlySegmentReader(IndexReader.open(ram, false));
|
||||||
FieldInfos fi = reader.fieldInfos();
|
FieldInfos fi = reader.fieldInfos();
|
||||||
assertTrue("OmitTermFreqAndPositions field bit should be set.", fi.fieldInfo("f1").omitTermFreqAndPositions);
|
assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f1").indexOptions);
|
||||||
assertTrue("OmitTermFreqAndPositions field bit should be set.", fi.fieldInfo("f2").omitTermFreqAndPositions);
|
assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f2").indexOptions);
|
||||||
|
|
||||||
reader.close();
|
reader.close();
|
||||||
ram.close();
|
ram.close();
|
||||||
|
@ -117,7 +118,7 @@ public class TestOmitTf extends LuceneTestCase {
|
||||||
|
|
||||||
// this field will NOT have Tf
|
// this field will NOT have Tf
|
||||||
Field f2 = newField("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED);
|
Field f2 = newField("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED);
|
||||||
f2.setOmitTermFreqAndPositions(true);
|
f2.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||||
d.add(f2);
|
d.add(f2);
|
||||||
|
|
||||||
for(int i=0;i<30;i++)
|
for(int i=0;i<30;i++)
|
||||||
|
@ -128,10 +129,10 @@ public class TestOmitTf extends LuceneTestCase {
|
||||||
d = new Document();
|
d = new Document();
|
||||||
|
|
||||||
// Reverese
|
// Reverese
|
||||||
f1.setOmitTermFreqAndPositions(true);
|
f1.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||||
d.add(f1);
|
d.add(f1);
|
||||||
|
|
||||||
f2.setOmitTermFreqAndPositions(false);
|
f2.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
|
||||||
d.add(f2);
|
d.add(f2);
|
||||||
|
|
||||||
for(int i=0;i<30;i++)
|
for(int i=0;i<30;i++)
|
||||||
|
@ -144,8 +145,8 @@ public class TestOmitTf extends LuceneTestCase {
|
||||||
|
|
||||||
SegmentReader reader = getOnlySegmentReader(IndexReader.open(ram, false));
|
SegmentReader reader = getOnlySegmentReader(IndexReader.open(ram, false));
|
||||||
FieldInfos fi = reader.fieldInfos();
|
FieldInfos fi = reader.fieldInfos();
|
||||||
assertTrue("OmitTermFreqAndPositions field bit should be set.", fi.fieldInfo("f1").omitTermFreqAndPositions);
|
assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f1").indexOptions);
|
||||||
assertTrue("OmitTermFreqAndPositions field bit should be set.", fi.fieldInfo("f2").omitTermFreqAndPositions);
|
assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f2").indexOptions);
|
||||||
|
|
||||||
reader.close();
|
reader.close();
|
||||||
ram.close();
|
ram.close();
|
||||||
|
@ -176,7 +177,7 @@ public class TestOmitTf extends LuceneTestCase {
|
||||||
for(int i=0;i<5;i++)
|
for(int i=0;i<5;i++)
|
||||||
writer.addDocument(d);
|
writer.addDocument(d);
|
||||||
|
|
||||||
f2.setOmitTermFreqAndPositions(true);
|
f2.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||||
|
|
||||||
for(int i=0;i<20;i++)
|
for(int i=0;i<20;i++)
|
||||||
writer.addDocument(d);
|
writer.addDocument(d);
|
||||||
|
@ -189,8 +190,8 @@ public class TestOmitTf extends LuceneTestCase {
|
||||||
|
|
||||||
SegmentReader reader = getOnlySegmentReader(IndexReader.open(ram, false));
|
SegmentReader reader = getOnlySegmentReader(IndexReader.open(ram, false));
|
||||||
FieldInfos fi = reader.fieldInfos();
|
FieldInfos fi = reader.fieldInfos();
|
||||||
assertTrue("OmitTermFreqAndPositions field bit should not be set.", !fi.fieldInfo("f1").omitTermFreqAndPositions);
|
assertEquals("OmitTermFreqAndPositions field bit should not be set.", IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, fi.fieldInfo("f1").indexOptions);
|
||||||
assertTrue("OmitTermFreqAndPositions field bit should be set.", fi.fieldInfo("f2").omitTermFreqAndPositions);
|
assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f2").indexOptions);
|
||||||
|
|
||||||
reader.close();
|
reader.close();
|
||||||
ram.close();
|
ram.close();
|
||||||
|
@ -198,8 +199,10 @@ public class TestOmitTf extends LuceneTestCase {
|
||||||
|
|
||||||
private void assertNoPrx(Directory dir) throws Throwable {
|
private void assertNoPrx(Directory dir) throws Throwable {
|
||||||
final String[] files = dir.listAll();
|
final String[] files = dir.listAll();
|
||||||
for(int i=0;i<files.length;i++)
|
for(int i=0;i<files.length;i++) {
|
||||||
assertFalse(files[i].endsWith(".prx"));
|
assertFalse(files[i].endsWith(".prx"));
|
||||||
|
assertFalse(files[i].endsWith(".pos"));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Verifies no *.prx exists when all fields omit term freq:
|
// Verifies no *.prx exists when all fields omit term freq:
|
||||||
|
@ -213,8 +216,8 @@ public class TestOmitTf extends LuceneTestCase {
|
||||||
lmp.setUseCompoundFile(false);
|
lmp.setUseCompoundFile(false);
|
||||||
Document d = new Document();
|
Document d = new Document();
|
||||||
|
|
||||||
Field f1 = newField("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
|
Field f1 = newField("f1", "This field has no term freqs", Field.Store.NO, Field.Index.ANALYZED);
|
||||||
f1.setOmitTermFreqAndPositions(true);
|
f1.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||||
d.add(f1);
|
d.add(f1);
|
||||||
|
|
||||||
for(int i=0;i<30;i++)
|
for(int i=0;i<30;i++)
|
||||||
|
@ -223,7 +226,15 @@ public class TestOmitTf extends LuceneTestCase {
|
||||||
writer.commit();
|
writer.commit();
|
||||||
|
|
||||||
assertNoPrx(ram);
|
assertNoPrx(ram);
|
||||||
|
|
||||||
|
// now add some documents with positions, and check there is no prox after optimization
|
||||||
|
d = new Document();
|
||||||
|
f1 = newField("f1", "This field has positions", Field.Store.NO, Field.Index.ANALYZED);
|
||||||
|
d.add(f1);
|
||||||
|
|
||||||
|
for(int i=0;i<30;i++)
|
||||||
|
writer.addDocument(d);
|
||||||
|
|
||||||
// force merge
|
// force merge
|
||||||
writer.optimize();
|
writer.optimize();
|
||||||
// flush
|
// flush
|
||||||
|
@ -253,7 +264,7 @@ public class TestOmitTf extends LuceneTestCase {
|
||||||
sb.append(term).append(" ");
|
sb.append(term).append(" ");
|
||||||
String content = sb.toString();
|
String content = sb.toString();
|
||||||
Field noTf = newField("noTf", content + (i%2==0 ? "" : " notf"), Field.Store.NO, Field.Index.ANALYZED);
|
Field noTf = newField("noTf", content + (i%2==0 ? "" : " notf"), Field.Store.NO, Field.Index.ANALYZED);
|
||||||
noTf.setOmitTermFreqAndPositions(true);
|
noTf.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||||
d.add(noTf);
|
d.add(noTf);
|
||||||
|
|
||||||
Field tf = newField("tf", content + (i%2==0 ? " tf" : ""), Field.Store.NO, Field.Index.ANALYZED);
|
Field tf = newField("tf", content + (i%2==0 ? " tf" : ""), Field.Store.NO, Field.Index.ANALYZED);
|
||||||
|
|
|
@ -21,6 +21,7 @@ import org.apache.lucene.document.Field.Index;
|
||||||
import org.apache.lucene.document.Field.Store;
|
import org.apache.lucene.document.Field.Store;
|
||||||
import org.apache.lucene.index.CorruptIndexException;
|
import org.apache.lucene.index.CorruptIndexException;
|
||||||
import org.apache.lucene.index.DocsEnum;
|
import org.apache.lucene.index.DocsEnum;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.IndexWriter;
|
import org.apache.lucene.index.IndexWriter;
|
||||||
import org.apache.lucene.index.IndexWriterConfig;
|
import org.apache.lucene.index.IndexWriterConfig;
|
||||||
|
@ -181,7 +182,7 @@ public class LuceneTaxonomyWriter implements TaxonomyWriter {
|
||||||
parentStreamField = new Field(Consts.FIELD_PAYLOADS, parentStream);
|
parentStreamField = new Field(Consts.FIELD_PAYLOADS, parentStream);
|
||||||
parentStreamField.setOmitNorms(true);
|
parentStreamField.setOmitNorms(true);
|
||||||
fullPathField = new Field(Consts.FULL, "", Store.YES, Index.NOT_ANALYZED_NO_NORMS);
|
fullPathField = new Field(Consts.FULL, "", Store.YES, Index.NOT_ANALYZED_NO_NORMS);
|
||||||
fullPathField.setOmitTermFreqAndPositions(true);
|
fullPathField.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||||
|
|
||||||
this.nextID = indexWriter.maxDoc();
|
this.nextID = indexWriter.maxDoc();
|
||||||
|
|
||||||
|
|
|
@ -24,6 +24,7 @@ import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.document.NumericField;
|
import org.apache.lucene.document.NumericField;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.RandomIndexWriter;
|
import org.apache.lucene.index.RandomIndexWriter;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
|
@ -395,7 +396,7 @@ public class TestGrouping extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
// So we can pull filter marking last doc in block:
|
// So we can pull filter marking last doc in block:
|
||||||
final Field groupEnd = newField("groupend", "x", Field.Index.NOT_ANALYZED);
|
final Field groupEnd = newField("groupend", "x", Field.Index.NOT_ANALYZED);
|
||||||
groupEnd.setOmitTermFreqAndPositions(true);
|
groupEnd.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||||
groupEnd.setOmitNorms(true);
|
groupEnd.setOmitNorms(true);
|
||||||
docs.get(docs.size()-1).add(groupEnd);
|
docs.get(docs.size()-1).add(groupEnd);
|
||||||
// Add as a doc block:
|
// Add as a doc block:
|
||||||
|
|
|
@ -28,6 +28,7 @@ import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.IndexWriter;
|
import org.apache.lucene.index.IndexWriter;
|
||||||
import org.apache.lucene.index.IndexWriterConfig;
|
import org.apache.lucene.index.IndexWriterConfig;
|
||||||
|
@ -618,7 +619,7 @@ public class SpellChecker implements java.io.Closeable {
|
||||||
// the word field is never queried on... its indexed so it can be quickly
|
// the word field is never queried on... its indexed so it can be quickly
|
||||||
// checked for rebuild (and stored for retrieval). Doesn't need norms or TF/pos
|
// checked for rebuild (and stored for retrieval). Doesn't need norms or TF/pos
|
||||||
Field f = new Field(F_WORD, text, Field.Store.YES, Field.Index.NOT_ANALYZED);
|
Field f = new Field(F_WORD, text, Field.Store.YES, Field.Index.NOT_ANALYZED);
|
||||||
f.setOmitTermFreqAndPositions(true);
|
f.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||||
f.setOmitNorms(true);
|
f.setOmitNorms(true);
|
||||||
doc.add(f); // orig term
|
doc.add(f); // orig term
|
||||||
addGram(text, doc, ng1, ng2);
|
addGram(text, doc, ng1, ng2);
|
||||||
|
@ -636,7 +637,7 @@ public class SpellChecker implements java.io.Closeable {
|
||||||
if (i == 0) {
|
if (i == 0) {
|
||||||
// only one term possible in the startXXField, TF/pos and norms aren't needed.
|
// only one term possible in the startXXField, TF/pos and norms aren't needed.
|
||||||
Field startField = new Field("start" + ng, gram, Field.Store.NO, Field.Index.NOT_ANALYZED);
|
Field startField = new Field("start" + ng, gram, Field.Store.NO, Field.Index.NOT_ANALYZED);
|
||||||
startField.setOmitTermFreqAndPositions(true);
|
startField.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||||
startField.setOmitNorms(true);
|
startField.setOmitNorms(true);
|
||||||
doc.add(startField);
|
doc.add(startField);
|
||||||
}
|
}
|
||||||
|
@ -645,7 +646,7 @@ public class SpellChecker implements java.io.Closeable {
|
||||||
if (end != null) { // may not be present if len==ng1
|
if (end != null) { // may not be present if len==ng1
|
||||||
// only one term possible in the endXXField, TF/pos and norms aren't needed.
|
// only one term possible in the endXXField, TF/pos and norms aren't needed.
|
||||||
Field endField = new Field("end" + ng, end, Field.Store.NO, Field.Index.NOT_ANALYZED);
|
Field endField = new Field("end" + ng, end, Field.Store.NO, Field.Index.NOT_ANALYZED);
|
||||||
endField.setOmitTermFreqAndPositions(true);
|
endField.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||||
endField.setOmitNorms(true);
|
endField.setOmitNorms(true);
|
||||||
doc.add(endField);
|
doc.add(endField);
|
||||||
}
|
}
|
||||||
|
|
|
@ -321,6 +321,8 @@ New Features
|
||||||
before adding to the index. Fix a null pointer exception in logging
|
before adding to the index. Fix a null pointer exception in logging
|
||||||
when there was no unique key. (David Smiley via yonik)
|
when there was no unique key. (David Smiley via yonik)
|
||||||
|
|
||||||
|
* LUCENE-2048: Added omitPositions to the schema, so you can omit position
|
||||||
|
information while still indexing term frequencies. (rmuir)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
----------------------
|
----------------------
|
||||||
|
|
|
@ -35,6 +35,7 @@ import org.slf4j.LoggerFactory;
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Fieldable;
|
import org.apache.lucene.document.Fieldable;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.Fields;
|
import org.apache.lucene.index.Fields;
|
||||||
|
@ -202,7 +203,10 @@ public class LukeRequestHandler extends RequestHandlerBase
|
||||||
flags.append( (f != null && f.storeTermOffsets() ) ? FieldFlag.TERM_VECTOR_OFFSET.getAbbreviation() : '-' );
|
flags.append( (f != null && f.storeTermOffsets() ) ? FieldFlag.TERM_VECTOR_OFFSET.getAbbreviation() : '-' );
|
||||||
flags.append( (f != null && f.storeTermPositions() ) ? FieldFlag.TERM_VECTOR_POSITION.getAbbreviation() : '-' );
|
flags.append( (f != null && f.storeTermPositions() ) ? FieldFlag.TERM_VECTOR_POSITION.getAbbreviation() : '-' );
|
||||||
flags.append( (f != null && f.omitNorms()) ? FieldFlag.OMIT_NORMS.getAbbreviation() : '-' );
|
flags.append( (f != null && f.omitNorms()) ? FieldFlag.OMIT_NORMS.getAbbreviation() : '-' );
|
||||||
flags.append( (f != null && f.omitTf()) ? FieldFlag.OMIT_TF.getAbbreviation() : '-' );
|
flags.append( (f != null &&
|
||||||
|
f.indexOptions() == IndexOptions.DOCS_ONLY) ? FieldFlag.OMIT_TF.getAbbreviation() : '-' );
|
||||||
|
flags.append( (f != null &&
|
||||||
|
f.indexOptions() == IndexOptions.DOCS_AND_FREQS) ? FieldFlag.OMIT_POSITIONS.getAbbreviation() : '-' );
|
||||||
flags.append( (lazy) ? FieldFlag.LAZY.getAbbreviation() : '-' );
|
flags.append( (lazy) ? FieldFlag.LAZY.getAbbreviation() : '-' );
|
||||||
flags.append( (binary) ? FieldFlag.BINARY.getAbbreviation() : '-' );
|
flags.append( (binary) ? FieldFlag.BINARY.getAbbreviation() : '-' );
|
||||||
flags.append( (f != null && f.sortMissingFirst() ) ? FieldFlag.SORT_MISSING_FIRST.getAbbreviation() : '-' );
|
flags.append( (f != null && f.sortMissingFirst() ) ? FieldFlag.SORT_MISSING_FIRST.getAbbreviation() : '-' );
|
||||||
|
|
|
@ -47,13 +47,14 @@ public abstract class FieldProperties {
|
||||||
protected final static int SORT_MISSING_LAST = 0x00000800;
|
protected final static int SORT_MISSING_LAST = 0x00000800;
|
||||||
|
|
||||||
protected final static int REQUIRED = 0x00001000;
|
protected final static int REQUIRED = 0x00001000;
|
||||||
|
protected final static int OMIT_POSITIONS = 0x00002000;
|
||||||
|
|
||||||
static final String[] propertyNames = {
|
static final String[] propertyNames = {
|
||||||
"indexed", "tokenized", "stored",
|
"indexed", "tokenized", "stored",
|
||||||
"binary", "omitNorms", "omitTermFreqAndPositions",
|
"binary", "omitNorms", "omitTermFreqAndPositions",
|
||||||
"termVectors", "termPositions", "termOffsets",
|
"termVectors", "termPositions", "termOffsets",
|
||||||
"multiValued",
|
"multiValued",
|
||||||
"sortMissingFirst","sortMissingLast","required"
|
"sortMissingFirst","sortMissingLast","required", "omitPositions"
|
||||||
};
|
};
|
||||||
|
|
||||||
static final Map<String,Integer> propertyMap = new HashMap<String,Integer>();
|
static final Map<String,Integer> propertyMap = new HashMap<String,Integer>();
|
||||||
|
|
|
@ -23,6 +23,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.document.Fieldable;
|
import org.apache.lucene.document.Fieldable;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.queries.function.ValueSource;
|
import org.apache.lucene.queries.function.ValueSource;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
|
@ -251,7 +252,7 @@ public abstract class FieldType extends FieldProperties {
|
||||||
|
|
||||||
return createField(field.getName(), val, getFieldStore(field, val),
|
return createField(field.getName(), val, getFieldStore(field, val),
|
||||||
getFieldIndex(field, val), getFieldTermVec(field, val), field.omitNorms(),
|
getFieldIndex(field, val), getFieldTermVec(field, val), field.omitNorms(),
|
||||||
field.omitTf(), boost);
|
field.indexOptions(), boost);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -269,14 +270,14 @@ public abstract class FieldType extends FieldProperties {
|
||||||
* @return the {@link org.apache.lucene.document.Fieldable}.
|
* @return the {@link org.apache.lucene.document.Fieldable}.
|
||||||
*/
|
*/
|
||||||
protected Fieldable createField(String name, String val, Field.Store storage, Field.Index index,
|
protected Fieldable createField(String name, String val, Field.Store storage, Field.Index index,
|
||||||
Field.TermVector vec, boolean omitNorms, boolean omitTFPos, float boost){
|
Field.TermVector vec, boolean omitNorms, IndexOptions options, float boost){
|
||||||
Field f = new Field(name,
|
Field f = new Field(name,
|
||||||
val,
|
val,
|
||||||
storage,
|
storage,
|
||||||
index,
|
index,
|
||||||
vec);
|
vec);
|
||||||
f.setOmitNorms(omitNorms);
|
f.setOmitNorms(omitNorms);
|
||||||
f.setOmitTermFreqAndPositions(omitTFPos);
|
f.setIndexOptions(options);
|
||||||
f.setBoost(boost);
|
f.setBoost(boost);
|
||||||
return f;
|
return f;
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.solr.schema;
|
||||||
|
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.document.Fieldable;
|
import org.apache.lucene.document.Fieldable;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||||
import org.apache.lucene.queries.function.DocValues;
|
import org.apache.lucene.queries.function.DocValues;
|
||||||
|
@ -77,7 +78,7 @@ public class LatLonType extends AbstractSubTypeFieldType implements SpatialQuery
|
||||||
if (field.stored()) {
|
if (field.stored()) {
|
||||||
f[f.length - 1] = createField(field.getName(), externalVal,
|
f[f.length - 1] = createField(field.getName(), externalVal,
|
||||||
getFieldStore(field, externalVal), Field.Index.NO, Field.TermVector.NO,
|
getFieldStore(field, externalVal), Field.Index.NO, Field.TermVector.NO,
|
||||||
false, false, boost);
|
false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, boost);
|
||||||
}
|
}
|
||||||
return f;
|
return f;
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.solr.schema;
|
||||||
|
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.document.Fieldable;
|
import org.apache.lucene.document.Fieldable;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.queries.function.ValueSource;
|
import org.apache.lucene.queries.function.ValueSource;
|
||||||
import org.apache.lucene.queries.function.valuesource.VectorValueSource;
|
import org.apache.lucene.queries.function.valuesource.VectorValueSource;
|
||||||
import org.apache.lucene.search.BooleanClause;
|
import org.apache.lucene.search.BooleanClause;
|
||||||
|
@ -90,7 +91,7 @@ public class PointType extends CoordinateFieldType implements SpatialQueryable {
|
||||||
String storedVal = externalVal; // normalize or not?
|
String storedVal = externalVal; // normalize or not?
|
||||||
f[f.length - 1] = createField(field.getName(), storedVal,
|
f[f.length - 1] = createField(field.getName(), storedVal,
|
||||||
getFieldStore(field, storedVal), Field.Index.NO, Field.TermVector.NO,
|
getFieldStore(field, storedVal), Field.Index.NO, Field.TermVector.NO,
|
||||||
false, false, boost);
|
false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, boost);
|
||||||
}
|
}
|
||||||
|
|
||||||
return f;
|
return f;
|
||||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.solr.schema;
|
||||||
import org.apache.solr.common.SolrException;
|
import org.apache.solr.common.SolrException;
|
||||||
import org.apache.solr.common.SolrException.ErrorCode;
|
import org.apache.solr.common.SolrException.ErrorCode;
|
||||||
import org.apache.lucene.document.Fieldable;
|
import org.apache.lucene.document.Fieldable;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.search.SortField;
|
import org.apache.lucene.search.SortField;
|
||||||
import org.apache.solr.search.QParser;
|
import org.apache.solr.search.QParser;
|
||||||
|
|
||||||
|
@ -81,7 +82,17 @@ public final class SchemaField extends FieldProperties {
|
||||||
public boolean storeTermPositions() { return (properties & STORE_TERMPOSITIONS)!=0; }
|
public boolean storeTermPositions() { return (properties & STORE_TERMPOSITIONS)!=0; }
|
||||||
public boolean storeTermOffsets() { return (properties & STORE_TERMOFFSETS)!=0; }
|
public boolean storeTermOffsets() { return (properties & STORE_TERMOFFSETS)!=0; }
|
||||||
public boolean omitNorms() { return (properties & OMIT_NORMS)!=0; }
|
public boolean omitNorms() { return (properties & OMIT_NORMS)!=0; }
|
||||||
public boolean omitTf() { return (properties & OMIT_TF_POSITIONS)!=0; }
|
|
||||||
|
public IndexOptions indexOptions() {
|
||||||
|
if ((properties & OMIT_TF_POSITIONS) != 0) {
|
||||||
|
return IndexOptions.DOCS_ONLY;
|
||||||
|
} else if ((properties & OMIT_POSITIONS) != 0) {
|
||||||
|
return IndexOptions.DOCS_AND_FREQS;
|
||||||
|
} else {
|
||||||
|
return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public boolean multiValued() { return (properties & MULTIVALUED)!=0; }
|
public boolean multiValued() { return (properties & MULTIVALUED)!=0; }
|
||||||
public boolean sortMissingFirst() { return (properties & SORT_MISSING_FIRST)!=0; }
|
public boolean sortMissingFirst() { return (properties & SORT_MISSING_FIRST)!=0; }
|
||||||
public boolean sortMissingLast() { return (properties & SORT_MISSING_LAST)!=0; }
|
public boolean sortMissingLast() { return (properties & SORT_MISSING_LAST)!=0; }
|
||||||
|
@ -215,7 +226,7 @@ public final class SchemaField extends FieldProperties {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (on(falseProps,INDEXED)) {
|
if (on(falseProps,INDEXED)) {
|
||||||
int pp = (INDEXED | OMIT_NORMS | OMIT_TF_POSITIONS
|
int pp = (INDEXED | OMIT_NORMS | OMIT_TF_POSITIONS | OMIT_POSITIONS
|
||||||
| STORE_TERMVECTORS | STORE_TERMPOSITIONS | STORE_TERMOFFSETS
|
| STORE_TERMVECTORS | STORE_TERMPOSITIONS | STORE_TERMOFFSETS
|
||||||
| SORT_MISSING_FIRST | SORT_MISSING_LAST);
|
| SORT_MISSING_FIRST | SORT_MISSING_LAST);
|
||||||
if (on(pp,trueProps)) {
|
if (on(pp,trueProps)) {
|
||||||
|
@ -225,6 +236,14 @@ public final class SchemaField extends FieldProperties {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (on(falseProps,OMIT_TF_POSITIONS)) {
|
||||||
|
int pp = (OMIT_POSITIONS | OMIT_TF_POSITIONS);
|
||||||
|
if (on(pp, trueProps)) {
|
||||||
|
throw new RuntimeException("SchemaField: " + name + " conflicting indexed field options:" + props);
|
||||||
|
}
|
||||||
|
p &= ~pp;
|
||||||
|
}
|
||||||
|
|
||||||
if (on(falseProps,STORE_TERMVECTORS)) {
|
if (on(falseProps,STORE_TERMVECTORS)) {
|
||||||
int pp = (STORE_TERMVECTORS | STORE_TERMPOSITIONS | STORE_TERMOFFSETS);
|
int pp = (STORE_TERMVECTORS | STORE_TERMPOSITIONS | STORE_TERMOFFSETS);
|
||||||
if (on(pp,trueProps)) {
|
if (on(pp,trueProps)) {
|
||||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.solr.schema;
|
||||||
import org.apache.lucene.document.Fieldable;
|
import org.apache.lucene.document.Fieldable;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.document.NumericField;
|
import org.apache.lucene.document.NumericField;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.queries.function.ValueSource;
|
import org.apache.lucene.queries.function.ValueSource;
|
||||||
import org.apache.lucene.queries.function.valuesource.DoubleFieldSource;
|
import org.apache.lucene.queries.function.valuesource.DoubleFieldSource;
|
||||||
import org.apache.lucene.queries.function.valuesource.FloatFieldSource;
|
import org.apache.lucene.queries.function.valuesource.FloatFieldSource;
|
||||||
|
@ -521,7 +522,7 @@ public class TrieField extends FieldType {
|
||||||
}
|
}
|
||||||
|
|
||||||
f.setOmitNorms(field.omitNorms());
|
f.setOmitNorms(field.omitNorms());
|
||||||
f.setOmitTermFreqAndPositions(field.omitTf());
|
f.setIndexOptions(field.indexOptions());
|
||||||
f.setBoost(boost);
|
f.setBoost(boost);
|
||||||
return f;
|
return f;
|
||||||
}
|
}
|
||||||
|
|
|
@ -417,6 +417,13 @@
|
||||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
</analyzer>
|
</analyzer>
|
||||||
</fieldType>
|
</fieldType>
|
||||||
|
|
||||||
|
<!-- omitPositions example -->
|
||||||
|
<fieldType name="nopositions" class="solr.TextField" omitPositions="true">
|
||||||
|
<analyzer>
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
</types>
|
</types>
|
||||||
|
|
||||||
|
|
||||||
|
@ -525,6 +532,8 @@
|
||||||
<field name="sim1text" type="sim1" indexed="true" stored="true"/>
|
<field name="sim1text" type="sim1" indexed="true" stored="true"/>
|
||||||
<field name="sim2text" type="sim2" indexed="true" stored="true"/>
|
<field name="sim2text" type="sim2" indexed="true" stored="true"/>
|
||||||
<field name="sim3text" type="sim3" indexed="true" stored="true"/>
|
<field name="sim3text" type="sim3" indexed="true" stored="true"/>
|
||||||
|
|
||||||
|
<field name="nopositionstext" type="nopositions" indexed="true" stored="true"/>
|
||||||
|
|
||||||
<field name="tlong" type="tlong" indexed="true" stored="true" />
|
<field name="tlong" type="tlong" indexed="true" stored="true" />
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,60 @@
|
||||||
|
package org.apache.solr.schema;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.solr.SolrTestCaseJ4;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
|
||||||
|
public class TestOmitPositions extends SolrTestCaseJ4 {
|
||||||
|
@BeforeClass
|
||||||
|
public static void beforeClass() throws Exception {
|
||||||
|
initCore("solrconfig.xml","schema.xml");
|
||||||
|
// add some docs
|
||||||
|
assertU(adoc("id", "1", "nopositionstext", "this is a test this is only a test", "text", "just another test"));
|
||||||
|
assertU(adoc("id", "2", "nopositionstext", "test test test test test test test test test test test test test", "text", "have a nice day"));
|
||||||
|
assertU(commit());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testFrequencies() {
|
||||||
|
// doc 2 should be ranked above doc 1
|
||||||
|
assertQ("term query: ",
|
||||||
|
req("fl", "id", "q", "nopositionstext:test"),
|
||||||
|
"//*[@numFound='2']",
|
||||||
|
"//result/doc[1]/int[@name='id'][.=2]",
|
||||||
|
"//result/doc[2]/int[@name='id'][.=1]"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testPositions() {
|
||||||
|
// no results should be found:
|
||||||
|
// lucene 3.x: silent failure
|
||||||
|
// lucene 4.x: illegal state exception, field was indexed without positions
|
||||||
|
|
||||||
|
ignoreException("was indexed without position data");
|
||||||
|
try {
|
||||||
|
assertQ("phrase query: ",
|
||||||
|
req("fl", "id", "q", "nopositionstext:\"test test\""),
|
||||||
|
"//*[@numFound='0']"
|
||||||
|
);
|
||||||
|
} catch (Exception expected) {
|
||||||
|
assertTrue(expected.getCause() instanceof IllegalStateException);
|
||||||
|
// in lucene 4.0, queries don't silently fail
|
||||||
|
}
|
||||||
|
resetExceptionIgnores();
|
||||||
|
}
|
||||||
|
}
|
|
@ -31,6 +31,7 @@ public enum FieldFlag {
|
||||||
TERM_VECTOR_POSITION('p', "Store Position With TermVector"),
|
TERM_VECTOR_POSITION('p', "Store Position With TermVector"),
|
||||||
OMIT_NORMS('O', "Omit Norms"),
|
OMIT_NORMS('O', "Omit Norms"),
|
||||||
OMIT_TF('F', "Omit Tf"),
|
OMIT_TF('F', "Omit Tf"),
|
||||||
|
OMIT_POSITIONS('P', "Omit Positions"),
|
||||||
LAZY('L', "Lazy"),
|
LAZY('L', "Lazy"),
|
||||||
BINARY('B', "Binary"),
|
BINARY('B', "Binary"),
|
||||||
SORT_MISSING_FIRST('f', "Sort Missing First"),
|
SORT_MISSING_FIRST('f', "Sort Missing First"),
|
||||||
|
|
Loading…
Reference in New Issue