mirror of https://github.com/apache/lucene.git
LUCENE-2048: omit positions but keep term freq
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1145594 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
dfc5ce1cff
commit
1c646d24c9
|
@ -523,6 +523,15 @@ New Features
|
|||
(grow on demand if you set/get/clear too-large indices). (Mike
|
||||
McCandless)
|
||||
|
||||
* LUCENE-2048: Added the ability to omit positions but still index
|
||||
term frequencies. You can now control what is indexed into
|
||||
the postings via AbstractField.setIndexOptions:
|
||||
DOCS_ONLY: only documents are indexed: term frequencies and positions are omitted
|
||||
DOCS_AND_FREQS: only documents and term frequencies are indexed: positions are omitted
|
||||
DOCS_AND_FREQS_AND_POSITIONS: full postings: documents, frequencies, and positions
|
||||
AbstractField.setOmitTermFreqAndPositions is deprecated,
|
||||
you should use DOCS_ONLY instead. (Robert Muir)
|
||||
|
||||
Optimizations
|
||||
|
||||
* LUCENE-3201, LUCENE-3218: CompoundFileSystem code has been consolidated
|
||||
|
|
|
@ -22,6 +22,7 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
|||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.NumericField;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
|
@ -173,7 +174,7 @@ public class IndexFiles {
|
|||
// the field into separate words and don't index term frequency
|
||||
// or positional information:
|
||||
Field pathField = new Field("path", file.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
|
||||
pathField.setOmitTermFreqAndPositions(true);
|
||||
pathField.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||
doc.add(pathField);
|
||||
|
||||
// Add the last modified date of the file a field named "modified".
|
||||
|
|
|
@ -87,9 +87,7 @@ public class TestNRTManager extends LuceneTestCase {
|
|||
if (field1.getOmitNorms()) {
|
||||
field2.setOmitNorms(true);
|
||||
}
|
||||
if (field1.getOmitTermFreqAndPositions()) {
|
||||
field2.setOmitTermFreqAndPositions(true);
|
||||
}
|
||||
field2.setIndexOptions(field1.getIndexOptions());
|
||||
doc2.add(field2);
|
||||
}
|
||||
|
||||
|
|
|
@ -49,7 +49,7 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
|||
* <pre>
|
||||
* Field field = new Field(name, new NumericTokenStream(precisionStep).setIntValue(value));
|
||||
* field.setOmitNorms(true);
|
||||
* field.setOmitTermFreqAndPositions(true);
|
||||
* field.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||
* document.add(field);
|
||||
* </pre>
|
||||
*
|
||||
|
@ -60,7 +60,7 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
|||
* NumericTokenStream stream = new NumericTokenStream(precisionStep);
|
||||
* Field field = new Field(name, stream);
|
||||
* field.setOmitNorms(true);
|
||||
* field.setOmitTermFreqAndPositions(true);
|
||||
* field.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||
* Document document = new Document();
|
||||
* document.add(field);
|
||||
*
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.document;
|
|||
import org.apache.lucene.search.PhraseQuery; // for javadocs
|
||||
import org.apache.lucene.search.spans.SpanQuery; // for javadocs
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.FieldInvertState; // for javadocs
|
||||
import org.apache.lucene.index.values.PerDocFieldValues;
|
||||
import org.apache.lucene.index.values.ValueType;
|
||||
|
@ -39,7 +40,7 @@ public abstract class AbstractField implements Fieldable {
|
|||
protected boolean isTokenized = true;
|
||||
protected boolean isBinary = false;
|
||||
protected boolean lazy = false;
|
||||
protected boolean omitTermFreqAndPositions = false;
|
||||
protected IndexOptions indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||
protected float boost = 1.0f;
|
||||
// the data object for all different kind of field values
|
||||
protected Object fieldsData = null;
|
||||
|
@ -50,7 +51,6 @@ public abstract class AbstractField implements Fieldable {
|
|||
protected int binaryOffset;
|
||||
protected PerDocFieldValues docValues;
|
||||
|
||||
|
||||
protected AbstractField()
|
||||
{
|
||||
}
|
||||
|
@ -208,8 +208,8 @@ public abstract class AbstractField implements Fieldable {
|
|||
/** True if norms are omitted for this indexed field */
|
||||
public boolean getOmitNorms() { return omitNorms; }
|
||||
|
||||
/** @see #setOmitTermFreqAndPositions */
|
||||
public boolean getOmitTermFreqAndPositions() { return omitTermFreqAndPositions; }
|
||||
/** @see #setIndexOptions */
|
||||
public IndexOptions getIndexOptions() { return indexOptions; }
|
||||
|
||||
/** Expert:
|
||||
*
|
||||
|
@ -220,7 +220,7 @@ public abstract class AbstractField implements Fieldable {
|
|||
|
||||
/** Expert:
|
||||
*
|
||||
* If set, omit term freq, positions and payloads from
|
||||
* If set, omit term freq, and optionally also positions and payloads from
|
||||
* postings for this field.
|
||||
*
|
||||
* <p><b>NOTE</b>: While this option reduces storage space
|
||||
|
@ -229,7 +229,7 @@ public abstract class AbstractField implements Fieldable {
|
|||
* PhraseQuery} or {@link SpanQuery} subclasses will
|
||||
* silently fail to find results.
|
||||
*/
|
||||
public void setOmitTermFreqAndPositions(boolean omitTermFreqAndPositions) { this.omitTermFreqAndPositions=omitTermFreqAndPositions; }
|
||||
public void setIndexOptions(IndexOptions indexOptions) { this.indexOptions=indexOptions; }
|
||||
|
||||
public boolean isLazy() {
|
||||
return lazy;
|
||||
|
@ -275,8 +275,9 @@ public abstract class AbstractField implements Fieldable {
|
|||
if (omitNorms) {
|
||||
result.append(",omitNorms");
|
||||
}
|
||||
if (omitTermFreqAndPositions) {
|
||||
result.append(",omitTermFreqAndPositions");
|
||||
if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||
result.append(",indexOptions=");
|
||||
result.append(indexOptions);
|
||||
}
|
||||
if (lazy){
|
||||
result.append(",lazy");
|
||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.lucene.document;
|
|||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
|
||||
/**
|
||||
|
@ -389,7 +390,8 @@ public final class Field extends AbstractField implements Fieldable {
|
|||
this.isTokenized = index.isAnalyzed();
|
||||
this.omitNorms = index.omitNorms();
|
||||
if (index == Index.NO) {
|
||||
this.omitTermFreqAndPositions = false;
|
||||
// note: now this reads even weirder than before
|
||||
this.indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||
}
|
||||
|
||||
this.isBinary = false;
|
||||
|
@ -520,7 +522,7 @@ public final class Field extends AbstractField implements Fieldable {
|
|||
isStored = true;
|
||||
isIndexed = false;
|
||||
isTokenized = false;
|
||||
omitTermFreqAndPositions = false;
|
||||
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||
omitNorms = true;
|
||||
|
||||
isBinary = true;
|
||||
|
|
|
@ -17,6 +17,7 @@ package org.apache.lucene.document;
|
|||
*/
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.FieldInvertState; // for javadocs
|
||||
import org.apache.lucene.index.values.IndexDocValues;
|
||||
import org.apache.lucene.index.values.PerDocFieldValues;
|
||||
|
@ -194,12 +195,12 @@ public interface Fieldable {
|
|||
*/
|
||||
abstract byte[] getBinaryValue(byte[] result);
|
||||
|
||||
/** @see #setOmitTermFreqAndPositions */
|
||||
boolean getOmitTermFreqAndPositions();
|
||||
/** @see #setIndexOptions */
|
||||
IndexOptions getIndexOptions();
|
||||
|
||||
/** Expert:
|
||||
*
|
||||
* If set, omit term freq, positions and payloads from
|
||||
* If set, omit term freq, and optionally positions and payloads from
|
||||
* postings for this field.
|
||||
*
|
||||
* <p><b>NOTE</b>: While this option reduces storage space
|
||||
|
@ -208,7 +209,7 @@ public interface Fieldable {
|
|||
* PhraseQuery} or {@link SpanQuery} subclasses will
|
||||
* fail with an exception.
|
||||
*/
|
||||
void setOmitTermFreqAndPositions(boolean omitTermFreqAndPositions);
|
||||
void setIndexOptions(IndexOptions indexOptions);
|
||||
|
||||
/**
|
||||
* Returns the {@link PerDocFieldValues}
|
||||
|
|
|
@ -21,6 +21,7 @@ import java.io.Reader;
|
|||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.NumericTokenStream;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.util.NumericUtils;
|
||||
import org.apache.lucene.search.NumericRangeQuery; // javadocs
|
||||
import org.apache.lucene.search.NumericRangeFilter; // javadocs
|
||||
|
@ -192,7 +193,7 @@ public final class NumericField extends AbstractField {
|
|||
public NumericField(String name, int precisionStep, Field.Store store, boolean index) {
|
||||
super(name, store, index ? Field.Index.ANALYZED_NO_NORMS : Field.Index.NO, Field.TermVector.NO);
|
||||
this.precisionStep = precisionStep;
|
||||
setOmitTermFreqAndPositions(true);
|
||||
setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||
}
|
||||
|
||||
/** Returns a {@link NumericTokenStream} for indexing the numeric value. */
|
||||
|
|
|
@ -186,8 +186,8 @@ public class CheckIndex {
|
|||
int numFields;
|
||||
|
||||
/** True if at least one of the fields in this segment
|
||||
* does not omitTermFreqAndPositions.
|
||||
* @see AbstractField#setOmitTermFreqAndPositions */
|
||||
* has position data
|
||||
* @see AbstractField#setIndexOptions(org.apache.lucene.index.FieldInfo.IndexOptions) */
|
||||
public boolean hasProx;
|
||||
|
||||
/** Map that includes certain
|
||||
|
|
|
@ -233,7 +233,7 @@ final class DocFieldProcessor extends DocConsumer {
|
|||
// easily add it
|
||||
FieldInfo fi = fieldInfos.addOrUpdate(fieldName, field.isIndexed(), field.isTermVectorStored(),
|
||||
field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
|
||||
field.getOmitNorms(), false, field.getOmitTermFreqAndPositions(), field.docValuesType());
|
||||
field.getOmitNorms(), false, field.getIndexOptions(), field.docValuesType());
|
||||
|
||||
fp = new DocFieldProcessorPerField(this, fi);
|
||||
fp.next = fieldHash[hashPos];
|
||||
|
@ -245,7 +245,7 @@ final class DocFieldProcessor extends DocConsumer {
|
|||
} else {
|
||||
fieldInfos.addOrUpdate(fp.fieldInfo.name, field.isIndexed(), field.isTermVectorStored(),
|
||||
field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
|
||||
field.getOmitNorms(), false, field.getOmitTermFreqAndPositions(), field.docValuesType());
|
||||
field.getOmitNorms(), false, field.getIndexOptions(), field.docValuesType());
|
||||
}
|
||||
|
||||
if (thisFieldGen != fp.lastGen) {
|
||||
|
|
|
@ -35,14 +35,27 @@ public final class FieldInfo {
|
|||
boolean storePositionWithTermVector;
|
||||
|
||||
public boolean omitNorms; // omit norms associated with indexed fields
|
||||
public boolean omitTermFreqAndPositions;
|
||||
public IndexOptions indexOptions;
|
||||
|
||||
public boolean storePayloads; // whether this field stores payloads together with term positions
|
||||
private int codecId = UNASSIGNED_CODEC_ID; // set inside SegmentCodecs#build() during segment flush - this is used to identify the codec used to write this field
|
||||
|
||||
/**
|
||||
* Controls how much information is stored in the postings lists.
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public static enum IndexOptions {
|
||||
/** only documents are indexed: term frequencies and positions are omitted */
|
||||
DOCS_ONLY,
|
||||
/** only documents and term frequencies are indexed: positions are omitted */
|
||||
DOCS_AND_FREQS,
|
||||
/** full postings: documents, frequencies, and positions */
|
||||
DOCS_AND_FREQS_AND_POSITIONS
|
||||
};
|
||||
|
||||
FieldInfo(String na, boolean tk, int nu, boolean storeTermVector,
|
||||
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
|
||||
boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions, ValueType docValues) {
|
||||
boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, ValueType docValues) {
|
||||
name = na;
|
||||
isIndexed = tk;
|
||||
number = nu;
|
||||
|
@ -53,16 +66,16 @@ public final class FieldInfo {
|
|||
this.storePositionWithTermVector = storePositionWithTermVector;
|
||||
this.storePayloads = storePayloads;
|
||||
this.omitNorms = omitNorms;
|
||||
this.omitTermFreqAndPositions = omitTermFreqAndPositions;
|
||||
this.indexOptions = indexOptions;
|
||||
} else { // for non-indexed fields, leave defaults
|
||||
this.storeTermVector = false;
|
||||
this.storeOffsetWithTermVector = false;
|
||||
this.storePositionWithTermVector = false;
|
||||
this.storePayloads = false;
|
||||
this.omitNorms = false;
|
||||
this.omitTermFreqAndPositions = false;
|
||||
this.indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||
}
|
||||
assert !omitTermFreqAndPositions || !storePayloads;
|
||||
assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !storePayloads;
|
||||
}
|
||||
|
||||
void setCodecId(int codecId) {
|
||||
|
@ -77,14 +90,14 @@ public final class FieldInfo {
|
|||
@Override
|
||||
public Object clone() {
|
||||
FieldInfo clone = new FieldInfo(name, isIndexed, number, storeTermVector, storePositionWithTermVector,
|
||||
storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions, docValues);
|
||||
storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions, docValues);
|
||||
clone.codecId = this.codecId;
|
||||
return clone;
|
||||
}
|
||||
|
||||
// should only be called by FieldInfos#addOrUpdate
|
||||
void update(boolean isIndexed, boolean storeTermVector, boolean storePositionWithTermVector,
|
||||
boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions) {
|
||||
boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, IndexOptions indexOptions) {
|
||||
|
||||
if (this.isIndexed != isIndexed) {
|
||||
this.isIndexed = true; // once indexed, always index
|
||||
|
@ -105,12 +118,13 @@ public final class FieldInfo {
|
|||
if (this.omitNorms != omitNorms) {
|
||||
this.omitNorms = true; // if one require omitNorms at least once, it remains off for life
|
||||
}
|
||||
if (this.omitTermFreqAndPositions != omitTermFreqAndPositions) {
|
||||
this.omitTermFreqAndPositions = true; // if one require omitTermFreqAndPositions at least once, it remains off for life
|
||||
if (this.indexOptions != indexOptions) {
|
||||
// downgrade
|
||||
this.indexOptions = this.indexOptions.compareTo(indexOptions) < 0 ? this.indexOptions : indexOptions;
|
||||
this.storePayloads = false;
|
||||
}
|
||||
}
|
||||
assert !this.omitTermFreqAndPositions || !this.storePayloads;
|
||||
assert this.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !this.storePayloads;
|
||||
}
|
||||
void setDocValues(ValueType v) {
|
||||
if (docValues == null) {
|
||||
|
|
|
@ -28,6 +28,7 @@ import java.util.SortedMap;
|
|||
import java.util.TreeMap;
|
||||
import java.util.Map.Entry;
|
||||
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.SegmentCodecs; // Required for Java 1.5 javadocs
|
||||
import org.apache.lucene.index.SegmentCodecs.SegmentCodecsBuilder;
|
||||
import org.apache.lucene.index.codecs.CodecProvider;
|
||||
|
@ -201,13 +202,13 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
|||
|
||||
// First used in 2.9; prior to 2.9 there was no format header
|
||||
public static final int FORMAT_START = -2;
|
||||
public static final int FORMAT_PER_FIELD_CODEC = -3;
|
||||
|
||||
// Records index values for this field
|
||||
public static final int FORMAT_INDEX_VALUES = -3;
|
||||
// First used in 3.4: omit only positional information
|
||||
public static final int FORMAT_OMIT_POSITIONS = -3;
|
||||
// per-field codec support, records index values for fields
|
||||
public static final int FORMAT_FLEX = -4;
|
||||
|
||||
// whenever you add a new format, make it 1 smaller (negative version logic)!
|
||||
static final int FORMAT_CURRENT = FORMAT_PER_FIELD_CODEC;
|
||||
static final int FORMAT_CURRENT = FORMAT_FLEX;
|
||||
|
||||
static final int FORMAT_MINIMUM = FORMAT_START;
|
||||
|
||||
|
@ -218,8 +219,10 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
|||
static final byte OMIT_NORMS = 0x10;
|
||||
static final byte STORE_PAYLOADS = 0x20;
|
||||
static final byte OMIT_TERM_FREQ_AND_POSITIONS = 0x40;
|
||||
static final byte OMIT_POSITIONS = -128;
|
||||
|
||||
private int format;
|
||||
private boolean hasFreq; // only set if readonly
|
||||
private boolean hasProx; // only set if readonly
|
||||
private boolean hasVectors; // only set if readonly
|
||||
private long version; // internal use to track changes
|
||||
|
@ -308,6 +311,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
|||
synchronized public Object clone() {
|
||||
FieldInfos fis = new FieldInfos(globalFieldNumbers, segmentCodecsBuilder);
|
||||
fis.format = format;
|
||||
fis.hasFreq = hasFreq;
|
||||
fis.hasProx = hasProx;
|
||||
fis.hasVectors = hasVectors;
|
||||
for (FieldInfo fi : this) {
|
||||
|
@ -317,14 +321,28 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
|||
return fis;
|
||||
}
|
||||
|
||||
/** Returns true if any fields do not omitTermFreqAndPositions */
|
||||
/** Returns true if any indexed field has positions */
|
||||
public boolean hasProx() {
|
||||
if (isReadOnly()) {
|
||||
return hasProx;
|
||||
}
|
||||
// mutable FIs must check!
|
||||
for (FieldInfo fi : this) {
|
||||
if (fi.isIndexed && !fi.omitTermFreqAndPositions) {
|
||||
if (fi.isIndexed && fi.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/** Returns true if any fields have freqs */
|
||||
public boolean hasFreq() {
|
||||
if (isReadOnly()) {
|
||||
return hasFreq;
|
||||
}
|
||||
// mutable FIs must check!
|
||||
for (FieldInfo fi : this) {
|
||||
if (fi.isIndexed && fi.indexOptions != IndexOptions.DOCS_ONLY) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -414,7 +432,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
|||
synchronized public void addOrUpdate(String name, boolean isIndexed, boolean storeTermVector,
|
||||
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, boolean omitNorms) {
|
||||
addOrUpdate(name, isIndexed, storeTermVector, storePositionWithTermVector,
|
||||
storeOffsetWithTermVector, omitNorms, false, false, null);
|
||||
storeOffsetWithTermVector, omitNorms, false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, null);
|
||||
}
|
||||
|
||||
/** If the field is not yet known, adds it. If it is known, checks to make
|
||||
|
@ -429,18 +447,18 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
|||
* @param storeOffsetWithTermVector true if the term vector with offsets should be stored
|
||||
* @param omitNorms true if the norms for the indexed field should be omitted
|
||||
* @param storePayloads true if payloads should be stored for this field
|
||||
* @param omitTermFreqAndPositions true if term freqs should be omitted for this field
|
||||
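* @param indexOptions controls what is indexed into the postings for this field: documents only, documents and term frequencies, or documents, frequencies and positions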
|
||||
*/
|
||||
synchronized public FieldInfo addOrUpdate(String name, boolean isIndexed, boolean storeTermVector,
|
||||
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
|
||||
boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions, ValueType docValues) {
|
||||
boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, ValueType docValues) {
|
||||
return addOrUpdateInternal(name, -1, isIndexed, storeTermVector, storePositionWithTermVector,
|
||||
storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions, docValues);
|
||||
storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions, docValues);
|
||||
}
|
||||
|
||||
synchronized private FieldInfo addOrUpdateInternal(String name, int preferredFieldNumber, boolean isIndexed,
|
||||
boolean storeTermVector, boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
|
||||
boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions, ValueType docValues) {
|
||||
boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, ValueType docValues) {
|
||||
if (globalFieldNumbers == null) {
|
||||
throw new IllegalStateException("FieldInfos are read-only, create a new instance with a global field map to make modifications to FieldInfos");
|
||||
}
|
||||
|
@ -448,9 +466,9 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
|||
FieldInfo fi = fieldInfo(name);
|
||||
if (fi == null) {
|
||||
final int fieldNumber = nextFieldNumber(name, preferredFieldNumber);
|
||||
fi = addInternal(name, fieldNumber, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions, docValues);
|
||||
fi = addInternal(name, fieldNumber, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions, docValues);
|
||||
} else {
|
||||
fi.update(isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
|
||||
fi.update(isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions);
|
||||
fi.setDocValues(docValues);
|
||||
}
|
||||
if ((fi.isIndexed || fi.hasDocValues()) && fi.getCodecId() == FieldInfo.UNASSIGNED_CODEC_ID) {
|
||||
|
@ -465,7 +483,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
|||
return addOrUpdateInternal(fi.name, fi.number, fi.isIndexed, fi.storeTermVector,
|
||||
fi.storePositionWithTermVector, fi.storeOffsetWithTermVector,
|
||||
fi.omitNorms, fi.storePayloads,
|
||||
fi.omitTermFreqAndPositions, fi.docValues);
|
||||
fi.indexOptions, fi.docValues);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -473,13 +491,13 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
|||
*/
|
||||
private FieldInfo addInternal(String name, int fieldNumber, boolean isIndexed,
|
||||
boolean storeTermVector, boolean storePositionWithTermVector,
|
||||
boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions, ValueType docValuesType) {
|
||||
boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, ValueType docValuesType) {
|
||||
// don't check modifiable here since we use that to initially build up FIs
|
||||
if (globalFieldNumbers != null) {
|
||||
globalFieldNumbers.setIfNotSet(fieldNumber, name);
|
||||
}
|
||||
final FieldInfo fi = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, storePositionWithTermVector,
|
||||
storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions, docValuesType);
|
||||
storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions, docValuesType);
|
||||
putInternal(fi);
|
||||
return fi;
|
||||
}
|
||||
|
@ -590,7 +608,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
|||
output.writeVInt(FORMAT_CURRENT);
|
||||
output.writeVInt(size());
|
||||
for (FieldInfo fi : this) {
|
||||
assert !fi.omitTermFreqAndPositions || !fi.storePayloads;
|
||||
assert fi.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !fi.storePayloads;
|
||||
byte bits = 0x0;
|
||||
if (fi.isIndexed) bits |= IS_INDEXED;
|
||||
if (fi.storeTermVector) bits |= STORE_TERMVECTOR;
|
||||
|
@ -598,7 +616,10 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
|||
if (fi.storeOffsetWithTermVector) bits |= STORE_OFFSET_WITH_TERMVECTOR;
|
||||
if (fi.omitNorms) bits |= OMIT_NORMS;
|
||||
if (fi.storePayloads) bits |= STORE_PAYLOADS;
|
||||
if (fi.omitTermFreqAndPositions) bits |= OMIT_TERM_FREQ_AND_POSITIONS;
|
||||
if (fi.indexOptions == IndexOptions.DOCS_ONLY)
|
||||
bits |= OMIT_TERM_FREQ_AND_POSITIONS;
|
||||
else if (fi.indexOptions == IndexOptions.DOCS_AND_FREQS)
|
||||
bits |= OMIT_POSITIONS;
|
||||
output.writeString(fi.name);
|
||||
output.writeInt(fi.number);
|
||||
output.writeInt(fi.getCodecId());
|
||||
|
@ -673,8 +694,8 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
|||
for (int i = 0; i < size; i++) {
|
||||
String name = input.readString();
|
||||
// if this is a previous format codec 0 will be preflex!
|
||||
final int fieldNumber = format <= FORMAT_PER_FIELD_CODEC? input.readInt():i;
|
||||
final int codecId = format <= FORMAT_PER_FIELD_CODEC? input.readInt():0;
|
||||
final int fieldNumber = format <= FORMAT_FLEX? input.readInt():i;
|
||||
final int codecId = format <= FORMAT_FLEX? input.readInt():0;
|
||||
byte bits = input.readByte();
|
||||
boolean isIndexed = (bits & IS_INDEXED) != 0;
|
||||
boolean storeTermVector = (bits & STORE_TERMVECTOR) != 0;
|
||||
|
@ -682,18 +703,30 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
|||
boolean storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
|
||||
boolean omitNorms = (bits & OMIT_NORMS) != 0;
|
||||
boolean storePayloads = (bits & STORE_PAYLOADS) != 0;
|
||||
boolean omitTermFreqAndPositions = (bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0;
|
||||
final IndexOptions indexOptions;
|
||||
if ((bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0) {
|
||||
indexOptions = IndexOptions.DOCS_ONLY;
|
||||
} else if ((bits & OMIT_POSITIONS) != 0) {
|
||||
if (format <= FORMAT_OMIT_POSITIONS) {
|
||||
indexOptions = IndexOptions.DOCS_AND_FREQS;
|
||||
} else {
|
||||
throw new CorruptIndexException("Corrupt fieldinfos, OMIT_POSITIONS set but format=" + format);
|
||||
}
|
||||
} else {
|
||||
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||
}
|
||||
|
||||
// LUCENE-3027: past indices were able to write
|
||||
// storePayloads=true when omitTFAP is also true,
|
||||
// which is invalid. We correct that, here:
|
||||
if (omitTermFreqAndPositions) {
|
||||
if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||
storePayloads = false;
|
||||
}
|
||||
hasVectors |= storeTermVector;
|
||||
hasProx |= isIndexed && !omitTermFreqAndPositions;
|
||||
hasProx |= isIndexed && indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||
hasFreq |= isIndexed && indexOptions != IndexOptions.DOCS_ONLY;
|
||||
ValueType docValuesType = null;
|
||||
if (format <= FORMAT_INDEX_VALUES) {
|
||||
if (format <= FORMAT_FLEX) {
|
||||
final byte b = input.readByte();
|
||||
switch(b) {
|
||||
case 0:
|
||||
|
@ -743,7 +776,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
|||
throw new IllegalStateException("unhandled indexValues type " + b);
|
||||
}
|
||||
}
|
||||
final FieldInfo addInternal = addInternal(name, fieldNumber, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions, docValuesType);
|
||||
final FieldInfo addInternal = addInternal(name, fieldNumber, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions, docValuesType);
|
||||
addInternal.setCodecId(codecId);
|
||||
}
|
||||
|
||||
|
@ -771,7 +804,8 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
|||
FieldInfo clone = (FieldInfo) (fieldInfo).clone();
|
||||
roFis.putInternal(clone);
|
||||
roFis.hasVectors |= clone.storeTermVector;
|
||||
roFis.hasProx |= clone.isIndexed && !clone.omitTermFreqAndPositions;
|
||||
roFis.hasProx |= clone.isIndexed && clone.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||
roFis.hasFreq |= clone.isIndexed && clone.indexOptions != IndexOptions.DOCS_ONLY;
|
||||
}
|
||||
return roFis;
|
||||
}
|
||||
|
|
|
@ -340,7 +340,7 @@ public final class FieldsReader implements Cloneable, Closeable {
|
|||
}
|
||||
|
||||
f.setOmitNorms(fi.omitNorms);
|
||||
f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
|
||||
f.setIndexOptions(fi.indexOptions);
|
||||
doc.add(f);
|
||||
}
|
||||
|
||||
|
@ -364,7 +364,7 @@ public final class FieldsReader implements Cloneable, Closeable {
|
|||
termVector);
|
||||
}
|
||||
|
||||
f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
|
||||
f.setIndexOptions(fi.indexOptions);
|
||||
f.setOmitNorms(fi.omitNorms);
|
||||
doc.add(f);
|
||||
}
|
||||
|
|
|
@ -22,6 +22,7 @@ import java.util.ArrayList;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.codecs.FieldsConsumer;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CollectionUtil;
|
||||
|
@ -79,7 +80,7 @@ final class FreqProxTermsWriter extends TermsHashConsumer {
|
|||
|
||||
// Aggregate the storePayload as seen by the same
|
||||
// field across multiple threads
|
||||
if (!fieldInfo.omitTermFreqAndPositions) {
|
||||
if (fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||
fieldInfo.storePayloads |= fieldWriter.hasPayloads;
|
||||
}
|
||||
|
||||
|
|
|
@ -23,6 +23,7 @@ import java.util.Map;
|
|||
|
||||
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||
import org.apache.lucene.document.Fieldable;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.codecs.FieldsConsumer;
|
||||
import org.apache.lucene.index.codecs.PostingsConsumer;
|
||||
import org.apache.lucene.index.codecs.TermStats;
|
||||
|
@ -41,7 +42,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
|||
final FieldInfo fieldInfo;
|
||||
final DocumentsWriterPerThread.DocState docState;
|
||||
final FieldInvertState fieldState;
|
||||
boolean omitTermFreqAndPositions;
|
||||
IndexOptions indexOptions;
|
||||
PayloadAttribute payloadAttribute;
|
||||
|
||||
public FreqProxTermsWriterPerField(TermsHashPerField termsHashPerField, FreqProxTermsWriter parent, FieldInfo fieldInfo) {
|
||||
|
@ -50,12 +51,12 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
|||
this.fieldInfo = fieldInfo;
|
||||
docState = termsHashPerField.docState;
|
||||
fieldState = termsHashPerField.fieldState;
|
||||
omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
|
||||
indexOptions = fieldInfo.indexOptions;
|
||||
}
|
||||
|
||||
@Override
|
||||
int getStreamCount() {
|
||||
if (fieldInfo.omitTermFreqAndPositions)
|
||||
if (fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
|
||||
return 1;
|
||||
else
|
||||
return 2;
|
||||
|
@ -76,7 +77,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
|||
void reset() {
|
||||
// Record, up front, whether our in-RAM format will be
|
||||
// with or without term freqs:
|
||||
omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
|
||||
indexOptions = fieldInfo.indexOptions;
|
||||
payloadAttribute = null;
|
||||
}
|
||||
|
||||
|
@ -126,13 +127,15 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
|||
|
||||
FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
|
||||
postings.lastDocIDs[termID] = docState.docID;
|
||||
if (omitTermFreqAndPositions) {
|
||||
if (indexOptions == IndexOptions.DOCS_ONLY) {
|
||||
postings.lastDocCodes[termID] = docState.docID;
|
||||
} else {
|
||||
postings.lastDocCodes[termID] = docState.docID << 1;
|
||||
postings.docFreqs[termID] = 1;
|
||||
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||
writeProx(termID, fieldState.position);
|
||||
}
|
||||
}
|
||||
fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
|
||||
fieldState.uniqueTermCount++;
|
||||
}
|
||||
|
@ -144,9 +147,9 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
|||
|
||||
FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
|
||||
|
||||
assert omitTermFreqAndPositions || postings.docFreqs[termID] > 0;
|
||||
assert indexOptions == IndexOptions.DOCS_ONLY || postings.docFreqs[termID] > 0;
|
||||
|
||||
if (omitTermFreqAndPositions) {
|
||||
if (indexOptions == IndexOptions.DOCS_ONLY) {
|
||||
if (docState.docID != postings.lastDocIDs[termID]) {
|
||||
assert docState.docID > postings.lastDocIDs[termID];
|
||||
termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]);
|
||||
|
@ -172,14 +175,18 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
|||
fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
|
||||
postings.lastDocCodes[termID] = (docState.docID - postings.lastDocIDs[termID]) << 1;
|
||||
postings.lastDocIDs[termID] = docState.docID;
|
||||
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||
writeProx(termID, fieldState.position);
|
||||
}
|
||||
fieldState.uniqueTermCount++;
|
||||
} else {
|
||||
fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, ++postings.docFreqs[termID]);
|
||||
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||
writeProx(termID, fieldState.position-postings.lastPositions[termID]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
ParallelPostingsArray createPostingsArray(int size) {
|
||||
|
@ -237,7 +244,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
|||
final TermsConsumer termsConsumer = consumer.addField(fieldInfo);
|
||||
final Comparator<BytesRef> termComp = termsConsumer.getComparator();
|
||||
|
||||
final boolean currentFieldOmitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
|
||||
final IndexOptions currentFieldIndexOptions = fieldInfo.indexOptions;
|
||||
|
||||
final Map<Term,Integer> segDeletes;
|
||||
if (state.segDeletes != null && state.segDeletes.terms.size() > 0) {
|
||||
|
@ -263,7 +270,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
|||
termsHashPerField.bytePool.setBytesRef(text, textStart);
|
||||
|
||||
termsHashPerField.initReader(freq, termID, 0);
|
||||
if (!fieldInfo.omitTermFreqAndPositions) {
|
||||
if (fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||
termsHashPerField.initReader(prox, termID, 1);
|
||||
}
|
||||
|
||||
|
@ -300,7 +307,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
|||
if (postings.lastDocCodes[termID] != -1) {
|
||||
// Return last doc
|
||||
docID = postings.lastDocIDs[termID];
|
||||
if (!omitTermFreqAndPositions) {
|
||||
if (indexOptions != IndexOptions.DOCS_ONLY) {
|
||||
termFreq = postings.docFreqs[termID];
|
||||
}
|
||||
postings.lastDocCodes[termID] = -1;
|
||||
|
@ -310,7 +317,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
|||
}
|
||||
} else {
|
||||
final int code = freq.readVInt();
|
||||
if (omitTermFreqAndPositions) {
|
||||
if (indexOptions == IndexOptions.DOCS_ONLY) {
|
||||
docID += code;
|
||||
} else {
|
||||
docID += code >>> 1;
|
||||
|
@ -351,14 +358,17 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
|||
state.liveDocs.clear(docID);
|
||||
}
|
||||
|
||||
if (currentFieldIndexOptions != IndexOptions.DOCS_ONLY) {
|
||||
totTF += termDocFreq;
|
||||
}
|
||||
|
||||
// Carefully copy over the prox + payload info,
|
||||
// changing the format to match Lucene's segment
|
||||
// format.
|
||||
if (!currentFieldOmitTermFreqAndPositions) {
|
||||
// omitTermFreqAndPositions == false so we do write positions &
|
||||
// payload
|
||||
|
||||
if (currentFieldIndexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||
// we do write positions & payload
|
||||
int position = 0;
|
||||
totTF += termDocFreq;
|
||||
for(int j=0;j<termDocFreq;j++) {
|
||||
final int code = prox.readVInt();
|
||||
position += code >> 1;
|
||||
|
|
|
@ -153,6 +153,8 @@ public abstract class IndexReader implements Cloneable,Closeable {
|
|||
STORES_PAYLOADS,
|
||||
/** All fields that omit tf */
|
||||
OMIT_TERM_FREQ_AND_POSITIONS,
|
||||
/** All fields that omit positions */
|
||||
OMIT_POSITIONS,
|
||||
/** All fields which are not indexed */
|
||||
UNINDEXED,
|
||||
/** All fields which are indexed with termvectors enabled */
|
||||
|
|
|
@ -91,7 +91,7 @@ public final class SegmentInfo implements Cloneable {
|
|||
//TODO: remove when we don't have to support old indexes anymore that had this field
|
||||
private int hasVectors = CHECK_FIELDINFO;
|
||||
//TODO: remove when we don't have to support old indexes anymore that had this field
|
||||
private int hasProx = CHECK_FIELDINFO; // True if this segment has any fields with omitTermFreqAndPositions==false
|
||||
private int hasProx = CHECK_FIELDINFO; // True if this segment has any fields with positional information
|
||||
|
||||
|
||||
private FieldInfos fieldInfos;
|
||||
|
|
|
@ -24,6 +24,7 @@ import java.util.Collection;
|
|||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.IndexReader.FieldOption;
|
||||
import org.apache.lucene.index.MergePolicy.MergeAbortedException;
|
||||
import org.apache.lucene.index.codecs.Codec;
|
||||
|
@ -158,12 +159,12 @@ final class SegmentMerger {
|
|||
private static void addIndexed(IndexReader reader, FieldInfos fInfos,
|
||||
Collection<String> names, boolean storeTermVectors,
|
||||
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
|
||||
boolean storePayloads, boolean omitTFAndPositions)
|
||||
boolean storePayloads, IndexOptions indexOptions)
|
||||
throws IOException {
|
||||
for (String field : names) {
|
||||
fInfos.addOrUpdate(field, true, storeTermVectors,
|
||||
storePositionWithTermVector, storeOffsetWithTermVector, !reader
|
||||
.hasNorms(field), storePayloads, omitTFAndPositions, null);
|
||||
.hasNorms(field), storePayloads, indexOptions, null);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -223,13 +224,14 @@ final class SegmentMerger {
|
|||
fieldInfos.add(fi);
|
||||
}
|
||||
} else {
|
||||
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false);
|
||||
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false);
|
||||
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false);
|
||||
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR), true, false, false, false, false);
|
||||
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.OMIT_TERM_FREQ_AND_POSITIONS), false, false, false, false, true);
|
||||
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.STORES_PAYLOADS), false, false, false, true, false);
|
||||
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.INDEXED), false, false, false, false, false);
|
||||
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
|
||||
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
|
||||
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
|
||||
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR), true, false, false, false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
|
||||
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.OMIT_POSITIONS), false, false, false, false, IndexOptions.DOCS_AND_FREQS);
|
||||
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.OMIT_TERM_FREQ_AND_POSITIONS), false, false, false, false, IndexOptions.DOCS_ONLY);
|
||||
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.STORES_PAYLOADS), false, false, false, true, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
|
||||
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.INDEXED), false, false, false, false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
|
||||
fieldInfos.addOrUpdate(reader.getFieldNames(FieldOption.UNINDEXED), false);
|
||||
fieldInfos.addOrUpdate(reader.getFieldNames(FieldOption.DOC_VALUES), false);
|
||||
}
|
||||
|
|
|
@ -29,6 +29,7 @@ import java.util.concurrent.atomic.AtomicInteger;
|
|||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.FieldSelector;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.codecs.PerDocValues;
|
||||
import org.apache.lucene.store.BufferedIndexInput;
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
@ -513,7 +514,10 @@ public class SegmentReader extends IndexReader implements Cloneable {
|
|||
else if (!fi.isIndexed && fieldOption == IndexReader.FieldOption.UNINDEXED) {
|
||||
fieldSet.add(fi.name);
|
||||
}
|
||||
else if (fi.omitTermFreqAndPositions && fieldOption == IndexReader.FieldOption.OMIT_TERM_FREQ_AND_POSITIONS) {
|
||||
else if (fi.indexOptions == IndexOptions.DOCS_ONLY && fieldOption == IndexReader.FieldOption.OMIT_TERM_FREQ_AND_POSITIONS) {
|
||||
fieldSet.add(fi.name);
|
||||
}
|
||||
else if (fi.indexOptions == IndexOptions.DOCS_AND_FREQS && fieldOption == IndexReader.FieldOption.OMIT_POSITIONS) {
|
||||
fieldSet.add(fi.name);
|
||||
}
|
||||
else if (fi.storePayloads && fieldOption == IndexReader.FieldOption.STORES_PAYLOADS) {
|
||||
|
|
|
@ -27,6 +27,7 @@ import java.util.TreeMap;
|
|||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.FieldsEnum;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
|
@ -136,7 +137,7 @@ public class BlockTermsReader extends FieldsProducer {
|
|||
assert numTerms >= 0;
|
||||
final long termsStartPointer = in.readVLong();
|
||||
final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
|
||||
final long sumTotalTermFreq = fieldInfo.omitTermFreqAndPositions ? -1 : in.readVLong();
|
||||
final long sumTotalTermFreq = fieldInfo.indexOptions == IndexOptions.DOCS_ONLY ? -1 : in.readVLong();
|
||||
final long sumDocFreq = in.readVLong();
|
||||
assert !fields.containsKey(fieldInfo.name);
|
||||
fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq));
|
||||
|
@ -709,7 +710,7 @@ public class BlockTermsReader extends FieldsProducer {
|
|||
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
|
||||
//System.out.println("BTR.d&p this=" + this);
|
||||
decodeMetaData();
|
||||
if (fieldInfo.omitTermFreqAndPositions) {
|
||||
if (fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||
return null;
|
||||
} else {
|
||||
DocsAndPositionsEnum dpe = postingsReader.docsAndPositions(fieldInfo, state, liveDocs, reuse);
|
||||
|
@ -867,7 +868,7 @@ public class BlockTermsReader extends FieldsProducer {
|
|||
// just skipN here:
|
||||
state.docFreq = freqReader.readVInt();
|
||||
//System.out.println(" dF=" + state.docFreq);
|
||||
if (!fieldInfo.omitTermFreqAndPositions) {
|
||||
if (fieldInfo.indexOptions != IndexOptions.DOCS_ONLY) {
|
||||
state.totalTermFreq = state.docFreq + freqReader.readVLong();
|
||||
//System.out.println(" totTF=" + state.totalTermFreq);
|
||||
}
|
||||
|
|
|
@ -23,6 +23,7 @@ import java.util.Comparator;
|
|||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
|
@ -129,7 +130,7 @@ public class BlockTermsWriter extends FieldsConsumer {
|
|||
out.writeVInt(field.fieldInfo.number);
|
||||
out.writeVLong(field.numTerms);
|
||||
out.writeVLong(field.termsStartPointer);
|
||||
if (!field.fieldInfo.omitTermFreqAndPositions) {
|
||||
if (field.fieldInfo.indexOptions != IndexOptions.DOCS_ONLY) {
|
||||
out.writeVLong(field.sumTotalTermFreq);
|
||||
}
|
||||
out.writeVLong(field.sumDocFreq);
|
||||
|
@ -298,7 +299,7 @@ public class BlockTermsWriter extends FieldsConsumer {
|
|||
final TermStats stats = pendingTerms[termCount].stats;
|
||||
assert stats != null;
|
||||
bytesWriter.writeVInt(stats.docFreq);
|
||||
if (!fieldInfo.omitTermFreqAndPositions) {
|
||||
if (fieldInfo.indexOptions != IndexOptions.DOCS_ONLY) {
|
||||
bytesWriter.writeVLong(stats.totalTermFreq-stats.docFreq);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -21,6 +21,7 @@ import java.io.IOException;
|
|||
|
||||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
|
@ -60,16 +61,17 @@ public abstract class PostingsConsumer {
|
|||
int df = 0;
|
||||
long totTF = 0;
|
||||
|
||||
if (mergeState.fieldInfo.omitTermFreqAndPositions) {
|
||||
if (mergeState.fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||
while(true) {
|
||||
final int doc = postings.nextDoc();
|
||||
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
break;
|
||||
}
|
||||
this.startDoc(doc, postings.freq());
|
||||
final int freq = postings.freq();
|
||||
this.startDoc(doc, freq);
|
||||
this.finishDoc();
|
||||
df++;
|
||||
totTF++;
|
||||
totTF += freq;
|
||||
}
|
||||
} else {
|
||||
final DocsAndPositionsEnum postingsEnum = (DocsAndPositionsEnum) postings;
|
||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.lucene.index.codecs;
|
|||
import java.io.IOException;
|
||||
import java.util.Comparator;
|
||||
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.index.MultiDocsEnum;
|
||||
import org.apache.lucene.index.MultiDocsAndPositionsEnum;
|
||||
|
@ -59,7 +60,7 @@ public abstract class TermsConsumer {
|
|||
long sumDocFreq = 0;
|
||||
long sumDFsinceLastAbortCheck = 0;
|
||||
|
||||
if (mergeState.fieldInfo.omitTermFreqAndPositions) {
|
||||
if (mergeState.fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||
if (docsEnum == null) {
|
||||
docsEnum = new MappingMultiDocsEnum();
|
||||
}
|
||||
|
@ -75,6 +76,7 @@ public abstract class TermsConsumer {
|
|||
final TermStats stats = postingsConsumer.merge(mergeState, docsEnum);
|
||||
if (stats.docFreq > 0) {
|
||||
finishTerm(term, stats);
|
||||
sumTotalTermFreq += stats.totalTermFreq;
|
||||
sumDFsinceLastAbortCheck += stats.docFreq;
|
||||
sumDocFreq += stats.docFreq;
|
||||
if (sumDFsinceLastAbortCheck > 60000) {
|
||||
|
|
|
@ -27,6 +27,7 @@ import java.util.TreeMap;
|
|||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.FieldsEnum;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
|
@ -118,7 +119,7 @@ public class MemoryCodec extends Codec {
|
|||
lastDocID = docID;
|
||||
docCount++;
|
||||
|
||||
if (field.omitTermFreqAndPositions) {
|
||||
if (field.indexOptions == IndexOptions.DOCS_ONLY) {
|
||||
buffer.writeVInt(delta);
|
||||
} else if (termDocFreq == 1) {
|
||||
buffer.writeVInt((delta<<1) | 1);
|
||||
|
@ -192,7 +193,7 @@ public class MemoryCodec extends Codec {
|
|||
assert buffer2.getFilePointer() == 0;
|
||||
|
||||
buffer2.writeVInt(stats.docFreq);
|
||||
if (!field.omitTermFreqAndPositions) {
|
||||
if (field.indexOptions != IndexOptions.DOCS_ONLY) {
|
||||
buffer2.writeVLong(stats.totalTermFreq-stats.docFreq);
|
||||
}
|
||||
int pos = (int) buffer2.getFilePointer();
|
||||
|
@ -223,7 +224,7 @@ public class MemoryCodec extends Codec {
|
|||
if (termCount > 0) {
|
||||
out.writeVInt(termCount);
|
||||
out.writeVInt(field.number);
|
||||
if (!field.omitTermFreqAndPositions) {
|
||||
if (field.indexOptions != IndexOptions.DOCS_ONLY) {
|
||||
out.writeVLong(sumTotalTermFreq);
|
||||
}
|
||||
out.writeVLong(sumDocFreq);
|
||||
|
@ -266,7 +267,7 @@ public class MemoryCodec extends Codec {
|
|||
}
|
||||
|
||||
private final static class FSTDocsEnum extends DocsEnum {
|
||||
private final boolean omitTFAP;
|
||||
private final IndexOptions indexOptions;
|
||||
private final boolean storePayloads;
|
||||
private byte[] buffer = new byte[16];
|
||||
private final ByteArrayDataInput in = new ByteArrayDataInput(buffer);
|
||||
|
@ -278,13 +279,13 @@ public class MemoryCodec extends Codec {
|
|||
private int payloadLen;
|
||||
private int numDocs;
|
||||
|
||||
public FSTDocsEnum(boolean omitTFAP, boolean storePayloads) {
|
||||
this.omitTFAP = omitTFAP;
|
||||
public FSTDocsEnum(IndexOptions indexOptions, boolean storePayloads) {
|
||||
this.indexOptions = indexOptions;
|
||||
this.storePayloads = storePayloads;
|
||||
}
|
||||
|
||||
public boolean canReuse(boolean omitTFAP, boolean storePayloads) {
|
||||
return omitTFAP == this.omitTFAP && storePayloads == this.storePayloads;
|
||||
public boolean canReuse(IndexOptions indexOptions, boolean storePayloads) {
|
||||
return indexOptions == this.indexOptions && storePayloads == this.storePayloads;
|
||||
}
|
||||
|
||||
public FSTDocsEnum reset(BytesRef bufferIn, Bits liveDocs, int numDocs) {
|
||||
|
@ -313,7 +314,7 @@ public class MemoryCodec extends Codec {
|
|||
return docID = NO_MORE_DOCS;
|
||||
}
|
||||
docUpto++;
|
||||
if (omitTFAP) {
|
||||
if (indexOptions == IndexOptions.DOCS_ONLY) {
|
||||
docID += in.readVInt();
|
||||
freq = 1;
|
||||
} else {
|
||||
|
@ -327,6 +328,7 @@ public class MemoryCodec extends Codec {
|
|||
assert freq > 0;
|
||||
}
|
||||
|
||||
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||
// Skip positions
|
||||
for(int posUpto=0;posUpto<freq;posUpto++) {
|
||||
if (!storePayloads) {
|
||||
|
@ -340,6 +342,7 @@ public class MemoryCodec extends Codec {
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (liveDocs == null || liveDocs.get(docID)) {
|
||||
if (VERBOSE) System.out.println(" return docID=" + docID + " freq=" + freq);
|
||||
|
@ -454,8 +457,8 @@ public class MemoryCodec extends Codec {
|
|||
if (!storePayloads) {
|
||||
in.readVInt();
|
||||
} else {
|
||||
final int codeSkip = in.readVInt();
|
||||
if ((codeSkip & 1) != 0) {
|
||||
final int skipCode = in.readVInt();
|
||||
if ((skipCode & 1) != 0) {
|
||||
payloadLength = in.readVInt();
|
||||
if (VERBOSE) System.out.println(" new payloadLen=" + payloadLength);
|
||||
}
|
||||
|
@ -548,7 +551,7 @@ public class MemoryCodec extends Codec {
|
|||
if (!didDecode) {
|
||||
buffer.reset(current.output.bytes, 0, current.output.length);
|
||||
docFreq = buffer.readVInt();
|
||||
if (!field.omitTermFreqAndPositions) {
|
||||
if (field.indexOptions != IndexOptions.DOCS_ONLY) {
|
||||
totalTermFreq = docFreq + buffer.readVLong();
|
||||
} else {
|
||||
totalTermFreq = 0;
|
||||
|
@ -598,11 +601,11 @@ public class MemoryCodec extends Codec {
|
|||
decodeMetaData();
|
||||
FSTDocsEnum docsEnum;
|
||||
if (reuse == null || !(reuse instanceof FSTDocsEnum)) {
|
||||
docsEnum = new FSTDocsEnum(field.omitTermFreqAndPositions, field.storePayloads);
|
||||
docsEnum = new FSTDocsEnum(field.indexOptions, field.storePayloads);
|
||||
} else {
|
||||
docsEnum = (FSTDocsEnum) reuse;
|
||||
if (!docsEnum.canReuse(field.omitTermFreqAndPositions, field.storePayloads)) {
|
||||
docsEnum = new FSTDocsEnum(field.omitTermFreqAndPositions, field.storePayloads);
|
||||
if (!docsEnum.canReuse(field.indexOptions, field.storePayloads)) {
|
||||
docsEnum = new FSTDocsEnum(field.indexOptions, field.storePayloads);
|
||||
}
|
||||
}
|
||||
return docsEnum.reset(current.output, liveDocs, docFreq);
|
||||
|
@ -610,7 +613,7 @@ public class MemoryCodec extends Codec {
|
|||
|
||||
@Override
|
||||
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
|
||||
if (field.omitTermFreqAndPositions) {
|
||||
if (field.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||
return null;
|
||||
}
|
||||
decodeMetaData();
|
||||
|
@ -686,7 +689,7 @@ public class MemoryCodec extends Codec {
|
|||
public TermsReader(FieldInfos fieldInfos, IndexInput in) throws IOException {
|
||||
final int fieldNumber = in.readVInt();
|
||||
field = fieldInfos.fieldInfo(fieldNumber);
|
||||
if (!field.omitTermFreqAndPositions) {
|
||||
if (field.indexOptions != IndexOptions.DOCS_ONLY) {
|
||||
sumTotalTermFreq = in.readVLong();
|
||||
} else {
|
||||
sumTotalTermFreq = 0;
|
||||
|
|
|
@ -25,9 +25,11 @@ import java.util.Iterator;
|
|||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.FieldsEnum;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
|
@ -99,7 +101,7 @@ public class PreFlexFields extends FieldsProducer {
|
|||
if (fi.isIndexed) {
|
||||
fields.put(fi.name, fi);
|
||||
preTerms.put(fi.name, new PreTerms(fi));
|
||||
if (!fi.omitTermFreqAndPositions) {
|
||||
if (fi.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||
anyProx = true;
|
||||
}
|
||||
}
|
||||
|
@ -973,7 +975,7 @@ public class PreFlexFields extends FieldsProducer {
|
|||
@Override
|
||||
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
|
||||
PreDocsAndPositionsEnum docsPosEnum;
|
||||
if (fieldInfo.omitTermFreqAndPositions) {
|
||||
if (fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||
return null;
|
||||
} else if (reuse == null || !(reuse instanceof PreDocsAndPositionsEnum)) {
|
||||
docsPosEnum = new PreDocsAndPositionsEnum();
|
||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.lucene.index.codecs.preflex;
|
|||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.codecs.standard.DefaultSkipListReader;
|
||||
|
@ -51,7 +52,7 @@ public class SegmentTermDocs {
|
|||
private boolean haveSkipped;
|
||||
|
||||
protected boolean currentFieldStoresPayloads;
|
||||
protected boolean currentFieldOmitTermFreqAndPositions;
|
||||
protected IndexOptions indexOptions;
|
||||
|
||||
public SegmentTermDocs(IndexInput freqStream, TermInfosReader tis, FieldInfos fieldInfos) {
|
||||
this.freqStream = (IndexInput) freqStream.clone();
|
||||
|
@ -89,7 +90,7 @@ public class SegmentTermDocs {
|
|||
void seek(TermInfo ti, Term term) throws IOException {
|
||||
count = 0;
|
||||
FieldInfo fi = fieldInfos.fieldInfo(term.field());
|
||||
currentFieldOmitTermFreqAndPositions = (fi != null) ? fi.omitTermFreqAndPositions : false;
|
||||
this.indexOptions = (fi != null) ? fi.indexOptions : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||
currentFieldStoresPayloads = (fi != null) ? fi.storePayloads : false;
|
||||
if (ti == null) {
|
||||
df = 0;
|
||||
|
@ -122,7 +123,7 @@ public class SegmentTermDocs {
|
|||
return false;
|
||||
final int docCode = freqStream.readVInt();
|
||||
|
||||
if (currentFieldOmitTermFreqAndPositions) {
|
||||
if (indexOptions == IndexOptions.DOCS_ONLY) {
|
||||
doc += docCode;
|
||||
freq = 1;
|
||||
} else {
|
||||
|
@ -149,7 +150,7 @@ public class SegmentTermDocs {
|
|||
public int read(final int[] docs, final int[] freqs)
|
||||
throws IOException {
|
||||
final int length = docs.length;
|
||||
if (currentFieldOmitTermFreqAndPositions) {
|
||||
if (indexOptions == IndexOptions.DOCS_ONLY) {
|
||||
return readNoTf(docs, freqs, length);
|
||||
} else {
|
||||
int i = 0;
|
||||
|
|
|
@ -21,6 +21,7 @@ import java.io.IOException;
|
|||
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
|
||||
/**
|
||||
|
@ -77,8 +78,8 @@ extends SegmentTermDocs {
|
|||
}
|
||||
|
||||
public final int nextPosition() throws IOException {
|
||||
if (currentFieldOmitTermFreqAndPositions)
|
||||
// This field does not store term freq, positions, payloads
|
||||
if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
|
||||
// This field does not store positions, payloads
|
||||
return 0;
|
||||
// perform lazy skips if necessary
|
||||
lazySkip();
|
||||
|
@ -140,7 +141,7 @@ extends SegmentTermDocs {
|
|||
}
|
||||
|
||||
private void skipPositions(int n) throws IOException {
|
||||
assert !currentFieldOmitTermFreqAndPositions;
|
||||
assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||
for (int f = n; f > 0; f--) { // skip unread positions
|
||||
readDeltaPosition();
|
||||
skipPayload();
|
||||
|
|
|
@ -22,6 +22,7 @@ import java.io.IOException;
|
|||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.TermState;
|
||||
import org.apache.lucene.index.codecs.PostingsReaderBase;
|
||||
import org.apache.lucene.index.codecs.BlockTermState;
|
||||
|
@ -134,8 +135,8 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
|
|||
//System.out.println("PR nextTerm");
|
||||
PulsingTermState termState = (PulsingTermState) _termState;
|
||||
|
||||
// total TF, but in the omitTFAP case its computed based on docFreq.
|
||||
long count = fieldInfo.omitTermFreqAndPositions ? termState.docFreq : termState.totalTermFreq;
|
||||
// if we have positions, its total TF, otherwise its computed based on docFreq.
|
||||
long count = fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS ? termState.totalTermFreq : termState.docFreq;
|
||||
//System.out.println(" count=" + count + " threshold=" + maxPositions);
|
||||
|
||||
if (count <= maxPositions) {
|
||||
|
@ -193,7 +194,7 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
|
|||
// TODO: -- not great that we can't always reuse
|
||||
@Override
|
||||
public DocsAndPositionsEnum docsAndPositions(FieldInfo field, BlockTermState _termState, Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
|
||||
if (field.omitTermFreqAndPositions) {
|
||||
if (field.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||
return null;
|
||||
}
|
||||
//System.out.println("D&P: field=" + field.name);
|
||||
|
@ -223,7 +224,7 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
|
|||
|
||||
private static class PulsingDocsEnum extends DocsEnum {
|
||||
private final ByteArrayDataInput postings = new ByteArrayDataInput();
|
||||
private final boolean omitTF;
|
||||
private final IndexOptions indexOptions;
|
||||
private final boolean storePayloads;
|
||||
private Bits liveDocs;
|
||||
private int docID;
|
||||
|
@ -231,7 +232,7 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
|
|||
private int payloadLength;
|
||||
|
||||
public PulsingDocsEnum(FieldInfo fieldInfo) {
|
||||
omitTF = fieldInfo.omitTermFreqAndPositions;
|
||||
indexOptions = fieldInfo.indexOptions;
|
||||
storePayloads = fieldInfo.storePayloads;
|
||||
}
|
||||
|
||||
|
@ -249,7 +250,7 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
|
|||
}
|
||||
|
||||
boolean canReuse(FieldInfo fieldInfo) {
|
||||
return omitTF == fieldInfo.omitTermFreqAndPositions && storePayloads == fieldInfo.storePayloads;
|
||||
return indexOptions == fieldInfo.indexOptions && storePayloads == fieldInfo.storePayloads;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -262,7 +263,7 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
|
|||
}
|
||||
|
||||
final int code = postings.readVInt();
|
||||
if (omitTF) {
|
||||
if (indexOptions == IndexOptions.DOCS_ONLY) {
|
||||
docID += code;
|
||||
} else {
|
||||
docID += code >>> 1; // shift off low bit
|
||||
|
@ -272,6 +273,7 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
|
|||
freq = postings.readVInt(); // else read freq
|
||||
}
|
||||
|
||||
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||
// Skip positions
|
||||
if (storePayloads) {
|
||||
for(int pos=0;pos<freq;pos++) {
|
||||
|
@ -290,6 +292,7 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (liveDocs == null || liveDocs.get(docID)) {
|
||||
//System.out.println(" return docID=" + docID + " freq=" + freq);
|
||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.lucene.index.codecs.pulsing;
|
|||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.codecs.PostingsWriterBase;
|
||||
import org.apache.lucene.index.codecs.TermStats;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
|
@ -46,7 +47,7 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase {
|
|||
|
||||
private IndexOutput termsOut;
|
||||
|
||||
private boolean omitTF;
|
||||
private IndexOptions indexOptions;
|
||||
private boolean storePayloads;
|
||||
|
||||
// one entry per position
|
||||
|
@ -102,7 +103,7 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase {
|
|||
// our parent calls setField whenever the field changes
|
||||
@Override
|
||||
public void setField(FieldInfo fieldInfo) {
|
||||
omitTF = fieldInfo.omitTermFreqAndPositions;
|
||||
this.indexOptions = fieldInfo.indexOptions;
|
||||
//System.out.println("PW field=" + fieldInfo.name + " omitTF=" + omitTF);
|
||||
storePayloads = fieldInfo.storePayloads;
|
||||
wrappedPostingsWriter.setField(fieldInfo);
|
||||
|
@ -123,8 +124,11 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase {
|
|||
assert pendingCount < pending.length;
|
||||
currentDoc = pending[pendingCount];
|
||||
currentDoc.docID = docID;
|
||||
if (omitTF) {
|
||||
if (indexOptions == IndexOptions.DOCS_ONLY) {
|
||||
pendingCount++;
|
||||
} else if (indexOptions == IndexOptions.DOCS_AND_FREQS) {
|
||||
pendingCount++;
|
||||
currentDoc.termFreq = termDocFreq;
|
||||
} else {
|
||||
currentDoc.termFreq = termDocFreq;
|
||||
}
|
||||
|
@ -196,7 +200,7 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase {
|
|||
// given codec wants to store other interesting
|
||||
// stuff, it could use this pulsing codec to do so
|
||||
|
||||
if (!omitTF) {
|
||||
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||
int lastDocID = 0;
|
||||
int pendingIDX = 0;
|
||||
int lastPayloadLength = -1;
|
||||
|
@ -239,7 +243,20 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase {
|
|||
}
|
||||
}
|
||||
}
|
||||
} else if (indexOptions == IndexOptions.DOCS_AND_FREQS) {
|
||||
int lastDocID = 0;
|
||||
for(int posIDX=0;posIDX<pendingCount;posIDX++) {
|
||||
final Position doc = pending[posIDX];
|
||||
final int delta = doc.docID - lastDocID;
|
||||
if (doc.termFreq == 1) {
|
||||
buffer.writeVInt((delta<<1)|1);
|
||||
} else {
|
||||
buffer.writeVInt(delta<<1);
|
||||
buffer.writeVInt(doc.termFreq);
|
||||
}
|
||||
lastDocID = doc.docID;
|
||||
}
|
||||
} else if (indexOptions == IndexOptions.DOCS_ONLY) {
|
||||
int lastDocID = 0;
|
||||
for(int posIDX=0;posIDX<pendingCount;posIDX++) {
|
||||
final Position doc = pending[posIDX];
|
||||
|
@ -282,7 +299,7 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase {
|
|||
wrappedPostingsWriter.startTerm();
|
||||
|
||||
// Flush all buffered docs
|
||||
if (!omitTF) {
|
||||
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||
Position doc = null;
|
||||
for(Position pos : pending) {
|
||||
if (doc == null) {
|
||||
|
@ -303,7 +320,7 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase {
|
|||
//wrappedPostingsWriter.finishDoc();
|
||||
} else {
|
||||
for(Position doc : pending) {
|
||||
wrappedPostingsWriter.startDoc(doc.docID, 0);
|
||||
wrappedPostingsWriter.startDoc(doc.docID, indexOptions == IndexOptions.DOCS_ONLY ? 0 : doc.termFreq);
|
||||
}
|
||||
}
|
||||
pendingCount = -1;
|
||||
|
|
|
@ -23,6 +23,7 @@ import java.util.Collection;
|
|||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.SegmentInfo;
|
||||
import org.apache.lucene.index.TermState;
|
||||
|
@ -68,14 +69,17 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
|
|||
|
||||
skipIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.SKIP_EXTENSION), context);
|
||||
|
||||
if (segmentInfo.getHasProx()) {
|
||||
if (segmentInfo.getFieldInfos().hasFreq()) {
|
||||
freqIn = intFactory.openInput(dir, IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.FREQ_EXTENSION), context);
|
||||
} else {
|
||||
freqIn = null;
|
||||
}
|
||||
if (segmentInfo.getHasProx()) {
|
||||
posIn = intFactory.openInput(dir, IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.POS_EXTENSION), context);
|
||||
payloadIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.PAYLOAD_EXTENSION), context);
|
||||
} else {
|
||||
posIn = null;
|
||||
payloadIn = null;
|
||||
freqIn = null;
|
||||
}
|
||||
success = true;
|
||||
} finally {
|
||||
|
@ -89,8 +93,11 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
|
|||
files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.DOC_EXTENSION));
|
||||
files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.SKIP_EXTENSION));
|
||||
|
||||
if (segmentInfo.getHasProx()) {
|
||||
if (segmentInfo.getFieldInfos().hasFreq()) {
|
||||
files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.FREQ_EXTENSION));
|
||||
}
|
||||
|
||||
if (segmentInfo.getHasProx()) {
|
||||
files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.POS_EXTENSION));
|
||||
files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.PAYLOAD_EXTENSION));
|
||||
}
|
||||
|
@ -229,8 +236,11 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
|
|||
final boolean isFirstTerm = termState.termCount == 0;
|
||||
termState.docIndex.read(termState.bytesReader, isFirstTerm);
|
||||
//System.out.println(" docIndex=" + termState.docIndex);
|
||||
if (!fieldInfo.omitTermFreqAndPositions) {
|
||||
if (fieldInfo.indexOptions != IndexOptions.DOCS_ONLY) {
|
||||
termState.freqIndex.read(termState.bytesReader, isFirstTerm);
|
||||
}
|
||||
|
||||
if (fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||
//System.out.println(" freqIndex=" + termState.freqIndex);
|
||||
termState.posIndex.read(termState.bytesReader, isFirstTerm);
|
||||
//System.out.println(" posIndex=" + termState.posIndex);
|
||||
|
@ -277,7 +287,7 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
|
|||
|
||||
@Override
|
||||
public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState _termState, Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
|
||||
assert !fieldInfo.omitTermFreqAndPositions;
|
||||
assert fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||
final SepTermState termState = (SepTermState) _termState;
|
||||
SepDocsAndPositionsEnum postingsEnum;
|
||||
if (reuse == null || !(reuse instanceof SepDocsAndPositionsEnum)) {
|
||||
|
@ -304,6 +314,7 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
|
|||
|
||||
// TODO: -- should we do omitTF with 2 different enum classes?
|
||||
private boolean omitTF;
|
||||
private IndexOptions indexOptions;
|
||||
private boolean storePayloads;
|
||||
private Bits liveDocs;
|
||||
private final IntIndexInput.Reader docReader;
|
||||
|
@ -340,7 +351,8 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
|
|||
|
||||
SepDocsEnum init(FieldInfo fieldInfo, SepTermState termState, Bits liveDocs) throws IOException {
|
||||
this.liveDocs = liveDocs;
|
||||
omitTF = fieldInfo.omitTermFreqAndPositions;
|
||||
this.indexOptions = fieldInfo.indexOptions;
|
||||
omitTF = indexOptions == IndexOptions.DOCS_ONLY;
|
||||
storePayloads = fieldInfo.storePayloads;
|
||||
|
||||
// TODO: can't we only do this if consumer
|
||||
|
@ -456,7 +468,7 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
|
|||
0,
|
||||
docFreq,
|
||||
storePayloads);
|
||||
skipper.setOmitTF(omitTF);
|
||||
skipper.setIndexOptions(indexOptions);
|
||||
|
||||
skipped = true;
|
||||
}
|
||||
|
@ -633,7 +645,7 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
|
|||
payloadFP,
|
||||
docFreq,
|
||||
storePayloads);
|
||||
|
||||
skipper.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
|
||||
skipped = true;
|
||||
}
|
||||
final int newCount = skipper.skipTo(target);
|
||||
|
|
|
@ -23,6 +23,7 @@ import java.util.Set;
|
|||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
import org.apache.lucene.index.codecs.PostingsWriterBase;
|
||||
|
@ -86,7 +87,7 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
|
|||
final int totalNumDocs;
|
||||
|
||||
boolean storePayloads;
|
||||
boolean omitTF;
|
||||
IndexOptions indexOptions;
|
||||
|
||||
long lastSkipFP;
|
||||
|
||||
|
@ -121,11 +122,13 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
|
|||
docOut = factory.createOutput(state.directory, docFileName, state.context);
|
||||
docIndex = docOut.index();
|
||||
|
||||
if (state.fieldInfos.hasProx()) {
|
||||
if (state.fieldInfos.hasFreq()) {
|
||||
final String frqFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, FREQ_EXTENSION);
|
||||
freqOut = factory.createOutput(state.directory, frqFileName, state.context);
|
||||
freqIndex = freqOut.index();
|
||||
}
|
||||
|
||||
if (state.fieldInfos.hasProx()) {
|
||||
final String posFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, POS_EXTENSION);
|
||||
posOut = factory.createOutput(state.directory, posFileName, state.context);
|
||||
posIndex = posOut.index();
|
||||
|
@ -168,12 +171,17 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
|
|||
@Override
|
||||
public void startTerm() throws IOException {
|
||||
docIndex.mark();
|
||||
if (!omitTF) {
|
||||
|
||||
if (indexOptions != IndexOptions.DOCS_ONLY) {
|
||||
freqIndex.mark();
|
||||
}
|
||||
|
||||
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||
posIndex.mark();
|
||||
payloadStart = payloadOut.getFilePointer();
|
||||
lastPayloadLength = -1;
|
||||
}
|
||||
|
||||
skipListWriter.resetSkip(docIndex, freqIndex, posIndex);
|
||||
}
|
||||
|
||||
|
@ -182,9 +190,9 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
|
|||
@Override
|
||||
public void setField(FieldInfo fieldInfo) {
|
||||
this.fieldInfo = fieldInfo;
|
||||
omitTF = fieldInfo.omitTermFreqAndPositions;
|
||||
skipListWriter.setOmitTF(omitTF);
|
||||
storePayloads = !omitTF && fieldInfo.storePayloads;
|
||||
this.indexOptions = fieldInfo.indexOptions;
|
||||
skipListWriter.setIndexOptions(indexOptions);
|
||||
storePayloads = indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS && fieldInfo.storePayloads;
|
||||
}
|
||||
|
||||
/** Adds a new doc in this term. If this returns null
|
||||
|
@ -209,7 +217,7 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
|
|||
|
||||
lastDocID = docID;
|
||||
docOut.write(delta);
|
||||
if (!omitTF) {
|
||||
if (indexOptions != IndexOptions.DOCS_ONLY) {
|
||||
//System.out.println(" sepw startDoc: write freq=" + termDocFreq);
|
||||
freqOut.write(termDocFreq);
|
||||
}
|
||||
|
@ -227,7 +235,7 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
|
|||
/** Add a new position & payload */
|
||||
@Override
|
||||
public void addPosition(int position, BytesRef payload) throws IOException {
|
||||
assert !omitTF;
|
||||
assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||
|
||||
final int delta = position - lastPosition;
|
||||
assert delta >= 0: "position=" + position + " lastPosition=" + lastPosition; // not quite right (if pos=0 is repeated twice we don't catch it)
|
||||
|
@ -274,10 +282,12 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
|
|||
docIndex.write(indexBytesWriter, isFirstTerm);
|
||||
//System.out.println(" docIndex=" + docIndex);
|
||||
|
||||
if (!omitTF) {
|
||||
if (indexOptions != IndexOptions.DOCS_ONLY) {
|
||||
freqIndex.write(indexBytesWriter, isFirstTerm);
|
||||
//System.out.println(" freqIndex=" + freqIndex);
|
||||
}
|
||||
|
||||
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||
posIndex.write(indexBytesWriter, isFirstTerm);
|
||||
//System.out.println(" posIndex=" + posIndex);
|
||||
if (storePayloads) {
|
||||
|
|
|
@ -21,6 +21,7 @@ import java.io.IOException;
|
|||
import java.util.Arrays;
|
||||
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.codecs.MultiLevelSkipListReader;
|
||||
|
||||
/**
|
||||
|
@ -87,10 +88,10 @@ class SepSkipListReader extends MultiLevelSkipListReader {
|
|||
}
|
||||
}
|
||||
|
||||
boolean omitTF;
|
||||
IndexOptions indexOptions;
|
||||
|
||||
void setOmitTF(boolean v) {
|
||||
omitTF = v;
|
||||
void setIndexOptions(IndexOptions v) {
|
||||
indexOptions = v;
|
||||
}
|
||||
|
||||
void init(long skipPointer,
|
||||
|
@ -177,7 +178,7 @@ class SepSkipListReader extends MultiLevelSkipListReader {
|
|||
@Override
|
||||
protected int readSkipData(int level, IndexInput skipStream) throws IOException {
|
||||
int delta;
|
||||
assert !omitTF || !currentFieldStoresPayloads;
|
||||
assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !currentFieldStoresPayloads;
|
||||
if (currentFieldStoresPayloads) {
|
||||
// the current field stores payloads.
|
||||
// if the doc delta is odd then we have
|
||||
|
@ -192,11 +193,11 @@ class SepSkipListReader extends MultiLevelSkipListReader {
|
|||
} else {
|
||||
delta = skipStream.readVInt();
|
||||
}
|
||||
if (!omitTF) {
|
||||
if (indexOptions != IndexOptions.DOCS_ONLY) {
|
||||
freqIndex[level].read(skipStream, false);
|
||||
}
|
||||
docIndex[level].read(skipStream, false);
|
||||
if (!omitTF) {
|
||||
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||
posIndex[level].read(skipStream, false);
|
||||
if (currentFieldStoresPayloads) {
|
||||
payloadPointer[level] += skipStream.readVInt();
|
||||
|
|
|
@ -21,6 +21,7 @@ import java.io.IOException;
|
|||
import java.util.Arrays;
|
||||
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.codecs.MultiLevelSkipListWriter;
|
||||
|
||||
// TODO: -- skip data should somehow be more local to the
|
||||
|
@ -84,10 +85,10 @@ class SepSkipListWriter extends MultiLevelSkipListWriter {
|
|||
}
|
||||
}
|
||||
|
||||
boolean omitTF;
|
||||
IndexOptions indexOptions;
|
||||
|
||||
void setOmitTF(boolean v) {
|
||||
omitTF = v;
|
||||
void setIndexOptions(IndexOptions v) {
|
||||
indexOptions = v;
|
||||
}
|
||||
|
||||
void setPosOutput(IntIndexOutput posOutput) throws IOException {
|
||||
|
@ -159,7 +160,7 @@ class SepSkipListWriter extends MultiLevelSkipListWriter {
|
|||
// current payload length equals the length at the previous
|
||||
// skip point
|
||||
|
||||
assert !omitTF || !curStorePayloads;
|
||||
assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !curStorePayloads;
|
||||
|
||||
if (curStorePayloads) {
|
||||
int delta = curDoc - lastSkipDoc[level];
|
||||
|
@ -179,13 +180,13 @@ class SepSkipListWriter extends MultiLevelSkipListWriter {
|
|||
skipBuffer.writeVInt(curDoc - lastSkipDoc[level]);
|
||||
}
|
||||
|
||||
if (!omitTF) {
|
||||
if (indexOptions != IndexOptions.DOCS_ONLY) {
|
||||
freqIndex[level].mark();
|
||||
freqIndex[level].write(skipBuffer, false);
|
||||
}
|
||||
docIndex[level].mark();
|
||||
docIndex[level].write(skipBuffer, false);
|
||||
if (!omitTF) {
|
||||
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||
posIndex[level].mark();
|
||||
posIndex[level].write(skipBuffer, false);
|
||||
if (curStorePayloads) {
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.index.codecs.simpletext;
|
|||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.index.codecs.FieldsProducer;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.SegmentReadState;
|
||||
import org.apache.lucene.index.FieldsEnum;
|
||||
import org.apache.lucene.index.Terms;
|
||||
|
@ -53,6 +54,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
final static BytesRef FIELD = SimpleTextFieldsWriter.FIELD;
|
||||
final static BytesRef TERM = SimpleTextFieldsWriter.TERM;
|
||||
final static BytesRef DOC = SimpleTextFieldsWriter.DOC;
|
||||
final static BytesRef FREQ = SimpleTextFieldsWriter.FREQ;
|
||||
final static BytesRef POS = SimpleTextFieldsWriter.POS;
|
||||
final static BytesRef PAYLOAD = SimpleTextFieldsWriter.PAYLOAD;
|
||||
|
||||
|
@ -114,16 +116,16 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
|
||||
private class SimpleTextTermsEnum extends TermsEnum {
|
||||
private final IndexInput in;
|
||||
private final boolean omitTF;
|
||||
private final IndexOptions indexOptions;
|
||||
private int docFreq;
|
||||
private long totalTermFreq;
|
||||
private long docsStart;
|
||||
private boolean ended;
|
||||
private final BytesRefFSTEnum<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> fstEnum;
|
||||
|
||||
public SimpleTextTermsEnum(FST<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> fst, boolean omitTF) throws IOException {
|
||||
public SimpleTextTermsEnum(FST<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> fst, IndexOptions indexOptions) throws IOException {
|
||||
this.in = (IndexInput) SimpleTextFieldsReader.this.in.clone();
|
||||
this.omitTF = omitTF;
|
||||
this.indexOptions = indexOptions;
|
||||
fstEnum = new BytesRefFSTEnum<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>>(fst);
|
||||
}
|
||||
|
||||
|
@ -218,12 +220,12 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
} else {
|
||||
docsEnum = new SimpleTextDocsEnum();
|
||||
}
|
||||
return docsEnum.reset(docsStart, liveDocs, omitTF);
|
||||
return docsEnum.reset(docsStart, liveDocs, indexOptions == IndexOptions.DOCS_ONLY);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
|
||||
if (omitTF) {
|
||||
if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||
return null;
|
||||
}
|
||||
|
||||
|
@ -303,8 +305,11 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
docID = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
|
||||
termFreq = 0;
|
||||
first = false;
|
||||
} else if (scratch.startsWith(FREQ)) {
|
||||
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+FREQ.length, scratch.length-FREQ.length, scratchUTF16);
|
||||
termFreq = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
|
||||
} else if (scratch.startsWith(POS)) {
|
||||
termFreq++;
|
||||
// skip termFreq++;
|
||||
} else if (scratch.startsWith(PAYLOAD)) {
|
||||
// skip
|
||||
} else {
|
||||
|
@ -384,10 +389,13 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+DOC.length, scratch.length-DOC.length, scratchUTF16);
|
||||
docID = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
|
||||
tf = 0;
|
||||
posStart = in.getFilePointer();
|
||||
first = false;
|
||||
} else if (scratch.startsWith(FREQ)) {
|
||||
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+FREQ.length, scratch.length-FREQ.length, scratchUTF16);
|
||||
tf = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
|
||||
posStart = in.getFilePointer();
|
||||
} else if (scratch.startsWith(POS)) {
|
||||
tf++;
|
||||
// skip
|
||||
} else if (scratch.startsWith(PAYLOAD)) {
|
||||
// skip
|
||||
} else {
|
||||
|
@ -461,7 +469,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
|
||||
private class SimpleTextTerms extends Terms {
|
||||
private final long termsStart;
|
||||
private final boolean omitTF;
|
||||
private final IndexOptions indexOptions;
|
||||
private long sumTotalTermFreq;
|
||||
private long sumDocFreq;
|
||||
private FST<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> fst;
|
||||
|
@ -470,7 +478,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
|
||||
public SimpleTextTerms(String field, long termsStart) throws IOException {
|
||||
this.termsStart = termsStart;
|
||||
omitTF = fieldInfos.fieldInfo(field).omitTermFreqAndPositions;
|
||||
indexOptions = fieldInfos.fieldInfo(field).indexOptions;
|
||||
loadTerms();
|
||||
}
|
||||
|
||||
|
@ -533,7 +541,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
@Override
|
||||
public TermsEnum iterator() throws IOException {
|
||||
if (fst != null) {
|
||||
return new SimpleTextTermsEnum(fst, omitTF);
|
||||
return new SimpleTextTermsEnum(fst, indexOptions);
|
||||
} else {
|
||||
return TermsEnum.EMPTY;
|
||||
}
|
||||
|
|
|
@ -24,6 +24,7 @@ import org.apache.lucene.index.codecs.TermsConsumer;
|
|||
import org.apache.lucene.index.codecs.PostingsConsumer;
|
||||
import org.apache.lucene.index.codecs.TermStats;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
|
||||
|
@ -41,6 +42,7 @@ class SimpleTextFieldsWriter extends FieldsConsumer {
|
|||
final static BytesRef FIELD = new BytesRef("field ");
|
||||
final static BytesRef TERM = new BytesRef(" term ");
|
||||
final static BytesRef DOC = new BytesRef(" doc ");
|
||||
final static BytesRef FREQ = new BytesRef(" freq ");
|
||||
final static BytesRef POS = new BytesRef(" pos ");
|
||||
final static BytesRef PAYLOAD = new BytesRef(" payload ");
|
||||
|
||||
|
@ -73,11 +75,15 @@ class SimpleTextFieldsWriter extends FieldsConsumer {
|
|||
write(FIELD);
|
||||
write(field.name);
|
||||
out.writeByte(NEWLINE);
|
||||
return new SimpleTextTermsWriter();
|
||||
return new SimpleTextTermsWriter(field);
|
||||
}
|
||||
|
||||
private class SimpleTextTermsWriter extends TermsConsumer {
|
||||
private final SimpleTextPostingsWriter postingsWriter = new SimpleTextPostingsWriter();
|
||||
private final SimpleTextPostingsWriter postingsWriter;
|
||||
|
||||
public SimpleTextTermsWriter(FieldInfo field) {
|
||||
postingsWriter = new SimpleTextPostingsWriter(field);
|
||||
}
|
||||
|
||||
@Override
|
||||
public PostingsConsumer startTerm(BytesRef term) throws IOException {
|
||||
|
@ -101,6 +107,11 @@ class SimpleTextFieldsWriter extends FieldsConsumer {
|
|||
private class SimpleTextPostingsWriter extends PostingsConsumer {
|
||||
private BytesRef term;
|
||||
private boolean wroteTerm;
|
||||
private IndexOptions indexOptions;
|
||||
|
||||
public SimpleTextPostingsWriter(FieldInfo field) {
|
||||
this.indexOptions = field.indexOptions;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void startDoc(int docID, int termDocFreq) throws IOException {
|
||||
|
@ -115,7 +126,14 @@ class SimpleTextFieldsWriter extends FieldsConsumer {
|
|||
write(DOC);
|
||||
write(Integer.toString(docID));
|
||||
newline();
|
||||
if (indexOptions != IndexOptions.DOCS_ONLY) {
|
||||
write(FREQ);
|
||||
write(Integer.toString(termDocFreq));
|
||||
newline();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
public PostingsConsumer reset(BytesRef term) {
|
||||
this.term = term;
|
||||
|
|
|
@ -23,6 +23,7 @@ import java.util.Collection;
|
|||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.SegmentInfo;
|
||||
import org.apache.lucene.index.TermState;
|
||||
|
@ -190,7 +191,7 @@ public class StandardPostingsReader extends PostingsReaderBase {
|
|||
// undefined
|
||||
}
|
||||
|
||||
if (!fieldInfo.omitTermFreqAndPositions) {
|
||||
if (fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||
if (isFirstTerm) {
|
||||
termState.proxOffset = termState.bytesReader.readVLong();
|
||||
} else {
|
||||
|
@ -219,7 +220,7 @@ public class StandardPostingsReader extends PostingsReaderBase {
|
|||
|
||||
@Override
|
||||
public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
|
||||
if (fieldInfo.omitTermFreqAndPositions) {
|
||||
if (fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||
return null;
|
||||
}
|
||||
|
||||
|
@ -282,7 +283,7 @@ public class StandardPostingsReader extends PostingsReaderBase {
|
|||
}
|
||||
|
||||
public SegmentDocsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits liveDocs) throws IOException {
|
||||
omitTF = fieldInfo.omitTermFreqAndPositions;
|
||||
omitTF = fieldInfo.indexOptions == IndexOptions.DOCS_ONLY;
|
||||
if (omitTF) {
|
||||
freq = 1;
|
||||
}
|
||||
|
@ -455,7 +456,7 @@ public class StandardPostingsReader extends PostingsReaderBase {
|
|||
}
|
||||
|
||||
public SegmentDocsAndPositionsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits liveDocs) throws IOException {
|
||||
assert !fieldInfo.omitTermFreqAndPositions;
|
||||
assert fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||
assert !fieldInfo.storePayloads;
|
||||
|
||||
this.liveDocs = liveDocs;
|
||||
|
@ -649,7 +650,7 @@ public class StandardPostingsReader extends PostingsReaderBase {
|
|||
}
|
||||
|
||||
public SegmentDocsAndPositionsAndPayloadsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits liveDocs) throws IOException {
|
||||
assert !fieldInfo.omitTermFreqAndPositions;
|
||||
assert fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||
assert fieldInfo.storePayloads;
|
||||
if (payload == null) {
|
||||
payload = new BytesRef();
|
||||
|
|
|
@ -25,6 +25,7 @@ import java.io.IOException;
|
|||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
import org.apache.lucene.index.codecs.PostingsWriterBase;
|
||||
|
@ -66,7 +67,7 @@ public final class StandardPostingsWriter extends PostingsWriterBase {
|
|||
final int totalNumDocs;
|
||||
IndexOutput termsOut;
|
||||
|
||||
boolean omitTermFreqAndPositions;
|
||||
IndexOptions indexOptions;
|
||||
boolean storePayloads;
|
||||
// Starts a new term
|
||||
long lastFreqStart;
|
||||
|
@ -144,7 +145,7 @@ public final class StandardPostingsWriter extends PostingsWriterBase {
|
|||
public void setField(FieldInfo fieldInfo) {
|
||||
//System.out.println("SPW: setField");
|
||||
this.fieldInfo = fieldInfo;
|
||||
omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
|
||||
indexOptions = fieldInfo.indexOptions;
|
||||
storePayloads = fieldInfo.storePayloads;
|
||||
//System.out.println(" set init blockFreqStart=" + freqStart);
|
||||
//System.out.println(" set init blockProxStart=" + proxStart);
|
||||
|
@ -173,7 +174,7 @@ public final class StandardPostingsWriter extends PostingsWriterBase {
|
|||
assert docID < totalNumDocs: "docID=" + docID + " totalNumDocs=" + totalNumDocs;
|
||||
|
||||
lastDocID = docID;
|
||||
if (omitTermFreqAndPositions) {
|
||||
if (indexOptions == IndexOptions.DOCS_ONLY) {
|
||||
freqOut.writeVInt(delta);
|
||||
} else if (1 == termDocFreq) {
|
||||
freqOut.writeVInt((delta<<1) | 1);
|
||||
|
@ -189,7 +190,7 @@ public final class StandardPostingsWriter extends PostingsWriterBase {
|
|||
@Override
|
||||
public void addPosition(int position, BytesRef payload) throws IOException {
|
||||
//System.out.println("StandardW: addPos pos=" + position + " payload=" + (payload == null ? "null" : (payload.length + " bytes")) + " proxFP=" + proxOut.getFilePointer());
|
||||
assert !omitTermFreqAndPositions: "omitTermFreqAndPositions is true";
|
||||
assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS: "invalid indexOptions: " + indexOptions;
|
||||
assert proxOut != null;
|
||||
|
||||
final int delta = position - lastPosition;
|
||||
|
@ -246,7 +247,7 @@ public final class StandardPostingsWriter extends PostingsWriterBase {
|
|||
bytesWriter.writeVInt((int) (skipListWriter.writeSkip(freqOut)-freqStart));
|
||||
}
|
||||
|
||||
if (!omitTermFreqAndPositions) {
|
||||
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||
//System.out.println(" proxFP=" + proxStart);
|
||||
if (isFirstTerm) {
|
||||
bytesWriter.writeVLong(proxStart);
|
||||
|
|
|
@ -195,7 +195,7 @@ public class MultiPhraseQuery extends Query {
|
|||
if (postingsEnum == null) {
|
||||
if (reader.termDocsEnum(liveDocs, term.field(), term.bytes()) != null) {
|
||||
// term does exist, but has no positions
|
||||
throw new IllegalStateException("field \"" + term.field() + "\" was indexed with Field.omitTermFreqAndPositions=true; cannot run PhraseQuery (term=" + term.text() + ")");
|
||||
throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")");
|
||||
} else {
|
||||
// term does not exist
|
||||
return null;
|
||||
|
@ -443,7 +443,7 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum {
|
|||
} else {
|
||||
if (indexReader.termDocsEnum(liveDocs, terms[i].field(), terms[i].bytes()) != null) {
|
||||
// term does exist, but has no positions
|
||||
throw new IllegalStateException("field \"" + terms[i].field() + "\" was indexed with Field.omitTermFreqAndPositions=true; cannot run PhraseQuery (term=" + terms[i].text() + ")");
|
||||
throw new IllegalStateException("field \"" + terms[i].field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + terms[i].text() + ")");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -229,7 +229,7 @@ public class PhraseQuery extends Query {
|
|||
if (postingsEnum == null) {
|
||||
assert (reader.termDocsEnum(liveDocs, t.field(), t.bytes(), state) != null) : "termstate found but no term exists in reader";
|
||||
// term does exist, but has no positions
|
||||
throw new IllegalStateException("field \"" + t.field() + "\" was indexed with Field.omitTermFreqAndPositions=true; cannot run PhraseQuery (term=" + t.text() + ")");
|
||||
throw new IllegalStateException("field \"" + t.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + t.text() + ")");
|
||||
}
|
||||
// get the docFreq without seeking
|
||||
TermsEnum te = reader.fields().terms(field).getThreadTermsEnum();
|
||||
|
|
|
@ -92,7 +92,7 @@ public class SpanTermQuery extends SpanQuery {
|
|||
} else {
|
||||
if (reader.termDocsEnum(reader.getLiveDocs(), term.field(), term.bytes()) != null) {
|
||||
// term does exist, but has no positions
|
||||
throw new IllegalStateException("field \"" + term.field() + "\" was indexed with Field.omitTermFreqAndPositions=true; cannot run SpanTermQuery (term=" + term.text() + ")");
|
||||
throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run SpanTermQuery (term=" + term.text() + ")");
|
||||
} else {
|
||||
// term does not exist
|
||||
return TermSpans.EMPTY_TERM_SPANS;
|
||||
|
|
|
@ -99,6 +99,10 @@
|
|||
to stored fields file, previously they were stored in
|
||||
text format only.
|
||||
</p>
|
||||
<p>
|
||||
In version 3.4, fields can omit position data while
|
||||
still indexing term frequencies.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
<section id="Definitions"><title>Definitions</title>
|
||||
|
@ -276,7 +280,7 @@
|
|||
<p>Term Frequency
|
||||
data. For each term in the dictionary, the numbers of all the
|
||||
documents that contain that term, and the frequency of the term in
|
||||
that document if omitTf is false.
|
||||
that document, unless frequencies are omitted (IndexOptions.DOCS_ONLY).
|
||||
</p>
|
||||
</li>
|
||||
|
||||
|
@ -284,8 +288,7 @@
|
|||
<p>Term Proximity
|
||||
data. For each term in the dictionary, the positions that the term
|
||||
occurs in each document. Note that this will
|
||||
not exist if all fields in all documents set
|
||||
omitTf to true.
|
||||
not exist if all fields in all documents omit position data.
|
||||
</p>
|
||||
</li>
|
||||
|
||||
|
@ -1080,7 +1083,7 @@
|
|||
|
||||
<p>
|
||||
HasProx is 1 if any fields in this segment have
|
||||
omitTf set to false; else, it's 0.
|
||||
position data (IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); else, it's 0.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
|
@ -1217,11 +1220,13 @@
|
|||
<li>If the fourth lowest-order bit is set (0x08), term offsets are stored with the term vectors.</li>
|
||||
<li>If the fifth lowest-order bit is set (0x10), norms are omitted for the indexed field.</li>
|
||||
<li>If the sixth lowest-order bit is set (0x20), payloads are stored for the indexed field.</li>
|
||||
<li>If the seventh lowest-order bit is set (0x40), term frequencies and positions are omitted for the indexed field.</li>
|
||||
<li>If the eighth lowest-order bit is set (0x80), positions are omitted for the indexed field.</li>
|
||||
</ul>
|
||||
</p>
|
||||
|
||||
<p>
|
||||
FNMVersion (added in 2.9) is always -2.
|
||||
FNMVersion (added in 2.9) is -2 for indexes from 2.9 - 3.3. It is -3 for indexes in Lucene 3.4+.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
|
@ -1419,7 +1424,7 @@
|
|||
file. In particular, it is the difference between the position of
|
||||
this term's data in that file and the position of the previous
|
||||
term's data (or zero, for the first term in the file). For fields
|
||||
with omitTf true, this will be 0 since
|
||||
that omit position data, this will be 0 since
|
||||
prox information is not stored.
|
||||
</p>
|
||||
<p>SkipDelta determines the position of this
|
||||
|
@ -1494,7 +1499,7 @@
|
|||
<p>
|
||||
The .frq file contains the lists of documents
|
||||
which contain each term, along with the frequency of the term in that
|
||||
document (if omitTf is false).
|
||||
document (except when frequencies are omitted: IndexOptions.DOCS_ONLY).
|
||||
</p>
|
||||
<p>FreqFile (.frq) -->
|
||||
<TermFreqs, SkipData>
|
||||
|
@ -1531,26 +1536,26 @@
|
|||
<p>TermFreq
|
||||
entries are ordered by increasing document number.
|
||||
</p>
|
||||
<p>DocDelta: if omitTf is false, this determines both
|
||||
<p>DocDelta: if frequencies are indexed, this determines both
|
||||
the document number and the frequency. In
|
||||
particular, DocDelta/2 is the difference between
|
||||
this document number and the previous document
|
||||
number (or zero when this is the first document in
|
||||
a TermFreqs). When DocDelta is odd, the frequency
|
||||
is one. When DocDelta is even, the frequency is
|
||||
read as another VInt. If omitTf is true, DocDelta
|
||||
read as another VInt. If frequencies are omitted, DocDelta
|
||||
contains the gap (not multiplied by 2) between
|
||||
document numbers and no frequency information is
|
||||
stored.
|
||||
</p>
|
||||
<p>For example, the TermFreqs for a term which occurs
|
||||
once in document seven and three times in document
|
||||
eleven, with omitTf false, would be the following
|
||||
eleven, with frequencies indexed, would be the following
|
||||
sequence of VInts:
|
||||
</p>
|
||||
<p>15, 8, 3
|
||||
</p>
|
||||
<p> If omitTf were true it would be this sequence
|
||||
<p> If frequencies were omitted (IndexOptions.DOCS_ONLY) it would be this sequence
|
||||
of VInts instead:
|
||||
</p>
|
||||
<p>
|
||||
|
@ -1621,9 +1626,9 @@
|
|||
<p>
|
||||
The .prx file contains the lists of positions that
|
||||
each term occurs at within documents. Note that
|
||||
fields with omitTf true do not store
|
||||
fields omitting positional data do not store
|
||||
anything into this file, and if all fields in the
|
||||
index have omitTf true then the .prx file will not
|
||||
index omit positional data then the .prx file will not
|
||||
exist.
|
||||
</p>
|
||||
<p>ProxFile (.prx) -->
|
||||
|
|
|
@ -29,6 +29,7 @@ import org.apache.lucene.analysis.MockTokenizer;
|
|||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.Fieldable;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.search.SimilarityProvider;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
@ -67,7 +68,7 @@ class DocHelper {
|
|||
public static Field noTFField = new Field(NO_TF_KEY, NO_TF_TEXT,
|
||||
Field.Store.YES, Field.Index.ANALYZED);
|
||||
static {
|
||||
noTFField.setOmitTermFreqAndPositions(true);
|
||||
noTFField.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||
}
|
||||
|
||||
public static final String UNINDEXED_FIELD_TEXT = "unindexed field text";
|
||||
|
@ -173,7 +174,7 @@ class DocHelper {
|
|||
if (f.isStored()) add(stored,f);
|
||||
else add(unstored,f);
|
||||
if (f.getOmitNorms()) add(noNorms,f);
|
||||
if (f.getOmitTermFreqAndPositions()) add(noTf,f);
|
||||
if (f.getIndexOptions() == IndexOptions.DOCS_ONLY) add(noTf,f);
|
||||
if (f.isLazy()) add(lazy, f);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -22,6 +22,7 @@ import java.util.Comparator;
|
|||
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
import org.apache.lucene.index.codecs.FieldsConsumer;
|
||||
|
@ -90,7 +91,7 @@ class PreFlexFieldsWriter extends FieldsConsumer {
|
|||
|
||||
public PreFlexTermsWriter(FieldInfo fieldInfo) {
|
||||
this.fieldInfo = fieldInfo;
|
||||
omitTF = fieldInfo.omitTermFreqAndPositions;
|
||||
omitTF = fieldInfo.indexOptions == IndexOptions.DOCS_ONLY;
|
||||
storePayloads = fieldInfo.storePayloads;
|
||||
}
|
||||
|
||||
|
|
|
@ -422,7 +422,7 @@ public class _TestUtil {
|
|||
List<Fieldable> fields = doc.getFields();
|
||||
for (Fieldable field : fields) {
|
||||
fieldInfos.addOrUpdate(field.name(), field.isIndexed(), field.isTermVectorStored(), field.isStorePositionWithTermVector(),
|
||||
field.isStoreOffsetWithTermVector(), field.getOmitNorms(), false, field.getOmitTermFreqAndPositions(), field.docValuesType());
|
||||
field.isStoreOffsetWithTermVector(), field.getOmitNorms(), false, field.getIndexOptions(), field.docValuesType());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -507,7 +507,7 @@ public class _TestUtil {
|
|||
field1.isStored() ? Field.Store.YES : Field.Store.NO,
|
||||
field1.isIndexed() ? (field1.isTokenized() ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED) : Field.Index.NO);
|
||||
field2.setOmitNorms(field1.getOmitNorms());
|
||||
field2.setOmitTermFreqAndPositions(field1.getOmitTermFreqAndPositions());
|
||||
field2.setIndexOptions(field1.getIndexOptions());
|
||||
doc2.add(field2);
|
||||
}
|
||||
|
||||
|
|
|
@ -24,6 +24,7 @@ import org.apache.lucene.analysis.TokenStream;
|
|||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.codecs.CodecProvider;
|
||||
import org.apache.lucene.store.MockDirectoryWrapper;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
@ -62,7 +63,7 @@ public class Test2BPostings extends LuceneTestCase {
|
|||
|
||||
Document doc = new Document();
|
||||
Field field = new Field("field", new MyTokenStream());
|
||||
field.setOmitTermFreqAndPositions(true);
|
||||
field.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||
field.setOmitNorms(true);
|
||||
doc.add(field);
|
||||
|
||||
|
|
|
@ -23,6 +23,7 @@ import org.apache.lucene.search.*;
|
|||
import org.apache.lucene.analysis.*;
|
||||
import org.apache.lucene.analysis.tokenattributes.*;
|
||||
import org.apache.lucene.document.*;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.codecs.CodecProvider;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
|
@ -177,7 +178,7 @@ public class Test2BTerms extends LuceneTestCase {
|
|||
Document doc = new Document();
|
||||
final MyTokenStream ts = new MyTokenStream(random, TERMS_PER_DOC);
|
||||
Field field = new Field("field", ts);
|
||||
field.setOmitTermFreqAndPositions(true);
|
||||
field.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||
field.setOmitNorms(true);
|
||||
doc.add(field);
|
||||
//w.setInfoStream(System.out);
|
||||
|
|
|
@ -31,6 +31,7 @@ import org.apache.lucene.document.Document;
|
|||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.Fieldable;
|
||||
import org.apache.lucene.document.NumericField;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
import org.apache.lucene.search.DefaultSimilarity;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
|
@ -606,10 +607,10 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
|
|||
private void addNoProxDoc(IndexWriter writer) throws IOException {
|
||||
Document doc = new Document();
|
||||
Field f = new Field("content3", "aaa", Field.Store.YES, Field.Index.ANALYZED);
|
||||
f.setOmitTermFreqAndPositions(true);
|
||||
f.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||
doc.add(f);
|
||||
f = new Field("content4", "aaa", Field.Store.YES, Field.Index.NO);
|
||||
f.setOmitTermFreqAndPositions(true);
|
||||
f.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||
doc.add(f);
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
|
|
|
@ -25,6 +25,7 @@ import org.apache.lucene.analysis.MockAnalyzer;
|
|||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.codecs.CodecProvider;
|
||||
import org.apache.lucene.index.codecs.FieldsConsumer;
|
||||
import org.apache.lucene.index.codecs.FieldsProducer;
|
||||
|
@ -84,7 +85,8 @@ public class TestCodecs extends LuceneTestCase {
|
|||
this.storePayloads = storePayloads;
|
||||
fieldInfos.addOrUpdate(name, true);
|
||||
fieldInfo = fieldInfos.fieldInfo(name);
|
||||
fieldInfo.omitTermFreqAndPositions = omitTF;
|
||||
// TODO: change this test to use all three
|
||||
fieldInfo.indexOptions = omitTF ? IndexOptions.DOCS_ONLY : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||
fieldInfo.storePayloads = storePayloads;
|
||||
this.terms = terms;
|
||||
for(int i=0;i<terms.length;i++)
|
||||
|
|
|
@ -34,6 +34,7 @@ import org.apache.lucene.document.Field.Index;
|
|||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.document.Field.TermVector;
|
||||
import org.apache.lucene.document.Fieldable;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IOContext.Context;
|
||||
|
@ -303,7 +304,7 @@ public class TestDocumentWriter extends LuceneTestCase {
|
|||
doc.add(newField("f1", "v2", Store.YES, Index.NO));
|
||||
// f2 has no TF
|
||||
Field f = newField("f2", "v1", Store.NO, Index.ANALYZED);
|
||||
f.setOmitTermFreqAndPositions(true);
|
||||
f.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||
doc.add(f);
|
||||
doc.add(newField("f2", "v2", Store.YES, Index.NO));
|
||||
|
||||
|
@ -319,10 +320,10 @@ public class TestDocumentWriter extends LuceneTestCase {
|
|||
FieldInfos fi = reader.fieldInfos();
|
||||
// f1
|
||||
assertFalse("f1 should have no norms", reader.hasNorms("f1"));
|
||||
assertFalse("omitTermFreqAndPositions field bit should not be set for f1", fi.fieldInfo("f1").omitTermFreqAndPositions);
|
||||
assertEquals("omitTermFreqAndPositions field bit should not be set for f1", IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, fi.fieldInfo("f1").indexOptions);
|
||||
// f2
|
||||
assertTrue("f2 should have norms", reader.hasNorms("f2"));
|
||||
assertTrue("omitTermFreqAndPositions field bit should be set for f2", fi.fieldInfo("f2").omitTermFreqAndPositions);
|
||||
assertEquals("omitTermFreqAndPositions field bit should be set for f2", IndexOptions.DOCS_ONLY, fi.fieldInfo("f2").indexOptions);
|
||||
reader.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.lucene.index;
|
|||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
|
||||
|
@ -137,7 +138,7 @@ public class TestFieldInfos extends LuceneTestCase {
|
|||
try {
|
||||
readOnly.addOrUpdate("bogus", random.nextBoolean(), random.nextBoolean(),
|
||||
random.nextBoolean(), random.nextBoolean(), random.nextBoolean(),
|
||||
random.nextBoolean(), random.nextBoolean(), null);
|
||||
random.nextBoolean(), random.nextBoolean() ? IndexOptions.DOCS_ONLY : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, null);
|
||||
fail("instance should be read only");
|
||||
} catch (IllegalStateException e) {
|
||||
// expected
|
||||
|
|
|
@ -30,6 +30,7 @@ import org.apache.lucene.document.FieldSelectorResult;
|
|||
import org.apache.lucene.document.Fieldable;
|
||||
import org.apache.lucene.document.LoadFirstFieldSelector;
|
||||
import org.apache.lucene.document.SetBasedFieldSelector;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
import org.apache.lucene.search.FieldCache;
|
||||
import org.apache.lucene.store.AlreadyClosedException;
|
||||
|
@ -91,7 +92,7 @@ public class TestFieldsReader extends LuceneTestCase {
|
|||
assertTrue(field.isStoreOffsetWithTermVector() == true);
|
||||
assertTrue(field.isStorePositionWithTermVector() == true);
|
||||
assertTrue(field.getOmitNorms() == false);
|
||||
assertTrue(field.getOmitTermFreqAndPositions() == false);
|
||||
assertTrue(field.getIndexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
|
||||
|
||||
field = doc.getField(DocHelper.TEXT_FIELD_3_KEY);
|
||||
assertTrue(field != null);
|
||||
|
@ -99,7 +100,7 @@ public class TestFieldsReader extends LuceneTestCase {
|
|||
assertTrue(field.isStoreOffsetWithTermVector() == false);
|
||||
assertTrue(field.isStorePositionWithTermVector() == false);
|
||||
assertTrue(field.getOmitNorms() == true);
|
||||
assertTrue(field.getOmitTermFreqAndPositions() == false);
|
||||
assertTrue(field.getIndexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
|
||||
|
||||
field = doc.getField(DocHelper.NO_TF_KEY);
|
||||
assertTrue(field != null);
|
||||
|
@ -107,7 +108,7 @@ public class TestFieldsReader extends LuceneTestCase {
|
|||
assertTrue(field.isStoreOffsetWithTermVector() == false);
|
||||
assertTrue(field.isStorePositionWithTermVector() == false);
|
||||
assertTrue(field.getOmitNorms() == false);
|
||||
assertTrue(field.getOmitTermFreqAndPositions() == true);
|
||||
assertTrue(field.getIndexOptions() == IndexOptions.DOCS_ONLY);
|
||||
reader.close();
|
||||
}
|
||||
|
||||
|
|
|
@ -26,6 +26,7 @@ import org.apache.lucene.analysis.TokenStream;
|
|||
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.codecs.CodecProvider;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
@ -63,8 +64,8 @@ public class TestLongPostings extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testLongPostings() throws Exception {
|
||||
assumeFalse("Too slow with SimpleText codec", CodecProvider.getDefault().getFieldCodec("field").equals("SimpleText"));
|
||||
assumeFalse("Too slow with Memory codec", CodecProvider.getDefault().getFieldCodec("field").equals("Memory"));
|
||||
assumeFalse("Too slow with SimpleText codec at night", TEST_NIGHTLY && CodecProvider.getDefault().getFieldCodec("field").equals("SimpleText"));
|
||||
assumeFalse("Too slow with Memory codec at night", TEST_NIGHTLY && CodecProvider.getDefault().getFieldCodec("field").equals("Memory"));
|
||||
|
||||
// Don't use _TestUtil.getTempDir so that we own the
|
||||
// randomness (ie same seed will point to same dir):
|
||||
|
@ -250,4 +251,187 @@ public class TestLongPostings extends LuceneTestCase {
|
|||
r.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
// a weaker form of testLongPostings that doesn't check positions
|
||||
public void testLongPostingsNoPositions() throws Exception {
|
||||
doTestLongPostingsNoPositions(IndexOptions.DOCS_ONLY);
|
||||
doTestLongPostingsNoPositions(IndexOptions.DOCS_AND_FREQS);
|
||||
}
|
||||
|
||||
public void doTestLongPostingsNoPositions(IndexOptions options) throws Exception {
|
||||
assumeFalse("Too slow with SimpleText codec at night", TEST_NIGHTLY && CodecProvider.getDefault().getFieldCodec("field").equals("SimpleText"));
|
||||
assumeFalse("Too slow with Memory codec at night", TEST_NIGHTLY && CodecProvider.getDefault().getFieldCodec("field").equals("Memory"));
|
||||
// Don't use _TestUtil.getTempDir so that we own the
|
||||
// randomness (ie same seed will point to same dir):
|
||||
Directory dir = newFSDirectory(_TestUtil.getTempDir("longpostings" + "." + random.nextLong()));
|
||||
|
||||
final int NUM_DOCS = atLeast(2000);
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: NUM_DOCS=" + NUM_DOCS);
|
||||
}
|
||||
|
||||
final String s1 = getRandomTerm(null);
|
||||
final String s2 = getRandomTerm(s1);
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("\nTEST: s1=" + s1 + " s2=" + s2);
|
||||
/*
|
||||
for(int idx=0;idx<s1.length();idx++) {
|
||||
System.out.println(" s1 ch=0x" + Integer.toHexString(s1.charAt(idx)));
|
||||
}
|
||||
for(int idx=0;idx<s2.length();idx++) {
|
||||
System.out.println(" s2 ch=0x" + Integer.toHexString(s2.charAt(idx)));
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
final FixedBitSet isS1 = new FixedBitSet(NUM_DOCS);
|
||||
for(int idx=0;idx<NUM_DOCS;idx++) {
|
||||
if (random.nextBoolean()) {
|
||||
isS1.set(idx);
|
||||
}
|
||||
}
|
||||
|
||||
final IndexReader r;
|
||||
if (true) {
|
||||
final IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
|
||||
.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
|
||||
.setMergePolicy(newLogMergePolicy());
|
||||
iwc.setRAMBufferSizeMB(16.0 + 16.0 * random.nextDouble());
|
||||
iwc.setMaxBufferedDocs(-1);
|
||||
final RandomIndexWriter riw = new RandomIndexWriter(random, dir, iwc);
|
||||
|
||||
for(int idx=0;idx<NUM_DOCS;idx++) {
|
||||
final Document doc = new Document();
|
||||
String s = isS1.get(idx) ? s1 : s2;
|
||||
final Field f = newField("field", s, Field.Index.ANALYZED);
|
||||
f.setIndexOptions(options);
|
||||
final int count = _TestUtil.nextInt(random, 1, 4);
|
||||
for(int ct=0;ct<count;ct++) {
|
||||
doc.add(f);
|
||||
}
|
||||
riw.addDocument(doc);
|
||||
}
|
||||
|
||||
r = riw.getReader();
|
||||
riw.close();
|
||||
} else {
|
||||
r = IndexReader.open(dir);
|
||||
}
|
||||
|
||||
/*
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: terms");
|
||||
TermEnum termEnum = r.terms();
|
||||
while(termEnum.next()) {
|
||||
System.out.println(" term=" + termEnum.term() + " len=" + termEnum.term().text().length());
|
||||
assertTrue(termEnum.docFreq() > 0);
|
||||
System.out.println(" s1?=" + (termEnum.term().text().equals(s1)) + " s1len=" + s1.length());
|
||||
System.out.println(" s2?=" + (termEnum.term().text().equals(s2)) + " s2len=" + s2.length());
|
||||
final String s = termEnum.term().text();
|
||||
for(int idx=0;idx<s.length();idx++) {
|
||||
System.out.println(" ch=0x" + Integer.toHexString(s.charAt(idx)));
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
assertEquals(NUM_DOCS, r.numDocs());
|
||||
assertTrue(r.docFreq(new Term("field", s1)) > 0);
|
||||
assertTrue(r.docFreq(new Term("field", s2)) > 0);
|
||||
|
||||
int num = atLeast(1000);
|
||||
for(int iter=0;iter<num;iter++) {
|
||||
|
||||
final String term;
|
||||
final boolean doS1;
|
||||
if (random.nextBoolean()) {
|
||||
term = s1;
|
||||
doS1 = true;
|
||||
} else {
|
||||
term = s2;
|
||||
doS1 = false;
|
||||
}
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("\nTEST: iter=" + iter + " doS1=" + doS1);
|
||||
}
|
||||
|
||||
final DocsEnum postings = MultiFields.getTermDocsEnum(r, null, "field", new BytesRef(term));
|
||||
|
||||
int docID = -1;
|
||||
while(docID < DocsEnum.NO_MORE_DOCS) {
|
||||
final int what = random.nextInt(3);
|
||||
if (what == 0) {
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: docID=" + docID + "; do next()");
|
||||
}
|
||||
// nextDoc
|
||||
int expected = docID+1;
|
||||
while(true) {
|
||||
if (expected == NUM_DOCS) {
|
||||
expected = Integer.MAX_VALUE;
|
||||
break;
|
||||
} else if (isS1.get(expected) == doS1) {
|
||||
break;
|
||||
} else {
|
||||
expected++;
|
||||
}
|
||||
}
|
||||
docID = postings.nextDoc();
|
||||
if (VERBOSE) {
|
||||
System.out.println(" got docID=" + docID);
|
||||
}
|
||||
assertEquals(expected, docID);
|
||||
if (docID == DocsEnum.NO_MORE_DOCS) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (random.nextInt(6) == 3) {
|
||||
final int freq = postings.freq();
|
||||
assertTrue(freq >=1 && freq <= 4);
|
||||
}
|
||||
} else {
|
||||
// advance
|
||||
final int targetDocID;
|
||||
if (docID == -1) {
|
||||
targetDocID = random.nextInt(NUM_DOCS+1);
|
||||
} else {
|
||||
targetDocID = docID + _TestUtil.nextInt(random, 1, NUM_DOCS - docID);
|
||||
}
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: docID=" + docID + "; do advance(" + targetDocID + ")");
|
||||
}
|
||||
int expected = targetDocID;
|
||||
while(true) {
|
||||
if (expected == NUM_DOCS) {
|
||||
expected = Integer.MAX_VALUE;
|
||||
break;
|
||||
} else if (isS1.get(expected) == doS1) {
|
||||
break;
|
||||
} else {
|
||||
expected++;
|
||||
}
|
||||
}
|
||||
|
||||
docID = postings.advance(targetDocID);
|
||||
if (VERBOSE) {
|
||||
System.out.println(" got docID=" + docID);
|
||||
}
|
||||
assertEquals(expected, docID);
|
||||
if (docID == DocsEnum.NO_MORE_DOCS) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (random.nextInt(6) == 3) {
|
||||
final int freq = postings.freq();
|
||||
assertTrue(freq >=1 && freq <= 4);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
r.close();
|
||||
dir.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,232 @@
|
|||
package org.apache.lucene.index;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
/**
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class TestOmitPositions extends LuceneTestCase {
|
||||
|
||||
public void testBasic() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random, dir);
|
||||
Document doc = new Document();
|
||||
Field f = newField("foo", "this is a test test", Field.Index.ANALYZED);
|
||||
f.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
|
||||
doc.add(f);
|
||||
for (int i = 0; i < 100; i++) {
|
||||
w.addDocument(doc);
|
||||
}
|
||||
|
||||
IndexReader reader = w.getReader();
|
||||
w.close();
|
||||
|
||||
assertNull(MultiFields.getTermPositionsEnum(reader, null, "foo", new BytesRef("test")));
|
||||
|
||||
DocsEnum de = MultiFields.getTermDocsEnum(reader, null, "foo", new BytesRef("test"));
|
||||
while (de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
assertEquals(2, de.freq());
|
||||
}
|
||||
|
||||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
// Tests whether the DocumentWriter correctly records the
|
||||
// merged IndexOptions for each field in the FieldInfo
|
||||
public void testPositions() throws Exception {
|
||||
Directory ram = newDirectory();
|
||||
Analyzer analyzer = new MockAnalyzer(random);
|
||||
IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer));
|
||||
Document d = new Document();
|
||||
|
||||
// f1,f2,f3: docs only
|
||||
Field f1 = newField("f1", "This field has docs only", Field.Store.NO, Field.Index.ANALYZED);
|
||||
f1.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||
d.add(f1);
|
||||
|
||||
Field f2 = newField("f2", "This field has docs only", Field.Store.NO, Field.Index.ANALYZED);
|
||||
f2.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||
d.add(f2);
|
||||
|
||||
Field f3 = newField("f3", "This field has docs only", Field.Store.NO, Field.Index.ANALYZED);
|
||||
f3.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||
d.add(f3);
|
||||
|
||||
// f4,f5,f6 docs and freqs
|
||||
Field f4 = newField("f4", "This field has docs and freqs", Field.Store.NO, Field.Index.ANALYZED);
|
||||
f4.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
|
||||
d.add(f4);
|
||||
|
||||
Field f5 = newField("f5", "This field has docs and freqs", Field.Store.NO, Field.Index.ANALYZED);
|
||||
f5.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
|
||||
d.add(f5);
|
||||
|
||||
Field f6 = newField("f6", "This field has docs and freqs", Field.Store.NO, Field.Index.ANALYZED);
|
||||
f6.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
|
||||
d.add(f6);
|
||||
|
||||
// f7,f8,f9 docs/freqs/positions
|
||||
Field f7 = newField("f7", "This field has docs and freqs and positions", Field.Store.NO, Field.Index.ANALYZED);
|
||||
f7.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
|
||||
d.add(f7);
|
||||
|
||||
Field f8 = newField("f8", "This field has docs and freqs and positions", Field.Store.NO, Field.Index.ANALYZED);
|
||||
f8.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
|
||||
d.add(f8);
|
||||
|
||||
Field f9 = newField("f9", "This field has docs and freqs and positions", Field.Store.NO, Field.Index.ANALYZED);
|
||||
f9.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
|
||||
d.add(f9);
|
||||
|
||||
writer.addDocument(d);
|
||||
writer.optimize();
|
||||
|
||||
// now we add another document which has docs-only for f1, f4, f7, docs/freqs for f2, f5, f8,
|
||||
// and docs/freqs/positions for f3, f6, f9
|
||||
d = new Document();
|
||||
|
||||
// f1,f4,f7: docs only
|
||||
f1 = newField("f1", "This field has docs only", Field.Store.NO, Field.Index.ANALYZED);
|
||||
f1.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||
d.add(f1);
|
||||
|
||||
f4 = newField("f4", "This field has docs only", Field.Store.NO, Field.Index.ANALYZED);
|
||||
f4.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||
d.add(f4);
|
||||
|
||||
f7 = newField("f7", "This field has docs only", Field.Store.NO, Field.Index.ANALYZED);
|
||||
f7.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||
d.add(f7);
|
||||
|
||||
// f2, f5, f8: docs and freqs
|
||||
f2 = newField("f2", "This field has docs and freqs", Field.Store.NO, Field.Index.ANALYZED);
|
||||
f2.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
|
||||
d.add(f2);
|
||||
|
||||
f5 = newField("f5", "This field has docs and freqs", Field.Store.NO, Field.Index.ANALYZED);
|
||||
f5.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
|
||||
d.add(f5);
|
||||
|
||||
f8 = newField("f8", "This field has docs and freqs", Field.Store.NO, Field.Index.ANALYZED);
|
||||
f8.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
|
||||
d.add(f8);
|
||||
|
||||
// f3, f6, f9: docs and freqs and positions
|
||||
f3 = newField("f3", "This field has docs and freqs and positions", Field.Store.NO, Field.Index.ANALYZED);
|
||||
f3.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
|
||||
d.add(f3);
|
||||
|
||||
f6 = newField("f6", "This field has docs and freqs and positions", Field.Store.NO, Field.Index.ANALYZED);
|
||||
f6.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
|
||||
d.add(f6);
|
||||
|
||||
f9 = newField("f9", "This field has docs and freqs and positions", Field.Store.NO, Field.Index.ANALYZED);
|
||||
f9.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
|
||||
d.add(f9);
|
||||
|
||||
writer.addDocument(d);
|
||||
|
||||
// force merge
|
||||
writer.optimize();
|
||||
// flush
|
||||
writer.close();
|
||||
|
||||
SegmentReader reader = getOnlySegmentReader(IndexReader.open(ram, false));
|
||||
FieldInfos fi = reader.fieldInfos();
|
||||
// docs + docs = docs
|
||||
assertEquals(IndexOptions.DOCS_ONLY, fi.fieldInfo("f1").indexOptions);
|
||||
// docs + docs/freqs = docs
|
||||
assertEquals(IndexOptions.DOCS_ONLY, fi.fieldInfo("f2").indexOptions);
|
||||
// docs + docs/freqs/pos = docs
|
||||
assertEquals(IndexOptions.DOCS_ONLY, fi.fieldInfo("f3").indexOptions);
|
||||
// docs/freqs + docs = docs
|
||||
assertEquals(IndexOptions.DOCS_ONLY, fi.fieldInfo("f4").indexOptions);
|
||||
// docs/freqs + docs/freqs = docs/freqs
|
||||
assertEquals(IndexOptions.DOCS_AND_FREQS, fi.fieldInfo("f5").indexOptions);
|
||||
// docs/freqs + docs/freqs/pos = docs/freqs
|
||||
assertEquals(IndexOptions.DOCS_AND_FREQS, fi.fieldInfo("f6").indexOptions);
|
||||
// docs/freqs/pos + docs = docs
|
||||
assertEquals(IndexOptions.DOCS_ONLY, fi.fieldInfo("f7").indexOptions);
|
||||
// docs/freqs/pos + docs/freqs = docs/freqs
|
||||
assertEquals(IndexOptions.DOCS_AND_FREQS, fi.fieldInfo("f8").indexOptions);
|
||||
// docs/freqs/pos + docs/freqs/pos = docs/freqs/pos
|
||||
assertEquals(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, fi.fieldInfo("f9").indexOptions);
|
||||
|
||||
reader.close();
|
||||
ram.close();
|
||||
}
|
||||
|
||||
private void assertNoPrx(Directory dir) throws Throwable {
|
||||
final String[] files = dir.listAll();
|
||||
for(int i=0;i<files.length;i++) {
|
||||
assertFalse(files[i].endsWith(".prx"));
|
||||
assertFalse(files[i].endsWith(".pos"));
|
||||
}
|
||||
}
|
||||
|
||||
// Verifies no *.prx exists when all fields omit term positions:
|
||||
public void testNoPrxFile() throws Throwable {
|
||||
Directory ram = newDirectory();
|
||||
Analyzer analyzer = new MockAnalyzer(random);
|
||||
IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig(
|
||||
TEST_VERSION_CURRENT, analyzer).setMaxBufferedDocs(3).setMergePolicy(newLogMergePolicy()));
|
||||
LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy();
|
||||
lmp.setMergeFactor(2);
|
||||
lmp.setUseCompoundFile(false);
|
||||
Document d = new Document();
|
||||
|
||||
Field f1 = newField("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
|
||||
f1.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
|
||||
d.add(f1);
|
||||
|
||||
for(int i=0;i<30;i++)
|
||||
writer.addDocument(d);
|
||||
|
||||
writer.commit();
|
||||
|
||||
assertNoPrx(ram);
|
||||
|
||||
// now add some documents with positions, and check there is no prox after optimization
|
||||
d = new Document();
|
||||
f1 = newField("f1", "This field has positions", Field.Store.NO, Field.Index.ANALYZED);
|
||||
d.add(f1);
|
||||
|
||||
for(int i=0;i<30;i++)
|
||||
writer.addDocument(d);
|
||||
|
||||
// force merge
|
||||
writer.optimize();
|
||||
// flush
|
||||
writer.close();
|
||||
|
||||
assertNoPrx(ram);
|
||||
ram.close();
|
||||
}
|
||||
}
|
|
@ -26,6 +26,7 @@ import org.apache.lucene.analysis.Analyzer;
|
|||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
|
@ -65,7 +66,7 @@ public class TestOmitTf extends LuceneTestCase {
|
|||
|
||||
// this field will NOT have Tf
|
||||
Field f2 = newField("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED);
|
||||
f2.setOmitTermFreqAndPositions(true);
|
||||
f2.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||
d.add(f2);
|
||||
|
||||
writer.addDocument(d);
|
||||
|
@ -75,10 +76,10 @@ public class TestOmitTf extends LuceneTestCase {
|
|||
d = new Document();
|
||||
|
||||
// Reverse
|
||||
f1.setOmitTermFreqAndPositions(true);
|
||||
f1.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||
d.add(f1);
|
||||
|
||||
f2.setOmitTermFreqAndPositions(false);
|
||||
f2.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
|
||||
d.add(f2);
|
||||
|
||||
writer.addDocument(d);
|
||||
|
@ -90,8 +91,8 @@ public class TestOmitTf extends LuceneTestCase {
|
|||
|
||||
SegmentReader reader = getOnlySegmentReader(IndexReader.open(ram, false));
|
||||
FieldInfos fi = reader.fieldInfos();
|
||||
assertTrue("OmitTermFreqAndPositions field bit should be set.", fi.fieldInfo("f1").omitTermFreqAndPositions);
|
||||
assertTrue("OmitTermFreqAndPositions field bit should be set.", fi.fieldInfo("f2").omitTermFreqAndPositions);
|
||||
assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f1").indexOptions);
|
||||
assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f2").indexOptions);
|
||||
|
||||
reader.close();
|
||||
ram.close();
|
||||
|
@ -117,7 +118,7 @@ public class TestOmitTf extends LuceneTestCase {
|
|||
|
||||
// this field will NOT have Tf
|
||||
Field f2 = newField("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED);
|
||||
f2.setOmitTermFreqAndPositions(true);
|
||||
f2.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||
d.add(f2);
|
||||
|
||||
for(int i=0;i<30;i++)
|
||||
|
@ -128,10 +129,10 @@ public class TestOmitTf extends LuceneTestCase {
|
|||
d = new Document();
|
||||
|
||||
// Reverse
|
||||
f1.setOmitTermFreqAndPositions(true);
|
||||
f1.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||
d.add(f1);
|
||||
|
||||
f2.setOmitTermFreqAndPositions(false);
|
||||
f2.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
|
||||
d.add(f2);
|
||||
|
||||
for(int i=0;i<30;i++)
|
||||
|
@ -144,8 +145,8 @@ public class TestOmitTf extends LuceneTestCase {
|
|||
|
||||
SegmentReader reader = getOnlySegmentReader(IndexReader.open(ram, false));
|
||||
FieldInfos fi = reader.fieldInfos();
|
||||
assertTrue("OmitTermFreqAndPositions field bit should be set.", fi.fieldInfo("f1").omitTermFreqAndPositions);
|
||||
assertTrue("OmitTermFreqAndPositions field bit should be set.", fi.fieldInfo("f2").omitTermFreqAndPositions);
|
||||
assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f1").indexOptions);
|
||||
assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f2").indexOptions);
|
||||
|
||||
reader.close();
|
||||
ram.close();
|
||||
|
@ -176,7 +177,7 @@ public class TestOmitTf extends LuceneTestCase {
|
|||
for(int i=0;i<5;i++)
|
||||
writer.addDocument(d);
|
||||
|
||||
f2.setOmitTermFreqAndPositions(true);
|
||||
f2.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||
|
||||
for(int i=0;i<20;i++)
|
||||
writer.addDocument(d);
|
||||
|
@ -189,8 +190,8 @@ public class TestOmitTf extends LuceneTestCase {
|
|||
|
||||
SegmentReader reader = getOnlySegmentReader(IndexReader.open(ram, false));
|
||||
FieldInfos fi = reader.fieldInfos();
|
||||
assertTrue("OmitTermFreqAndPositions field bit should not be set.", !fi.fieldInfo("f1").omitTermFreqAndPositions);
|
||||
assertTrue("OmitTermFreqAndPositions field bit should be set.", fi.fieldInfo("f2").omitTermFreqAndPositions);
|
||||
assertEquals("OmitTermFreqAndPositions field bit should not be set.", IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, fi.fieldInfo("f1").indexOptions);
|
||||
assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f2").indexOptions);
|
||||
|
||||
reader.close();
|
||||
ram.close();
|
||||
|
@ -198,8 +199,10 @@ public class TestOmitTf extends LuceneTestCase {
|
|||
|
||||
private void assertNoPrx(Directory dir) throws Throwable {
|
||||
final String[] files = dir.listAll();
|
||||
for(int i=0;i<files.length;i++)
|
||||
for(int i=0;i<files.length;i++) {
|
||||
assertFalse(files[i].endsWith(".prx"));
|
||||
assertFalse(files[i].endsWith(".pos"));
|
||||
}
|
||||
}
|
||||
|
||||
// Verifies no *.prx exists when all fields omit term freq:
|
||||
|
@ -213,8 +216,8 @@ public class TestOmitTf extends LuceneTestCase {
|
|||
lmp.setUseCompoundFile(false);
|
||||
Document d = new Document();
|
||||
|
||||
Field f1 = newField("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
|
||||
f1.setOmitTermFreqAndPositions(true);
|
||||
Field f1 = newField("f1", "This field has no term freqs", Field.Store.NO, Field.Index.ANALYZED);
|
||||
f1.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||
d.add(f1);
|
||||
|
||||
for(int i=0;i<30;i++)
|
||||
|
@ -224,6 +227,14 @@ public class TestOmitTf extends LuceneTestCase {
|
|||
|
||||
assertNoPrx(ram);
|
||||
|
||||
// now add some documents with positions, and check there is no prox after optimization
|
||||
d = new Document();
|
||||
f1 = newField("f1", "This field has positions", Field.Store.NO, Field.Index.ANALYZED);
|
||||
d.add(f1);
|
||||
|
||||
for(int i=0;i<30;i++)
|
||||
writer.addDocument(d);
|
||||
|
||||
// force merge
|
||||
writer.optimize();
|
||||
// flush
|
||||
|
@ -253,7 +264,7 @@ public class TestOmitTf extends LuceneTestCase {
|
|||
sb.append(term).append(" ");
|
||||
String content = sb.toString();
|
||||
Field noTf = newField("noTf", content + (i%2==0 ? "" : " notf"), Field.Store.NO, Field.Index.ANALYZED);
|
||||
noTf.setOmitTermFreqAndPositions(true);
|
||||
noTf.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||
d.add(noTf);
|
||||
|
||||
Field tf = newField("tf", content + (i%2==0 ? " tf" : ""), Field.Store.NO, Field.Index.ANALYZED);
|
||||
|
|
|
@ -21,6 +21,7 @@ import org.apache.lucene.document.Field.Index;
|
|||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
|
@ -181,7 +182,7 @@ public class LuceneTaxonomyWriter implements TaxonomyWriter {
|
|||
parentStreamField = new Field(Consts.FIELD_PAYLOADS, parentStream);
|
||||
parentStreamField.setOmitNorms(true);
|
||||
fullPathField = new Field(Consts.FULL, "", Store.YES, Index.NOT_ANALYZED_NO_NORMS);
|
||||
fullPathField.setOmitTermFreqAndPositions(true);
|
||||
fullPathField.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||
|
||||
this.nextID = indexWriter.maxDoc();
|
||||
|
||||
|
|
|
@ -24,6 +24,7 @@ import org.apache.lucene.analysis.MockAnalyzer;
|
|||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.NumericField;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
|
@ -395,7 +396,7 @@ public class TestGrouping extends LuceneTestCase {
|
|||
}
|
||||
// So we can pull filter marking last doc in block:
|
||||
final Field groupEnd = newField("groupend", "x", Field.Index.NOT_ANALYZED);
|
||||
groupEnd.setOmitTermFreqAndPositions(true);
|
||||
groupEnd.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||
groupEnd.setOmitNorms(true);
|
||||
docs.get(docs.size()-1).add(groupEnd);
|
||||
// Add as a doc block:
|
||||
|
|
|
@ -28,6 +28,7 @@ import org.apache.lucene.analysis.Analyzer;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
|
@ -618,7 +619,7 @@ public class SpellChecker implements java.io.Closeable {
|
|||
// the word field is never queried on... it's indexed so it can be quickly
|
||||
// checked for rebuild (and stored for retrieval). Doesn't need norms or TF/pos
|
||||
Field f = new Field(F_WORD, text, Field.Store.YES, Field.Index.NOT_ANALYZED);
|
||||
f.setOmitTermFreqAndPositions(true);
|
||||
f.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||
f.setOmitNorms(true);
|
||||
doc.add(f); // orig term
|
||||
addGram(text, doc, ng1, ng2);
|
||||
|
@ -636,7 +637,7 @@ public class SpellChecker implements java.io.Closeable {
|
|||
if (i == 0) {
|
||||
// only one term possible in the startXXField, TF/pos and norms aren't needed.
|
||||
Field startField = new Field("start" + ng, gram, Field.Store.NO, Field.Index.NOT_ANALYZED);
|
||||
startField.setOmitTermFreqAndPositions(true);
|
||||
startField.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||
startField.setOmitNorms(true);
|
||||
doc.add(startField);
|
||||
}
|
||||
|
@ -645,7 +646,7 @@ public class SpellChecker implements java.io.Closeable {
|
|||
if (end != null) { // may not be present if len==ng1
|
||||
// only one term possible in the endXXField, TF/pos and norms aren't needed.
|
||||
Field endField = new Field("end" + ng, end, Field.Store.NO, Field.Index.NOT_ANALYZED);
|
||||
endField.setOmitTermFreqAndPositions(true);
|
||||
endField.setIndexOptions(IndexOptions.DOCS_ONLY);
|
||||
endField.setOmitNorms(true);
|
||||
doc.add(endField);
|
||||
}
|
||||
|
|
|
@ -321,6 +321,8 @@ New Features
|
|||
before adding to the index. Fix a null pointer exception in logging
|
||||
when there was no unique key. (David Smiley via yonik)
|
||||
|
||||
* LUCENE-2048: Added omitPositions to the schema, so you can omit position
|
||||
information while still indexing term frequencies. (rmuir)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
|
|
|
@ -35,6 +35,7 @@ import org.slf4j.LoggerFactory;
|
|||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Fieldable;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.Fields;
|
||||
|
@ -202,7 +203,10 @@ public class LukeRequestHandler extends RequestHandlerBase
|
|||
flags.append( (f != null && f.storeTermOffsets() ) ? FieldFlag.TERM_VECTOR_OFFSET.getAbbreviation() : '-' );
|
||||
flags.append( (f != null && f.storeTermPositions() ) ? FieldFlag.TERM_VECTOR_POSITION.getAbbreviation() : '-' );
|
||||
flags.append( (f != null && f.omitNorms()) ? FieldFlag.OMIT_NORMS.getAbbreviation() : '-' );
|
||||
flags.append( (f != null && f.omitTf()) ? FieldFlag.OMIT_TF.getAbbreviation() : '-' );
|
||||
flags.append( (f != null &&
|
||||
f.indexOptions() == IndexOptions.DOCS_ONLY) ? FieldFlag.OMIT_TF.getAbbreviation() : '-' );
|
||||
flags.append( (f != null &&
|
||||
f.indexOptions() == IndexOptions.DOCS_AND_FREQS) ? FieldFlag.OMIT_POSITIONS.getAbbreviation() : '-' );
|
||||
flags.append( (lazy) ? FieldFlag.LAZY.getAbbreviation() : '-' );
|
||||
flags.append( (binary) ? FieldFlag.BINARY.getAbbreviation() : '-' );
|
||||
flags.append( (f != null && f.sortMissingFirst() ) ? FieldFlag.SORT_MISSING_FIRST.getAbbreviation() : '-' );
|
||||
|
|
|
@ -47,13 +47,14 @@ public abstract class FieldProperties {
|
|||
protected final static int SORT_MISSING_LAST = 0x00000800;
|
||||
|
||||
protected final static int REQUIRED = 0x00001000;
|
||||
protected final static int OMIT_POSITIONS = 0x00002000;
|
||||
|
||||
static final String[] propertyNames = {
|
||||
"indexed", "tokenized", "stored",
|
||||
"binary", "omitNorms", "omitTermFreqAndPositions",
|
||||
"termVectors", "termPositions", "termOffsets",
|
||||
"multiValued",
|
||||
"sortMissingFirst","sortMissingLast","required"
|
||||
"sortMissingFirst","sortMissingLast","required", "omitPositions"
|
||||
};
|
||||
|
||||
static final Map<String,Integer> propertyMap = new HashMap<String,Integer>();
|
||||
|
|
|
@ -23,6 +23,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
|||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.Fieldable;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.search.Query;
|
||||
|
@ -251,7 +252,7 @@ public abstract class FieldType extends FieldProperties {
|
|||
|
||||
return createField(field.getName(), val, getFieldStore(field, val),
|
||||
getFieldIndex(field, val), getFieldTermVec(field, val), field.omitNorms(),
|
||||
field.omitTf(), boost);
|
||||
field.indexOptions(), boost);
|
||||
}
|
||||
|
||||
|
||||
|
@ -269,14 +270,14 @@ public abstract class FieldType extends FieldProperties {
|
|||
* @return the {@link org.apache.lucene.document.Fieldable}.
|
||||
*/
|
||||
protected Fieldable createField(String name, String val, Field.Store storage, Field.Index index,
|
||||
Field.TermVector vec, boolean omitNorms, boolean omitTFPos, float boost){
|
||||
Field.TermVector vec, boolean omitNorms, IndexOptions options, float boost){
|
||||
Field f = new Field(name,
|
||||
val,
|
||||
storage,
|
||||
index,
|
||||
vec);
|
||||
f.setOmitNorms(omitNorms);
|
||||
f.setOmitTermFreqAndPositions(omitTFPos);
|
||||
f.setIndexOptions(options);
|
||||
f.setBoost(boost);
|
||||
return f;
|
||||
}
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.solr.schema;
|
|||
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.Fieldable;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||
import org.apache.lucene.queries.function.DocValues;
|
||||
|
@ -77,7 +78,7 @@ public class LatLonType extends AbstractSubTypeFieldType implements SpatialQuery
|
|||
if (field.stored()) {
|
||||
f[f.length - 1] = createField(field.getName(), externalVal,
|
||||
getFieldStore(field, externalVal), Field.Index.NO, Field.TermVector.NO,
|
||||
false, false, boost);
|
||||
false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, boost);
|
||||
}
|
||||
return f;
|
||||
}
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.solr.schema;
|
|||
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.Fieldable;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.queries.function.valuesource.VectorValueSource;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
|
@ -90,7 +91,7 @@ public class PointType extends CoordinateFieldType implements SpatialQueryable {
|
|||
String storedVal = externalVal; // normalize or not?
|
||||
f[f.length - 1] = createField(field.getName(), storedVal,
|
||||
getFieldStore(field, storedVal), Field.Index.NO, Field.TermVector.NO,
|
||||
false, false, boost);
|
||||
false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, boost);
|
||||
}
|
||||
|
||||
return f;
|
||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.solr.schema;
|
|||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.lucene.document.Fieldable;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.solr.search.QParser;
|
||||
|
||||
|
@ -81,7 +82,17 @@ public final class SchemaField extends FieldProperties {
|
|||
public boolean storeTermPositions() { return (properties & STORE_TERMPOSITIONS)!=0; }
|
||||
public boolean storeTermOffsets() { return (properties & STORE_TERMOFFSETS)!=0; }
|
||||
public boolean omitNorms() { return (properties & OMIT_NORMS)!=0; }
|
||||
public boolean omitTf() { return (properties & OMIT_TF_POSITIONS)!=0; }
|
||||
|
||||
public IndexOptions indexOptions() {
|
||||
if ((properties & OMIT_TF_POSITIONS) != 0) {
|
||||
return IndexOptions.DOCS_ONLY;
|
||||
} else if ((properties & OMIT_POSITIONS) != 0) {
|
||||
return IndexOptions.DOCS_AND_FREQS;
|
||||
} else {
|
||||
return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||
}
|
||||
}
|
||||
|
||||
public boolean multiValued() { return (properties & MULTIVALUED)!=0; }
|
||||
public boolean sortMissingFirst() { return (properties & SORT_MISSING_FIRST)!=0; }
|
||||
public boolean sortMissingLast() { return (properties & SORT_MISSING_LAST)!=0; }
|
||||
|
@ -215,7 +226,7 @@ public final class SchemaField extends FieldProperties {
|
|||
}
|
||||
|
||||
if (on(falseProps,INDEXED)) {
|
||||
int pp = (INDEXED | OMIT_NORMS | OMIT_TF_POSITIONS
|
||||
int pp = (INDEXED | OMIT_NORMS | OMIT_TF_POSITIONS | OMIT_POSITIONS
|
||||
| STORE_TERMVECTORS | STORE_TERMPOSITIONS | STORE_TERMOFFSETS
|
||||
| SORT_MISSING_FIRST | SORT_MISSING_LAST);
|
||||
if (on(pp,trueProps)) {
|
||||
|
@ -225,6 +236,14 @@ public final class SchemaField extends FieldProperties {
|
|||
|
||||
}
|
||||
|
||||
if (on(falseProps,OMIT_TF_POSITIONS)) {
|
||||
int pp = (OMIT_POSITIONS | OMIT_TF_POSITIONS);
|
||||
if (on(pp, trueProps)) {
|
||||
throw new RuntimeException("SchemaField: " + name + " conflicting indexed field options:" + props);
|
||||
}
|
||||
p &= ~pp;
|
||||
}
|
||||
|
||||
if (on(falseProps,STORE_TERMVECTORS)) {
|
||||
int pp = (STORE_TERMVECTORS | STORE_TERMPOSITIONS | STORE_TERMOFFSETS);
|
||||
if (on(pp,trueProps)) {
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.solr.schema;
|
|||
import org.apache.lucene.document.Fieldable;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.NumericField;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.queries.function.valuesource.DoubleFieldSource;
|
||||
import org.apache.lucene.queries.function.valuesource.FloatFieldSource;
|
||||
|
@ -521,7 +522,7 @@ public class TrieField extends FieldType {
|
|||
}
|
||||
|
||||
f.setOmitNorms(field.omitNorms());
|
||||
f.setOmitTermFreqAndPositions(field.omitTf());
|
||||
f.setIndexOptions(field.indexOptions());
|
||||
f.setBoost(boost);
|
||||
return f;
|
||||
}
|
||||
|
|
|
@ -417,6 +417,13 @@
|
|||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- omitPositions example -->
|
||||
<fieldType name="nopositions" class="solr.TextField" omitPositions="true">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
</types>
|
||||
|
||||
|
||||
|
@ -526,6 +533,8 @@
|
|||
<field name="sim2text" type="sim2" indexed="true" stored="true"/>
|
||||
<field name="sim3text" type="sim3" indexed="true" stored="true"/>
|
||||
|
||||
<field name="nopositionstext" type="nopositions" indexed="true" stored="true"/>
|
||||
|
||||
<field name="tlong" type="tlong" indexed="true" stored="true" />
|
||||
|
||||
<!-- Dynamic field definitions. If a field name is not found, dynamicFields
|
||||
|
|
|
@ -0,0 +1,60 @@
|
|||
package org.apache.solr.schema;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
public class TestOmitPositions extends SolrTestCaseJ4 {
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
initCore("solrconfig.xml","schema.xml");
|
||||
// add some docs
|
||||
assertU(adoc("id", "1", "nopositionstext", "this is a test this is only a test", "text", "just another test"));
|
||||
assertU(adoc("id", "2", "nopositionstext", "test test test test test test test test test test test test test", "text", "have a nice day"));
|
||||
assertU(commit());
|
||||
}
|
||||
|
||||
public void testFrequencies() {
|
||||
// doc 2 should be ranked above doc 1
|
||||
assertQ("term query: ",
|
||||
req("fl", "id", "q", "nopositionstext:test"),
|
||||
"//*[@numFound='2']",
|
||||
"//result/doc[1]/int[@name='id'][.=2]",
|
||||
"//result/doc[2]/int[@name='id'][.=1]"
|
||||
);
|
||||
}
|
||||
|
||||
public void testPositions() {
|
||||
// no results should be found:
|
||||
// lucene 3.x: silent failure
|
||||
// lucene 4.x: illegal state exception, field was indexed without positions
|
||||
|
||||
ignoreException("was indexed without position data");
|
||||
try {
|
||||
assertQ("phrase query: ",
|
||||
req("fl", "id", "q", "nopositionstext:\"test test\""),
|
||||
"//*[@numFound='0']"
|
||||
);
|
||||
} catch (Exception expected) {
|
||||
assertTrue(expected.getCause() instanceof IllegalStateException);
|
||||
// in lucene 4.0, queries don't silently fail
|
||||
}
|
||||
resetExceptionIgnores();
|
||||
}
|
||||
}
|
|
@ -31,6 +31,7 @@ public enum FieldFlag {
|
|||
TERM_VECTOR_POSITION('p', "Store Position With TermVector"),
|
||||
OMIT_NORMS('O', "Omit Norms"),
|
||||
OMIT_TF('F', "Omit Tf"),
|
||||
OMIT_POSITIONS('P', "Omit Positions"),
|
||||
LAZY('L', "Lazy"),
|
||||
BINARY('B', "Binary"),
|
||||
SORT_MISSING_FIRST('f', "Sort Missing First"),
|
||||
|
|
Loading…
Reference in New Issue