LUCENE-2048: omit positions but keep term freq

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1145594 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2011-07-12 13:31:22 +00:00
parent dfc5ce1cff
commit 1c646d24c9
68 changed files with 999 additions and 289 deletions

View File

@ -523,6 +523,15 @@ New Features
(grow on demand if you set/get/clear too-large indices). (Mike
McCandless)
* LUCENE-2048: Added the ability to omit positions but still index
term frequencies. You can now control what is indexed into
the postings via AbstractField.setIndexOptions:
DOCS_ONLY: only documents are indexed: term frequencies and positions are omitted
DOCS_AND_FREQS: only documents and term frequencies are indexed: positions are omitted
DOCS_AND_FREQS_AND_POSITIONS: full postings: documents, frequencies, and positions
AbstractField.setOmitTermFreqAndPositions is deprecated,
you should use DOCS_ONLY instead. (Robert Muir)
Optimizations
* LUCENE-3201, LUCENE-3218: CompoundFileSystem code has been consolidated

View File

@ -22,6 +22,7 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.IndexWriterConfig;
@ -173,7 +174,7 @@ public class IndexFiles {
// the field into separate words and don't index term frequency
// or positional information:
Field pathField = new Field("path", file.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
pathField.setOmitTermFreqAndPositions(true);
pathField.setIndexOptions(IndexOptions.DOCS_ONLY);
doc.add(pathField);
// Add the last modified date of the file a field named "modified".

View File

@ -87,9 +87,7 @@ public class TestNRTManager extends LuceneTestCase {
if (field1.getOmitNorms()) {
field2.setOmitNorms(true);
}
if (field1.getOmitTermFreqAndPositions()) {
field2.setOmitTermFreqAndPositions(true);
}
field2.setIndexOptions(field1.getIndexOptions());
doc2.add(field2);
}

View File

@ -49,7 +49,7 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
* <pre>
* Field field = new Field(name, new NumericTokenStream(precisionStep).setIntValue(value));
* field.setOmitNorms(true);
* field.setOmitTermFreqAndPositions(true);
* field.setIndexOptions(IndexOptions.DOCS_ONLY);
* document.add(field);
* </pre>
*
@ -60,7 +60,7 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
* NumericTokenStream stream = new NumericTokenStream(precisionStep);
* Field field = new Field(name, stream);
* field.setOmitNorms(true);
* field.setOmitTermFreqAndPositions(true);
* field.setIndexOptions(IndexOptions.DOCS_ONLY);
* Document document = new Document();
* document.add(field);
*

View File

@ -18,6 +18,7 @@ package org.apache.lucene.document;
import org.apache.lucene.search.PhraseQuery; // for javadocs
import org.apache.lucene.search.spans.SpanQuery; // for javadocs
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInvertState; // for javadocs
import org.apache.lucene.index.values.PerDocFieldValues;
import org.apache.lucene.index.values.ValueType;
@ -39,7 +40,7 @@ public abstract class AbstractField implements Fieldable {
protected boolean isTokenized = true;
protected boolean isBinary = false;
protected boolean lazy = false;
protected boolean omitTermFreqAndPositions = false;
protected IndexOptions indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
protected float boost = 1.0f;
// the data object for all different kind of field values
protected Object fieldsData = null;
@ -50,7 +51,6 @@ public abstract class AbstractField implements Fieldable {
protected int binaryOffset;
protected PerDocFieldValues docValues;
protected AbstractField()
{
}
@ -208,8 +208,8 @@ public abstract class AbstractField implements Fieldable {
/** True if norms are omitted for this indexed field */
public boolean getOmitNorms() { return omitNorms; }
/** @see #setOmitTermFreqAndPositions */
public boolean getOmitTermFreqAndPositions() { return omitTermFreqAndPositions; }
/** @see #setIndexOptions */
public IndexOptions getIndexOptions() { return indexOptions; }
/** Expert:
*
@ -220,7 +220,7 @@ public abstract class AbstractField implements Fieldable {
/** Expert:
*
* If set, omit term freq, positions and payloads from
* If set, omit term freq, and optionally also positions and payloads from
* postings for this field.
*
* <p><b>NOTE</b>: While this option reduces storage space
@ -229,7 +229,7 @@ public abstract class AbstractField implements Fieldable {
* PhraseQuery} or {@link SpanQuery} subclasses will
* silently fail to find results.
*/
public void setOmitTermFreqAndPositions(boolean omitTermFreqAndPositions) { this.omitTermFreqAndPositions=omitTermFreqAndPositions; }
public void setIndexOptions(IndexOptions indexOptions) { this.indexOptions=indexOptions; }
public boolean isLazy() {
return lazy;
@ -275,8 +275,9 @@ public abstract class AbstractField implements Fieldable {
if (omitNorms) {
result.append(",omitNorms");
}
if (omitTermFreqAndPositions) {
result.append(",omitTermFreqAndPositions");
if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
result.append(",indexOptions=");
result.append(indexOptions);
}
if (lazy){
result.append(",lazy");

View File

@ -20,6 +20,7 @@ package org.apache.lucene.document;
import java.io.Reader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexWriter;
/**
@ -389,7 +390,8 @@ public final class Field extends AbstractField implements Fieldable {
this.isTokenized = index.isAnalyzed();
this.omitNorms = index.omitNorms();
if (index == Index.NO) {
this.omitTermFreqAndPositions = false;
// note: now this reads even weirder than before
this.indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
}
this.isBinary = false;
@ -520,7 +522,7 @@ public final class Field extends AbstractField implements Fieldable {
isStored = true;
isIndexed = false;
isTokenized = false;
omitTermFreqAndPositions = false;
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
omitNorms = true;
isBinary = true;

View File

@ -17,6 +17,7 @@ package org.apache.lucene.document;
*/
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInvertState; // for javadocs
import org.apache.lucene.index.values.IndexDocValues;
import org.apache.lucene.index.values.PerDocFieldValues;
@ -194,12 +195,12 @@ public interface Fieldable {
*/
abstract byte[] getBinaryValue(byte[] result);
/** @see #setOmitTermFreqAndPositions */
boolean getOmitTermFreqAndPositions();
/** @see #setIndexOptions */
IndexOptions getIndexOptions();
/** Expert:
*
* If set, omit term freq, positions and payloads from
* If set, omit term freq, and optionally positions and payloads from
* postings for this field.
*
* <p><b>NOTE</b>: While this option reduces storage space
@ -208,7 +209,7 @@ public interface Fieldable {
* PhraseQuery} or {@link SpanQuery} subclasses will
* fail with an exception.
*/
void setOmitTermFreqAndPositions(boolean omitTermFreqAndPositions);
void setIndexOptions(IndexOptions indexOptions);
/**
* Returns the {@link PerDocFieldValues}

View File

@ -21,6 +21,7 @@ import java.io.Reader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.NumericTokenStream;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.search.NumericRangeQuery; // javadocs
import org.apache.lucene.search.NumericRangeFilter; // javadocs
@ -192,7 +193,7 @@ public final class NumericField extends AbstractField {
public NumericField(String name, int precisionStep, Field.Store store, boolean index) {
super(name, store, index ? Field.Index.ANALYZED_NO_NORMS : Field.Index.NO, Field.TermVector.NO);
this.precisionStep = precisionStep;
setOmitTermFreqAndPositions(true);
setIndexOptions(IndexOptions.DOCS_ONLY);
}
/** Returns a {@link NumericTokenStream} for indexing the numeric value. */

View File

@ -186,8 +186,8 @@ public class CheckIndex {
int numFields;
/** True if at least one of the fields in this segment
* does not omitTermFreqAndPositions.
* @see AbstractField#setOmitTermFreqAndPositions */
* has position data
* @see AbstractField#setIndexOptions(org.apache.lucene.index.FieldInfo.IndexOptions) */
public boolean hasProx;
/** Map that includes certain

View File

@ -233,7 +233,7 @@ final class DocFieldProcessor extends DocConsumer {
// easily add it
FieldInfo fi = fieldInfos.addOrUpdate(fieldName, field.isIndexed(), field.isTermVectorStored(),
field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
field.getOmitNorms(), false, field.getOmitTermFreqAndPositions(), field.docValuesType());
field.getOmitNorms(), false, field.getIndexOptions(), field.docValuesType());
fp = new DocFieldProcessorPerField(this, fi);
fp.next = fieldHash[hashPos];
@ -245,7 +245,7 @@ final class DocFieldProcessor extends DocConsumer {
} else {
fieldInfos.addOrUpdate(fp.fieldInfo.name, field.isIndexed(), field.isTermVectorStored(),
field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
field.getOmitNorms(), false, field.getOmitTermFreqAndPositions(), field.docValuesType());
field.getOmitNorms(), false, field.getIndexOptions(), field.docValuesType());
}
if (thisFieldGen != fp.lastGen) {

View File

@ -35,14 +35,27 @@ public final class FieldInfo {
boolean storePositionWithTermVector;
public boolean omitNorms; // omit norms associated with indexed fields
public boolean omitTermFreqAndPositions;
public IndexOptions indexOptions;
public boolean storePayloads; // whether this field stores payloads together with term positions
private int codecId = UNASSIGNED_CODEC_ID; // set inside SegmentCodecs#build() during segment flush - this is used to identify the codec used to write this field
/**
* Controls how much information is stored in the postings lists.
* <p>
* NOTE: the declaration order is significant. The constants are declared
* from the least to the most information, and FieldInfo.update resolves
* conflicting settings for the same field via <code>compareTo</code>,
* keeping whichever constant is declared earlier (i.e. it always
* downgrades). Do not reorder the constants.
* @lucene.experimental
*/
public static enum IndexOptions {
/** only documents are indexed: term frequencies and positions are omitted */
DOCS_ONLY,
/** only documents and term frequencies are indexed: positions are omitted */
DOCS_AND_FREQS,
/** full postings: documents, frequencies, and positions */
DOCS_AND_FREQS_AND_POSITIONS
};
FieldInfo(String na, boolean tk, int nu, boolean storeTermVector,
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions, ValueType docValues) {
boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, ValueType docValues) {
name = na;
isIndexed = tk;
number = nu;
@ -53,16 +66,16 @@ public final class FieldInfo {
this.storePositionWithTermVector = storePositionWithTermVector;
this.storePayloads = storePayloads;
this.omitNorms = omitNorms;
this.omitTermFreqAndPositions = omitTermFreqAndPositions;
this.indexOptions = indexOptions;
} else { // for non-indexed fields, leave defaults
this.storeTermVector = false;
this.storeOffsetWithTermVector = false;
this.storePositionWithTermVector = false;
this.storePayloads = false;
this.omitNorms = false;
this.omitTermFreqAndPositions = false;
this.indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
}
assert !omitTermFreqAndPositions || !storePayloads;
assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !storePayloads;
}
void setCodecId(int codecId) {
@ -77,14 +90,14 @@ public final class FieldInfo {
@Override
public Object clone() {
FieldInfo clone = new FieldInfo(name, isIndexed, number, storeTermVector, storePositionWithTermVector,
storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions, docValues);
storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions, docValues);
clone.codecId = this.codecId;
return clone;
}
// should only be called by FieldInfos#addOrUpdate
void update(boolean isIndexed, boolean storeTermVector, boolean storePositionWithTermVector,
boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions) {
boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, IndexOptions indexOptions) {
if (this.isIndexed != isIndexed) {
this.isIndexed = true; // once indexed, always index
@ -105,12 +118,13 @@ public final class FieldInfo {
if (this.omitNorms != omitNorms) {
this.omitNorms = true; // if one require omitNorms at least once, it remains off for life
}
if (this.omitTermFreqAndPositions != omitTermFreqAndPositions) {
this.omitTermFreqAndPositions = true; // if one require omitTermFreqAndPositions at least once, it remains off for life
if (this.indexOptions != indexOptions) {
// downgrade
this.indexOptions = this.indexOptions.compareTo(indexOptions) < 0 ? this.indexOptions : indexOptions;
this.storePayloads = false;
}
}
assert !this.omitTermFreqAndPositions || !this.storePayloads;
assert this.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !this.storePayloads;
}
void setDocValues(ValueType v) {
if (docValues == null) {

View File

@ -28,6 +28,7 @@ import java.util.SortedMap;
import java.util.TreeMap;
import java.util.Map.Entry;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.SegmentCodecs; // Required for Java 1.5 javadocs
import org.apache.lucene.index.SegmentCodecs.SegmentCodecsBuilder;
import org.apache.lucene.index.codecs.CodecProvider;
@ -201,13 +202,13 @@ public final class FieldInfos implements Iterable<FieldInfo> {
// First used in 2.9; prior to 2.9 there was no format header
public static final int FORMAT_START = -2;
public static final int FORMAT_PER_FIELD_CODEC = -3;
// Records index values for this field
public static final int FORMAT_INDEX_VALUES = -3;
// First used in 3.4: omit only positional information
public static final int FORMAT_OMIT_POSITIONS = -3;
// per-field codec support, records index values for fields
public static final int FORMAT_FLEX = -4;
// whenever you add a new format, make it 1 smaller (negative version logic)!
static final int FORMAT_CURRENT = FORMAT_PER_FIELD_CODEC;
static final int FORMAT_CURRENT = FORMAT_FLEX;
static final int FORMAT_MINIMUM = FORMAT_START;
@ -218,8 +219,10 @@ public final class FieldInfos implements Iterable<FieldInfo> {
static final byte OMIT_NORMS = 0x10;
static final byte STORE_PAYLOADS = 0x20;
static final byte OMIT_TERM_FREQ_AND_POSITIONS = 0x40;
static final byte OMIT_POSITIONS = -128;
private int format;
private boolean hasFreq; // only set if readonly
private boolean hasProx; // only set if readonly
private boolean hasVectors; // only set if readonly
private long version; // internal use to track changes
@ -308,6 +311,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
synchronized public Object clone() {
FieldInfos fis = new FieldInfos(globalFieldNumbers, segmentCodecsBuilder);
fis.format = format;
fis.hasFreq = hasFreq;
fis.hasProx = hasProx;
fis.hasVectors = hasVectors;
for (FieldInfo fi : this) {
@ -317,14 +321,28 @@ public final class FieldInfos implements Iterable<FieldInfo> {
return fis;
}
/** Returns true if any fields do not omitTermFreqAndPositions */
/** Returns true if any fields have positions */
public boolean hasProx() {
if (isReadOnly()) {
return hasProx;
}
// mutable FIs must check!
for (FieldInfo fi : this) {
if (fi.isIndexed && !fi.omitTermFreqAndPositions) {
if (fi.isIndexed && fi.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
return true;
}
}
return false;
}
/** Returns true if any fields have freqs */
public boolean hasFreq() {
if (isReadOnly()) {
return hasFreq;
}
// mutable FIs must check!
for (FieldInfo fi : this) {
if (fi.isIndexed && fi.indexOptions != IndexOptions.DOCS_ONLY) {
return true;
}
}
@ -414,7 +432,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
synchronized public void addOrUpdate(String name, boolean isIndexed, boolean storeTermVector,
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, boolean omitNorms) {
addOrUpdate(name, isIndexed, storeTermVector, storePositionWithTermVector,
storeOffsetWithTermVector, omitNorms, false, false, null);
storeOffsetWithTermVector, omitNorms, false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, null);
}
/** If the field is not yet known, adds it. If it is known, checks to make
@ -429,18 +447,18 @@ public final class FieldInfos implements Iterable<FieldInfo> {
* @param storeOffsetWithTermVector true if the term vector with offsets should be stored
* @param omitNorms true if the norms for the indexed field should be omitted
* @param storePayloads true if payloads should be stored for this field
* @param omitTermFreqAndPositions true if term freqs should be omitted for this field
* @param indexOptions controls which postings data (docs only; docs and freqs; or docs, freqs and positions) is indexed for this field
*/
synchronized public FieldInfo addOrUpdate(String name, boolean isIndexed, boolean storeTermVector,
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions, ValueType docValues) {
boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, ValueType docValues) {
return addOrUpdateInternal(name, -1, isIndexed, storeTermVector, storePositionWithTermVector,
storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions, docValues);
storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions, docValues);
}
synchronized private FieldInfo addOrUpdateInternal(String name, int preferredFieldNumber, boolean isIndexed,
boolean storeTermVector, boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions, ValueType docValues) {
boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, ValueType docValues) {
if (globalFieldNumbers == null) {
throw new IllegalStateException("FieldInfos are read-only, create a new instance with a global field map to make modifications to FieldInfos");
}
@ -448,9 +466,9 @@ public final class FieldInfos implements Iterable<FieldInfo> {
FieldInfo fi = fieldInfo(name);
if (fi == null) {
final int fieldNumber = nextFieldNumber(name, preferredFieldNumber);
fi = addInternal(name, fieldNumber, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions, docValues);
fi = addInternal(name, fieldNumber, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions, docValues);
} else {
fi.update(isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
fi.update(isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions);
fi.setDocValues(docValues);
}
if ((fi.isIndexed || fi.hasDocValues()) && fi.getCodecId() == FieldInfo.UNASSIGNED_CODEC_ID) {
@ -465,7 +483,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
return addOrUpdateInternal(fi.name, fi.number, fi.isIndexed, fi.storeTermVector,
fi.storePositionWithTermVector, fi.storeOffsetWithTermVector,
fi.omitNorms, fi.storePayloads,
fi.omitTermFreqAndPositions, fi.docValues);
fi.indexOptions, fi.docValues);
}
/*
@ -473,13 +491,13 @@ public final class FieldInfos implements Iterable<FieldInfo> {
*/
private FieldInfo addInternal(String name, int fieldNumber, boolean isIndexed,
boolean storeTermVector, boolean storePositionWithTermVector,
boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions, ValueType docValuesType) {
boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, ValueType docValuesType) {
// don't check modifiable here since we use that to initially build up FIs
if (globalFieldNumbers != null) {
globalFieldNumbers.setIfNotSet(fieldNumber, name);
}
final FieldInfo fi = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, storePositionWithTermVector,
storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions, docValuesType);
storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions, docValuesType);
putInternal(fi);
return fi;
}
@ -590,7 +608,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
output.writeVInt(FORMAT_CURRENT);
output.writeVInt(size());
for (FieldInfo fi : this) {
assert !fi.omitTermFreqAndPositions || !fi.storePayloads;
assert fi.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !fi.storePayloads;
byte bits = 0x0;
if (fi.isIndexed) bits |= IS_INDEXED;
if (fi.storeTermVector) bits |= STORE_TERMVECTOR;
@ -598,7 +616,10 @@ public final class FieldInfos implements Iterable<FieldInfo> {
if (fi.storeOffsetWithTermVector) bits |= STORE_OFFSET_WITH_TERMVECTOR;
if (fi.omitNorms) bits |= OMIT_NORMS;
if (fi.storePayloads) bits |= STORE_PAYLOADS;
if (fi.omitTermFreqAndPositions) bits |= OMIT_TERM_FREQ_AND_POSITIONS;
if (fi.indexOptions == IndexOptions.DOCS_ONLY)
bits |= OMIT_TERM_FREQ_AND_POSITIONS;
else if (fi.indexOptions == IndexOptions.DOCS_AND_FREQS)
bits |= OMIT_POSITIONS;
output.writeString(fi.name);
output.writeInt(fi.number);
output.writeInt(fi.getCodecId());
@ -673,8 +694,8 @@ public final class FieldInfos implements Iterable<FieldInfo> {
for (int i = 0; i < size; i++) {
String name = input.readString();
// if this is a previous format codec 0 will be preflex!
final int fieldNumber = format <= FORMAT_PER_FIELD_CODEC? input.readInt():i;
final int codecId = format <= FORMAT_PER_FIELD_CODEC? input.readInt():0;
final int fieldNumber = format <= FORMAT_FLEX? input.readInt():i;
final int codecId = format <= FORMAT_FLEX? input.readInt():0;
byte bits = input.readByte();
boolean isIndexed = (bits & IS_INDEXED) != 0;
boolean storeTermVector = (bits & STORE_TERMVECTOR) != 0;
@ -682,18 +703,30 @@ public final class FieldInfos implements Iterable<FieldInfo> {
boolean storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
boolean omitNorms = (bits & OMIT_NORMS) != 0;
boolean storePayloads = (bits & STORE_PAYLOADS) != 0;
boolean omitTermFreqAndPositions = (bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0;
final IndexOptions indexOptions;
if ((bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0) {
indexOptions = IndexOptions.DOCS_ONLY;
} else if ((bits & OMIT_POSITIONS) != 0) {
if (format <= FORMAT_OMIT_POSITIONS) {
indexOptions = IndexOptions.DOCS_AND_FREQS;
} else {
throw new CorruptIndexException("Corrupt fieldinfos, OMIT_POSITIONS set but format=" + format);
}
} else {
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
}
// LUCENE-3027: past indices were able to write
// storePayloads=true when omitTFAP is also true,
// which is invalid. We correct that, here:
if (omitTermFreqAndPositions) {
if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
storePayloads = false;
}
hasVectors |= storeTermVector;
hasProx |= isIndexed && !omitTermFreqAndPositions;
hasProx |= isIndexed && indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
hasFreq |= isIndexed && indexOptions != IndexOptions.DOCS_ONLY;
ValueType docValuesType = null;
if (format <= FORMAT_INDEX_VALUES) {
if (format <= FORMAT_FLEX) {
final byte b = input.readByte();
switch(b) {
case 0:
@ -743,7 +776,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
throw new IllegalStateException("unhandled indexValues type " + b);
}
}
final FieldInfo addInternal = addInternal(name, fieldNumber, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions, docValuesType);
final FieldInfo addInternal = addInternal(name, fieldNumber, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions, docValuesType);
addInternal.setCodecId(codecId);
}
@ -771,7 +804,8 @@ public final class FieldInfos implements Iterable<FieldInfo> {
FieldInfo clone = (FieldInfo) (fieldInfo).clone();
roFis.putInternal(clone);
roFis.hasVectors |= clone.storeTermVector;
roFis.hasProx |= clone.isIndexed && !clone.omitTermFreqAndPositions;
roFis.hasProx |= clone.isIndexed && clone.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
roFis.hasFreq |= clone.isIndexed && clone.indexOptions != IndexOptions.DOCS_ONLY;
}
return roFis;
}

View File

@ -340,7 +340,7 @@ public final class FieldsReader implements Cloneable, Closeable {
}
f.setOmitNorms(fi.omitNorms);
f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
f.setIndexOptions(fi.indexOptions);
doc.add(f);
}
@ -364,7 +364,7 @@ public final class FieldsReader implements Cloneable, Closeable {
termVector);
}
f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
f.setIndexOptions(fi.indexOptions);
f.setOmitNorms(fi.omitNorms);
doc.add(f);
}

View File

@ -22,6 +22,7 @@ import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CollectionUtil;
@ -79,7 +80,7 @@ final class FreqProxTermsWriter extends TermsHashConsumer {
// Aggregate the storePayload as seen by the same
// field across multiple threads
if (!fieldInfo.omitTermFreqAndPositions) {
if (fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
fieldInfo.storePayloads |= fieldWriter.hasPayloads;
}

View File

@ -23,6 +23,7 @@ import java.util.Map;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.index.codecs.PostingsConsumer;
import org.apache.lucene.index.codecs.TermStats;
@ -41,7 +42,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
final FieldInfo fieldInfo;
final DocumentsWriterPerThread.DocState docState;
final FieldInvertState fieldState;
boolean omitTermFreqAndPositions;
IndexOptions indexOptions;
PayloadAttribute payloadAttribute;
public FreqProxTermsWriterPerField(TermsHashPerField termsHashPerField, FreqProxTermsWriter parent, FieldInfo fieldInfo) {
@ -50,12 +51,12 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
this.fieldInfo = fieldInfo;
docState = termsHashPerField.docState;
fieldState = termsHashPerField.fieldState;
omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
indexOptions = fieldInfo.indexOptions;
}
@Override
int getStreamCount() {
if (fieldInfo.omitTermFreqAndPositions)
if (fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
return 1;
else
return 2;
@ -76,7 +77,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
void reset() {
// Record, up front, whether our in-RAM format will be
// with or without term freqs:
omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
indexOptions = fieldInfo.indexOptions;
payloadAttribute = null;
}
@ -126,12 +127,14 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
postings.lastDocIDs[termID] = docState.docID;
if (omitTermFreqAndPositions) {
if (indexOptions == IndexOptions.DOCS_ONLY) {
postings.lastDocCodes[termID] = docState.docID;
} else {
postings.lastDocCodes[termID] = docState.docID << 1;
postings.docFreqs[termID] = 1;
writeProx(termID, fieldState.position);
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
writeProx(termID, fieldState.position);
}
}
fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
fieldState.uniqueTermCount++;
@ -144,9 +147,9 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
assert omitTermFreqAndPositions || postings.docFreqs[termID] > 0;
assert indexOptions == IndexOptions.DOCS_ONLY || postings.docFreqs[termID] > 0;
if (omitTermFreqAndPositions) {
if (indexOptions == IndexOptions.DOCS_ONLY) {
if (docState.docID != postings.lastDocIDs[termID]) {
assert docState.docID > postings.lastDocIDs[termID];
termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]);
@ -172,11 +175,15 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
postings.lastDocCodes[termID] = (docState.docID - postings.lastDocIDs[termID]) << 1;
postings.lastDocIDs[termID] = docState.docID;
writeProx(termID, fieldState.position);
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
writeProx(termID, fieldState.position);
}
fieldState.uniqueTermCount++;
} else {
fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, ++postings.docFreqs[termID]);
writeProx(termID, fieldState.position-postings.lastPositions[termID]);
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
writeProx(termID, fieldState.position-postings.lastPositions[termID]);
}
}
}
}
@ -237,7 +244,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
final TermsConsumer termsConsumer = consumer.addField(fieldInfo);
final Comparator<BytesRef> termComp = termsConsumer.getComparator();
final boolean currentFieldOmitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
final IndexOptions currentFieldIndexOptions = fieldInfo.indexOptions;
final Map<Term,Integer> segDeletes;
if (state.segDeletes != null && state.segDeletes.terms.size() > 0) {
@ -263,7 +270,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
termsHashPerField.bytePool.setBytesRef(text, textStart);
termsHashPerField.initReader(freq, termID, 0);
if (!fieldInfo.omitTermFreqAndPositions) {
if (fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
termsHashPerField.initReader(prox, termID, 1);
}
@ -300,7 +307,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
if (postings.lastDocCodes[termID] != -1) {
// Return last doc
docID = postings.lastDocIDs[termID];
if (!omitTermFreqAndPositions) {
if (indexOptions != IndexOptions.DOCS_ONLY) {
termFreq = postings.docFreqs[termID];
}
postings.lastDocCodes[termID] = -1;
@ -310,7 +317,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
}
} else {
final int code = freq.readVInt();
if (omitTermFreqAndPositions) {
if (indexOptions == IndexOptions.DOCS_ONLY) {
docID += code;
} else {
docID += code >>> 1;
@ -351,14 +358,17 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
state.liveDocs.clear(docID);
}
if (currentFieldIndexOptions != IndexOptions.DOCS_ONLY) {
totTF += termDocFreq;
}
// Carefully copy over the prox + payload info,
// changing the format to match Lucene's segment
// format.
if (!currentFieldOmitTermFreqAndPositions) {
// omitTermFreqAndPositions == false so we do write positions &
// payload
if (currentFieldIndexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
// we do write positions & payload
int position = 0;
totTF += termDocFreq;
for(int j=0;j<termDocFreq;j++) {
final int code = prox.readVInt();
position += code >> 1;

View File

@ -153,6 +153,8 @@ public abstract class IndexReader implements Cloneable,Closeable {
STORES_PAYLOADS,
/** All fields that omit tf */
OMIT_TERM_FREQ_AND_POSITIONS,
/** All fields that omit positions */
OMIT_POSITIONS,
/** All fields which are not indexed */
UNINDEXED,
/** All fields which are indexed with termvectors enabled */

View File

@ -91,7 +91,7 @@ public final class SegmentInfo implements Cloneable {
//TODO: remove when we don't have to support old indexes anymore that had this field
private int hasVectors = CHECK_FIELDINFO;
//TODO: remove when we don't have to support old indexes anymore that had this field
private int hasProx = CHECK_FIELDINFO; // True if this segment has any fields with omitTermFreqAndPositions==false
private int hasProx = CHECK_FIELDINFO; // True if this segment has any fields with positional information
private FieldInfos fieldInfos;

View File

@ -24,6 +24,7 @@ import java.util.Collection;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexReader.FieldOption;
import org.apache.lucene.index.MergePolicy.MergeAbortedException;
import org.apache.lucene.index.codecs.Codec;
@ -158,12 +159,12 @@ final class SegmentMerger {
private static void addIndexed(IndexReader reader, FieldInfos fInfos,
Collection<String> names, boolean storeTermVectors,
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
boolean storePayloads, boolean omitTFAndPositions)
boolean storePayloads, IndexOptions indexOptions)
throws IOException {
for (String field : names) {
fInfos.addOrUpdate(field, true, storeTermVectors,
storePositionWithTermVector, storeOffsetWithTermVector, !reader
.hasNorms(field), storePayloads, omitTFAndPositions, null);
.hasNorms(field), storePayloads, indexOptions, null);
}
}
@ -223,13 +224,14 @@ final class SegmentMerger {
fieldInfos.add(fi);
}
} else {
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false);
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false);
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false);
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR), true, false, false, false, false);
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.OMIT_TERM_FREQ_AND_POSITIONS), false, false, false, false, true);
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.STORES_PAYLOADS), false, false, false, true, false);
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.INDEXED), false, false, false, false, false);
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR), true, false, false, false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.OMIT_POSITIONS), false, false, false, false, IndexOptions.DOCS_AND_FREQS);
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.OMIT_TERM_FREQ_AND_POSITIONS), false, false, false, false, IndexOptions.DOCS_ONLY);
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.STORES_PAYLOADS), false, false, false, true, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.INDEXED), false, false, false, false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
fieldInfos.addOrUpdate(reader.getFieldNames(FieldOption.UNINDEXED), false);
fieldInfos.addOrUpdate(reader.getFieldNames(FieldOption.DOC_VALUES), false);
}

View File

@ -29,6 +29,7 @@ import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.codecs.PerDocValues;
import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.Directory;
@ -513,7 +514,10 @@ public class SegmentReader extends IndexReader implements Cloneable {
else if (!fi.isIndexed && fieldOption == IndexReader.FieldOption.UNINDEXED) {
fieldSet.add(fi.name);
}
else if (fi.omitTermFreqAndPositions && fieldOption == IndexReader.FieldOption.OMIT_TERM_FREQ_AND_POSITIONS) {
else if (fi.indexOptions == IndexOptions.DOCS_ONLY && fieldOption == IndexReader.FieldOption.OMIT_TERM_FREQ_AND_POSITIONS) {
fieldSet.add(fi.name);
}
else if (fi.indexOptions == IndexOptions.DOCS_AND_FREQS && fieldOption == IndexReader.FieldOption.OMIT_POSITIONS) {
fieldSet.add(fi.name);
}
else if (fi.storePayloads && fieldOption == IndexReader.FieldOption.STORES_PAYLOADS) {

View File

@ -27,6 +27,7 @@ import java.util.TreeMap;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.IndexFileNames;
@ -136,7 +137,7 @@ public class BlockTermsReader extends FieldsProducer {
assert numTerms >= 0;
final long termsStartPointer = in.readVLong();
final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
final long sumTotalTermFreq = fieldInfo.omitTermFreqAndPositions ? -1 : in.readVLong();
final long sumTotalTermFreq = fieldInfo.indexOptions == IndexOptions.DOCS_ONLY ? -1 : in.readVLong();
final long sumDocFreq = in.readVLong();
assert !fields.containsKey(fieldInfo.name);
fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq));
@ -709,7 +710,7 @@ public class BlockTermsReader extends FieldsProducer {
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
//System.out.println("BTR.d&p this=" + this);
decodeMetaData();
if (fieldInfo.omitTermFreqAndPositions) {
if (fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
return null;
} else {
DocsAndPositionsEnum dpe = postingsReader.docsAndPositions(fieldInfo, state, liveDocs, reuse);
@ -867,7 +868,7 @@ public class BlockTermsReader extends FieldsProducer {
// just skipN here:
state.docFreq = freqReader.readVInt();
//System.out.println(" dF=" + state.docFreq);
if (!fieldInfo.omitTermFreqAndPositions) {
if (fieldInfo.indexOptions != IndexOptions.DOCS_ONLY) {
state.totalTermFreq = state.docFreq + freqReader.readVLong();
//System.out.println(" totTF=" + state.totalTermFreq);
}

View File

@ -23,6 +23,7 @@ import java.util.Comparator;
import java.util.List;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
@ -129,7 +130,7 @@ public class BlockTermsWriter extends FieldsConsumer {
out.writeVInt(field.fieldInfo.number);
out.writeVLong(field.numTerms);
out.writeVLong(field.termsStartPointer);
if (!field.fieldInfo.omitTermFreqAndPositions) {
if (field.fieldInfo.indexOptions != IndexOptions.DOCS_ONLY) {
out.writeVLong(field.sumTotalTermFreq);
}
out.writeVLong(field.sumDocFreq);
@ -298,7 +299,7 @@ public class BlockTermsWriter extends FieldsConsumer {
final TermStats stats = pendingTerms[termCount].stats;
assert stats != null;
bytesWriter.writeVInt(stats.docFreq);
if (!fieldInfo.omitTermFreqAndPositions) {
if (fieldInfo.indexOptions != IndexOptions.DOCS_ONLY) {
bytesWriter.writeVLong(stats.totalTermFreq-stats.docFreq);
}
}

View File

@ -21,6 +21,7 @@ import java.io.IOException;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;
@ -60,16 +61,17 @@ public abstract class PostingsConsumer {
int df = 0;
long totTF = 0;
if (mergeState.fieldInfo.omitTermFreqAndPositions) {
if (mergeState.fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
while(true) {
final int doc = postings.nextDoc();
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
this.startDoc(doc, postings.freq());
final int freq = postings.freq();
this.startDoc(doc, freq);
this.finishDoc();
df++;
totTF++;
totTF += freq;
}
} else {
final DocsAndPositionsEnum postingsEnum = (DocsAndPositionsEnum) postings;

View File

@ -20,6 +20,7 @@ package org.apache.lucene.index.codecs;
import java.io.IOException;
import java.util.Comparator;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.MultiDocsEnum;
import org.apache.lucene.index.MultiDocsAndPositionsEnum;
@ -59,7 +60,7 @@ public abstract class TermsConsumer {
long sumDocFreq = 0;
long sumDFsinceLastAbortCheck = 0;
if (mergeState.fieldInfo.omitTermFreqAndPositions) {
if (mergeState.fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
if (docsEnum == null) {
docsEnum = new MappingMultiDocsEnum();
}
@ -75,6 +76,7 @@ public abstract class TermsConsumer {
final TermStats stats = postingsConsumer.merge(mergeState, docsEnum);
if (stats.docFreq > 0) {
finishTerm(term, stats);
sumTotalTermFreq += stats.totalTermFreq;
sumDFsinceLastAbortCheck += stats.docFreq;
sumDocFreq += stats.docFreq;
if (sumDFsinceLastAbortCheck > 60000) {

View File

@ -27,6 +27,7 @@ import java.util.TreeMap;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.IndexFileNames;
@ -118,7 +119,7 @@ public class MemoryCodec extends Codec {
lastDocID = docID;
docCount++;
if (field.omitTermFreqAndPositions) {
if (field.indexOptions == IndexOptions.DOCS_ONLY) {
buffer.writeVInt(delta);
} else if (termDocFreq == 1) {
buffer.writeVInt((delta<<1) | 1);
@ -192,7 +193,7 @@ public class MemoryCodec extends Codec {
assert buffer2.getFilePointer() == 0;
buffer2.writeVInt(stats.docFreq);
if (!field.omitTermFreqAndPositions) {
if (field.indexOptions != IndexOptions.DOCS_ONLY) {
buffer2.writeVLong(stats.totalTermFreq-stats.docFreq);
}
int pos = (int) buffer2.getFilePointer();
@ -223,7 +224,7 @@ public class MemoryCodec extends Codec {
if (termCount > 0) {
out.writeVInt(termCount);
out.writeVInt(field.number);
if (!field.omitTermFreqAndPositions) {
if (field.indexOptions != IndexOptions.DOCS_ONLY) {
out.writeVLong(sumTotalTermFreq);
}
out.writeVLong(sumDocFreq);
@ -266,7 +267,7 @@ public class MemoryCodec extends Codec {
}
private final static class FSTDocsEnum extends DocsEnum {
private final boolean omitTFAP;
private final IndexOptions indexOptions;
private final boolean storePayloads;
private byte[] buffer = new byte[16];
private final ByteArrayDataInput in = new ByteArrayDataInput(buffer);
@ -278,13 +279,13 @@ public class MemoryCodec extends Codec {
private int payloadLen;
private int numDocs;
public FSTDocsEnum(boolean omitTFAP, boolean storePayloads) {
this.omitTFAP = omitTFAP;
public FSTDocsEnum(IndexOptions indexOptions, boolean storePayloads) {
this.indexOptions = indexOptions;
this.storePayloads = storePayloads;
}
public boolean canReuse(boolean omitTFAP, boolean storePayloads) {
return omitTFAP == this.omitTFAP && storePayloads == this.storePayloads;
public boolean canReuse(IndexOptions indexOptions, boolean storePayloads) {
return indexOptions == this.indexOptions && storePayloads == this.storePayloads;
}
public FSTDocsEnum reset(BytesRef bufferIn, Bits liveDocs, int numDocs) {
@ -313,7 +314,7 @@ public class MemoryCodec extends Codec {
return docID = NO_MORE_DOCS;
}
docUpto++;
if (omitTFAP) {
if (indexOptions == IndexOptions.DOCS_ONLY) {
docID += in.readVInt();
freq = 1;
} else {
@ -327,16 +328,18 @@ public class MemoryCodec extends Codec {
assert freq > 0;
}
// Skip positions
for(int posUpto=0;posUpto<freq;posUpto++) {
if (!storePayloads) {
in.readVInt();
} else {
final int posCode = in.readVInt();
if ((posCode & 1) != 0) {
payloadLen = in.readVInt();
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
// Skip positions
for(int posUpto=0;posUpto<freq;posUpto++) {
if (!storePayloads) {
in.readVInt();
} else {
final int posCode = in.readVInt();
if ((posCode & 1) != 0) {
payloadLen = in.readVInt();
}
in.skipBytes(payloadLen);
}
in.skipBytes(payloadLen);
}
}
}
@ -432,7 +435,7 @@ public class MemoryCodec extends Codec {
return docID = NO_MORE_DOCS;
}
docUpto++;
final int code = in.readVInt();
docID += code >>> 1;
if ((code & 1) != 0) {
@ -454,8 +457,8 @@ public class MemoryCodec extends Codec {
if (!storePayloads) {
in.readVInt();
} else {
final int codeSkip = in.readVInt();
if ((codeSkip & 1) != 0) {
final int skipCode = in.readVInt();
if ((skipCode & 1) != 0) {
payloadLength = in.readVInt();
if (VERBOSE) System.out.println(" new payloadLen=" + payloadLength);
}
@ -548,7 +551,7 @@ public class MemoryCodec extends Codec {
if (!didDecode) {
buffer.reset(current.output.bytes, 0, current.output.length);
docFreq = buffer.readVInt();
if (!field.omitTermFreqAndPositions) {
if (field.indexOptions != IndexOptions.DOCS_ONLY) {
totalTermFreq = docFreq + buffer.readVLong();
} else {
totalTermFreq = 0;
@ -598,11 +601,11 @@ public class MemoryCodec extends Codec {
decodeMetaData();
FSTDocsEnum docsEnum;
if (reuse == null || !(reuse instanceof FSTDocsEnum)) {
docsEnum = new FSTDocsEnum(field.omitTermFreqAndPositions, field.storePayloads);
docsEnum = new FSTDocsEnum(field.indexOptions, field.storePayloads);
} else {
docsEnum = (FSTDocsEnum) reuse;
if (!docsEnum.canReuse(field.omitTermFreqAndPositions, field.storePayloads)) {
docsEnum = new FSTDocsEnum(field.omitTermFreqAndPositions, field.storePayloads);
if (!docsEnum.canReuse(field.indexOptions, field.storePayloads)) {
docsEnum = new FSTDocsEnum(field.indexOptions, field.storePayloads);
}
}
return docsEnum.reset(current.output, liveDocs, docFreq);
@ -610,7 +613,7 @@ public class MemoryCodec extends Codec {
@Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
if (field.omitTermFreqAndPositions) {
if (field.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
return null;
}
decodeMetaData();
@ -686,7 +689,7 @@ public class MemoryCodec extends Codec {
public TermsReader(FieldInfos fieldInfos, IndexInput in) throws IOException {
final int fieldNumber = in.readVInt();
field = fieldInfos.fieldInfo(fieldNumber);
if (!field.omitTermFreqAndPositions) {
if (field.indexOptions != IndexOptions.DOCS_ONLY) {
sumTotalTermFreq = in.readVLong();
} else {
sumTotalTermFreq = 0;

View File

@ -25,9 +25,11 @@ import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.IndexFileNames;
@ -99,7 +101,7 @@ public class PreFlexFields extends FieldsProducer {
if (fi.isIndexed) {
fields.put(fi.name, fi);
preTerms.put(fi.name, new PreTerms(fi));
if (!fi.omitTermFreqAndPositions) {
if (fi.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
anyProx = true;
}
}
@ -973,7 +975,7 @@ public class PreFlexFields extends FieldsProducer {
@Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
PreDocsAndPositionsEnum docsPosEnum;
if (fieldInfo.omitTermFreqAndPositions) {
if (fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
return null;
} else if (reuse == null || !(reuse instanceof PreDocsAndPositionsEnum)) {
docsPosEnum = new PreDocsAndPositionsEnum();

View File

@ -20,6 +20,7 @@ package org.apache.lucene.index.codecs.preflex;
import java.io.IOException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.codecs.standard.DefaultSkipListReader;
@ -51,7 +52,7 @@ public class SegmentTermDocs {
private boolean haveSkipped;
protected boolean currentFieldStoresPayloads;
protected boolean currentFieldOmitTermFreqAndPositions;
protected IndexOptions indexOptions;
public SegmentTermDocs(IndexInput freqStream, TermInfosReader tis, FieldInfos fieldInfos) {
this.freqStream = (IndexInput) freqStream.clone();
@ -89,7 +90,7 @@ public class SegmentTermDocs {
void seek(TermInfo ti, Term term) throws IOException {
count = 0;
FieldInfo fi = fieldInfos.fieldInfo(term.field());
currentFieldOmitTermFreqAndPositions = (fi != null) ? fi.omitTermFreqAndPositions : false;
this.indexOptions = (fi != null) ? fi.indexOptions : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
currentFieldStoresPayloads = (fi != null) ? fi.storePayloads : false;
if (ti == null) {
df = 0;
@ -122,7 +123,7 @@ public class SegmentTermDocs {
return false;
final int docCode = freqStream.readVInt();
if (currentFieldOmitTermFreqAndPositions) {
if (indexOptions == IndexOptions.DOCS_ONLY) {
doc += docCode;
freq = 1;
} else {
@ -149,7 +150,7 @@ public class SegmentTermDocs {
public int read(final int[] docs, final int[] freqs)
throws IOException {
final int length = docs.length;
if (currentFieldOmitTermFreqAndPositions) {
if (indexOptions == IndexOptions.DOCS_ONLY) {
return readNoTf(docs, freqs, length);
} else {
int i = 0;

View File

@ -21,6 +21,7 @@ import java.io.IOException;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.store.IndexInput;
/**
@ -77,8 +78,8 @@ extends SegmentTermDocs {
}
public final int nextPosition() throws IOException {
if (currentFieldOmitTermFreqAndPositions)
// This field does not store term freq, positions, payloads
if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
// This field does not store positions, payloads
return 0;
// perform lazy skips if necessary
lazySkip();
@ -140,7 +141,7 @@ extends SegmentTermDocs {
}
private void skipPositions(int n) throws IOException {
assert !currentFieldOmitTermFreqAndPositions;
assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
for (int f = n; f > 0; f--) { // skip unread positions
readDeltaPosition();
skipPayload();

View File

@ -22,6 +22,7 @@ import java.io.IOException;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.codecs.PostingsReaderBase;
import org.apache.lucene.index.codecs.BlockTermState;
@ -134,8 +135,8 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
//System.out.println("PR nextTerm");
PulsingTermState termState = (PulsingTermState) _termState;
// total TF, but in the omitTFAP case its computed based on docFreq.
long count = fieldInfo.omitTermFreqAndPositions ? termState.docFreq : termState.totalTermFreq;
// if we have positions, it's total TF, otherwise it's computed based on docFreq.
long count = fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS ? termState.totalTermFreq : termState.docFreq;
//System.out.println(" count=" + count + " threshold=" + maxPositions);
if (count <= maxPositions) {
@ -193,7 +194,7 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
// TODO: -- not great that we can't always reuse
@Override
public DocsAndPositionsEnum docsAndPositions(FieldInfo field, BlockTermState _termState, Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
if (field.omitTermFreqAndPositions) {
if (field.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
return null;
}
//System.out.println("D&P: field=" + field.name);
@ -223,7 +224,7 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
private static class PulsingDocsEnum extends DocsEnum {
private final ByteArrayDataInput postings = new ByteArrayDataInput();
private final boolean omitTF;
private final IndexOptions indexOptions;
private final boolean storePayloads;
private Bits liveDocs;
private int docID;
@ -231,7 +232,7 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
private int payloadLength;
public PulsingDocsEnum(FieldInfo fieldInfo) {
omitTF = fieldInfo.omitTermFreqAndPositions;
indexOptions = fieldInfo.indexOptions;
storePayloads = fieldInfo.storePayloads;
}
@ -249,7 +250,7 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
}
boolean canReuse(FieldInfo fieldInfo) {
return omitTF == fieldInfo.omitTermFreqAndPositions && storePayloads == fieldInfo.storePayloads;
return indexOptions == fieldInfo.indexOptions && storePayloads == fieldInfo.storePayloads;
}
@Override
@ -262,7 +263,7 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
}
final int code = postings.readVInt();
if (omitTF) {
if (indexOptions == IndexOptions.DOCS_ONLY) {
docID += code;
} else {
docID += code >>> 1; // shift off low bit
@ -272,22 +273,24 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
freq = postings.readVInt(); // else read freq
}
// Skip positions
if (storePayloads) {
for(int pos=0;pos<freq;pos++) {
final int posCode = postings.readVInt();
if ((posCode & 1) != 0) {
payloadLength = postings.readVInt();
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
// Skip positions
if (storePayloads) {
for(int pos=0;pos<freq;pos++) {
final int posCode = postings.readVInt();
if ((posCode & 1) != 0) {
payloadLength = postings.readVInt();
}
if (payloadLength != 0) {
postings.skipBytes(payloadLength);
}
}
if (payloadLength != 0) {
postings.skipBytes(payloadLength);
} else {
for(int pos=0;pos<freq;pos++) {
// TODO: skipVInt
postings.readVInt();
}
}
} else {
for(int pos=0;pos<freq;pos++) {
// TODO: skipVInt
postings.readVInt();
}
}
}

View File

@ -20,6 +20,7 @@ package org.apache.lucene.index.codecs.pulsing;
import java.io.IOException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.codecs.PostingsWriterBase;
import org.apache.lucene.index.codecs.TermStats;
import org.apache.lucene.store.IndexOutput;
@ -46,7 +47,7 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase {
private IndexOutput termsOut;
private boolean omitTF;
private IndexOptions indexOptions;
private boolean storePayloads;
// one entry per position
@ -102,7 +103,7 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase {
// our parent calls setField whenever the field changes
@Override
public void setField(FieldInfo fieldInfo) {
omitTF = fieldInfo.omitTermFreqAndPositions;
this.indexOptions = fieldInfo.indexOptions;
//System.out.println("PW field=" + fieldInfo.name + " indexOptions=" + indexOptions);
storePayloads = fieldInfo.storePayloads;
wrappedPostingsWriter.setField(fieldInfo);
@ -123,8 +124,11 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase {
assert pendingCount < pending.length;
currentDoc = pending[pendingCount];
currentDoc.docID = docID;
if (omitTF) {
if (indexOptions == IndexOptions.DOCS_ONLY) {
pendingCount++;
} else if (indexOptions == IndexOptions.DOCS_AND_FREQS) {
pendingCount++;
currentDoc.termFreq = termDocFreq;
} else {
currentDoc.termFreq = termDocFreq;
}
@ -196,7 +200,7 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase {
// given codec wants to store other interesting
// stuff, it could use this pulsing codec to do so
if (!omitTF) {
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
int lastDocID = 0;
int pendingIDX = 0;
int lastPayloadLength = -1;
@ -239,7 +243,20 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase {
}
}
}
} else {
} else if (indexOptions == IndexOptions.DOCS_AND_FREQS) {
int lastDocID = 0;
for(int posIDX=0;posIDX<pendingCount;posIDX++) {
final Position doc = pending[posIDX];
final int delta = doc.docID - lastDocID;
if (doc.termFreq == 1) {
buffer.writeVInt((delta<<1)|1);
} else {
buffer.writeVInt(delta<<1);
buffer.writeVInt(doc.termFreq);
}
lastDocID = doc.docID;
}
} else if (indexOptions == IndexOptions.DOCS_ONLY) {
int lastDocID = 0;
for(int posIDX=0;posIDX<pendingCount;posIDX++) {
final Position doc = pending[posIDX];
@ -282,7 +299,7 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase {
wrappedPostingsWriter.startTerm();
// Flush all buffered docs
if (!omitTF) {
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
Position doc = null;
for(Position pos : pending) {
if (doc == null) {
@ -303,7 +320,7 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase {
//wrappedPostingsWriter.finishDoc();
} else {
for(Position doc : pending) {
wrappedPostingsWriter.startDoc(doc.docID, 0);
wrappedPostingsWriter.startDoc(doc.docID, indexOptions == IndexOptions.DOCS_ONLY ? 0 : doc.termFreq);
}
}
pendingCount = -1;

View File

@ -23,6 +23,7 @@ import java.util.Collection;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.TermState;
@ -68,14 +69,17 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
skipIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.SKIP_EXTENSION), context);
if (segmentInfo.getFieldInfos().hasFreq()) {
freqIn = intFactory.openInput(dir, IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.FREQ_EXTENSION), context);
} else {
freqIn = null;
}
if (segmentInfo.getHasProx()) {
freqIn = intFactory.openInput(dir, IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.FREQ_EXTENSION), context);
posIn = intFactory.openInput(dir, IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.POS_EXTENSION), context);
payloadIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.PAYLOAD_EXTENSION), context);
} else {
posIn = null;
payloadIn = null;
freqIn = null;
}
success = true;
} finally {
@ -89,8 +93,11 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.DOC_EXTENSION));
files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.SKIP_EXTENSION));
if (segmentInfo.getHasProx()) {
if (segmentInfo.getFieldInfos().hasFreq()) {
files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.FREQ_EXTENSION));
}
if (segmentInfo.getHasProx()) {
files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.POS_EXTENSION));
files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.PAYLOAD_EXTENSION));
}
@ -229,8 +236,11 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
final boolean isFirstTerm = termState.termCount == 0;
termState.docIndex.read(termState.bytesReader, isFirstTerm);
//System.out.println(" docIndex=" + termState.docIndex);
if (!fieldInfo.omitTermFreqAndPositions) {
if (fieldInfo.indexOptions != IndexOptions.DOCS_ONLY) {
termState.freqIndex.read(termState.bytesReader, isFirstTerm);
}
if (fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
//System.out.println(" freqIndex=" + termState.freqIndex);
termState.posIndex.read(termState.bytesReader, isFirstTerm);
//System.out.println(" posIndex=" + termState.posIndex);
@ -277,7 +287,7 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
@Override
public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState _termState, Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
assert !fieldInfo.omitTermFreqAndPositions;
assert fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
final SepTermState termState = (SepTermState) _termState;
SepDocsAndPositionsEnum postingsEnum;
if (reuse == null || !(reuse instanceof SepDocsAndPositionsEnum)) {
@ -304,6 +314,7 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
// TODO: -- should we do omitTF with 2 different enum classes?
private boolean omitTF;
private IndexOptions indexOptions;
private boolean storePayloads;
private Bits liveDocs;
private final IntIndexInput.Reader docReader;
@ -340,7 +351,8 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
SepDocsEnum init(FieldInfo fieldInfo, SepTermState termState, Bits liveDocs) throws IOException {
this.liveDocs = liveDocs;
omitTF = fieldInfo.omitTermFreqAndPositions;
this.indexOptions = fieldInfo.indexOptions;
omitTF = indexOptions == IndexOptions.DOCS_ONLY;
storePayloads = fieldInfo.storePayloads;
// TODO: can't we only do this if consumer
@ -456,7 +468,7 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
0,
docFreq,
storePayloads);
skipper.setOmitTF(omitTF);
skipper.setIndexOptions(indexOptions);
skipped = true;
}
@ -633,7 +645,7 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
payloadFP,
docFreq,
storePayloads);
skipper.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
skipped = true;
}
final int newCount = skipper.skipTo(target);

View File

@ -23,6 +23,7 @@ import java.util.Set;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.codecs.PostingsWriterBase;
@ -86,7 +87,7 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
final int totalNumDocs;
boolean storePayloads;
boolean omitTF;
IndexOptions indexOptions;
long lastSkipFP;
@ -121,11 +122,13 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
docOut = factory.createOutput(state.directory, docFileName, state.context);
docIndex = docOut.index();
if (state.fieldInfos.hasProx()) {
if (state.fieldInfos.hasFreq()) {
final String frqFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, FREQ_EXTENSION);
freqOut = factory.createOutput(state.directory, frqFileName, state.context);
freqIndex = freqOut.index();
}
if (state.fieldInfos.hasProx()) {
final String posFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, POS_EXTENSION);
posOut = factory.createOutput(state.directory, posFileName, state.context);
posIndex = posOut.index();
@ -168,12 +171,17 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
@Override
public void startTerm() throws IOException {
docIndex.mark();
if (!omitTF) {
if (indexOptions != IndexOptions.DOCS_ONLY) {
freqIndex.mark();
}
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
posIndex.mark();
payloadStart = payloadOut.getFilePointer();
lastPayloadLength = -1;
}
skipListWriter.resetSkip(docIndex, freqIndex, posIndex);
}
@ -182,9 +190,9 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
@Override
public void setField(FieldInfo fieldInfo) {
this.fieldInfo = fieldInfo;
omitTF = fieldInfo.omitTermFreqAndPositions;
skipListWriter.setOmitTF(omitTF);
storePayloads = !omitTF && fieldInfo.storePayloads;
this.indexOptions = fieldInfo.indexOptions;
skipListWriter.setIndexOptions(indexOptions);
storePayloads = indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS && fieldInfo.storePayloads;
}
/** Adds a new doc in this term. If this returns null
@ -209,7 +217,7 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
lastDocID = docID;
docOut.write(delta);
if (!omitTF) {
if (indexOptions != IndexOptions.DOCS_ONLY) {
//System.out.println(" sepw startDoc: write freq=" + termDocFreq);
freqOut.write(termDocFreq);
}
@ -227,7 +235,7 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
/** Add a new position & payload */
@Override
public void addPosition(int position, BytesRef payload) throws IOException {
assert !omitTF;
assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
final int delta = position - lastPosition;
assert delta >= 0: "position=" + position + " lastPosition=" + lastPosition; // not quite right (if pos=0 is repeated twice we don't catch it)
@ -274,10 +282,12 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
docIndex.write(indexBytesWriter, isFirstTerm);
//System.out.println(" docIndex=" + docIndex);
if (!omitTF) {
if (indexOptions != IndexOptions.DOCS_ONLY) {
freqIndex.write(indexBytesWriter, isFirstTerm);
//System.out.println(" freqIndex=" + freqIndex);
}
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
posIndex.write(indexBytesWriter, isFirstTerm);
//System.out.println(" posIndex=" + posIndex);
if (storePayloads) {

View File

@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.codecs.MultiLevelSkipListReader;
/**
@ -87,10 +88,10 @@ class SepSkipListReader extends MultiLevelSkipListReader {
}
}
boolean omitTF;
IndexOptions indexOptions;
void setOmitTF(boolean v) {
omitTF = v;
void setIndexOptions(IndexOptions v) {
indexOptions = v;
}
void init(long skipPointer,
@ -177,7 +178,7 @@ class SepSkipListReader extends MultiLevelSkipListReader {
@Override
protected int readSkipData(int level, IndexInput skipStream) throws IOException {
int delta;
assert !omitTF || !currentFieldStoresPayloads;
assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !currentFieldStoresPayloads;
if (currentFieldStoresPayloads) {
// the current field stores payloads.
// if the doc delta is odd then we have
@ -192,11 +193,11 @@ class SepSkipListReader extends MultiLevelSkipListReader {
} else {
delta = skipStream.readVInt();
}
if (!omitTF) {
if (indexOptions != IndexOptions.DOCS_ONLY) {
freqIndex[level].read(skipStream, false);
}
docIndex[level].read(skipStream, false);
if (!omitTF) {
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
posIndex[level].read(skipStream, false);
if (currentFieldStoresPayloads) {
payloadPointer[level] += skipStream.readVInt();

View File

@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.codecs.MultiLevelSkipListWriter;
// TODO: -- skip data should somehow be more local to the
@ -84,10 +85,10 @@ class SepSkipListWriter extends MultiLevelSkipListWriter {
}
}
boolean omitTF;
IndexOptions indexOptions;
void setOmitTF(boolean v) {
omitTF = v;
void setIndexOptions(IndexOptions v) {
indexOptions = v;
}
void setPosOutput(IntIndexOutput posOutput) throws IOException {
@ -159,7 +160,7 @@ class SepSkipListWriter extends MultiLevelSkipListWriter {
// current payload length equals the length at the previous
// skip point
assert !omitTF || !curStorePayloads;
assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !curStorePayloads;
if (curStorePayloads) {
int delta = curDoc - lastSkipDoc[level];
@ -179,13 +180,13 @@ class SepSkipListWriter extends MultiLevelSkipListWriter {
skipBuffer.writeVInt(curDoc - lastSkipDoc[level]);
}
if (!omitTF) {
if (indexOptions != IndexOptions.DOCS_ONLY) {
freqIndex[level].mark();
freqIndex[level].write(skipBuffer, false);
}
docIndex[level].mark();
docIndex[level].write(skipBuffer, false);
if (!omitTF) {
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
posIndex[level].mark();
posIndex[level].write(skipBuffer, false);
if (curStorePayloads) {

View File

@ -19,6 +19,7 @@ package org.apache.lucene.index.codecs.simpletext;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.index.codecs.FieldsProducer;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.Terms;
@ -53,6 +54,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
final static BytesRef FIELD = SimpleTextFieldsWriter.FIELD;
final static BytesRef TERM = SimpleTextFieldsWriter.TERM;
final static BytesRef DOC = SimpleTextFieldsWriter.DOC;
final static BytesRef FREQ = SimpleTextFieldsWriter.FREQ;
final static BytesRef POS = SimpleTextFieldsWriter.POS;
final static BytesRef PAYLOAD = SimpleTextFieldsWriter.PAYLOAD;
@ -114,16 +116,16 @@ class SimpleTextFieldsReader extends FieldsProducer {
private class SimpleTextTermsEnum extends TermsEnum {
private final IndexInput in;
private final boolean omitTF;
private final IndexOptions indexOptions;
private int docFreq;
private long totalTermFreq;
private long docsStart;
private boolean ended;
private final BytesRefFSTEnum<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> fstEnum;
public SimpleTextTermsEnum(FST<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> fst, boolean omitTF) throws IOException {
public SimpleTextTermsEnum(FST<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> fst, IndexOptions indexOptions) throws IOException {
this.in = (IndexInput) SimpleTextFieldsReader.this.in.clone();
this.omitTF = omitTF;
this.indexOptions = indexOptions;
fstEnum = new BytesRefFSTEnum<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>>(fst);
}
@ -218,12 +220,12 @@ class SimpleTextFieldsReader extends FieldsProducer {
} else {
docsEnum = new SimpleTextDocsEnum();
}
return docsEnum.reset(docsStart, liveDocs, omitTF);
return docsEnum.reset(docsStart, liveDocs, indexOptions == IndexOptions.DOCS_ONLY);
}
@Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
if (omitTF) {
if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
return null;
}
@ -303,8 +305,11 @@ class SimpleTextFieldsReader extends FieldsProducer {
docID = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
termFreq = 0;
first = false;
} else if (scratch.startsWith(FREQ)) {
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+FREQ.length, scratch.length-FREQ.length, scratchUTF16);
termFreq = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
} else if (scratch.startsWith(POS)) {
termFreq++;
// skip termFreq++;
} else if (scratch.startsWith(PAYLOAD)) {
// skip
} else {
@ -384,10 +389,13 @@ class SimpleTextFieldsReader extends FieldsProducer {
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+DOC.length, scratch.length-DOC.length, scratchUTF16);
docID = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
tf = 0;
posStart = in.getFilePointer();
first = false;
} else if (scratch.startsWith(FREQ)) {
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+FREQ.length, scratch.length-FREQ.length, scratchUTF16);
tf = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
posStart = in.getFilePointer();
} else if (scratch.startsWith(POS)) {
tf++;
// skip
} else if (scratch.startsWith(PAYLOAD)) {
// skip
} else {
@ -461,7 +469,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
private class SimpleTextTerms extends Terms {
private final long termsStart;
private final boolean omitTF;
private final IndexOptions indexOptions;
private long sumTotalTermFreq;
private long sumDocFreq;
private FST<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> fst;
@ -470,7 +478,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
public SimpleTextTerms(String field, long termsStart) throws IOException {
this.termsStart = termsStart;
omitTF = fieldInfos.fieldInfo(field).omitTermFreqAndPositions;
indexOptions = fieldInfos.fieldInfo(field).indexOptions;
loadTerms();
}
@ -533,7 +541,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
@Override
public TermsEnum iterator() throws IOException {
if (fst != null) {
return new SimpleTextTermsEnum(fst, omitTF);
return new SimpleTextTermsEnum(fst, indexOptions);
} else {
return TermsEnum.EMPTY;
}

View File

@ -24,6 +24,7 @@ import org.apache.lucene.index.codecs.TermsConsumer;
import org.apache.lucene.index.codecs.PostingsConsumer;
import org.apache.lucene.index.codecs.TermStats;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.IndexOutput;
@ -41,6 +42,7 @@ class SimpleTextFieldsWriter extends FieldsConsumer {
final static BytesRef FIELD = new BytesRef("field ");
final static BytesRef TERM = new BytesRef(" term ");
final static BytesRef DOC = new BytesRef(" doc ");
final static BytesRef FREQ = new BytesRef(" freq ");
final static BytesRef POS = new BytesRef(" pos ");
final static BytesRef PAYLOAD = new BytesRef(" payload ");
@ -73,11 +75,15 @@ class SimpleTextFieldsWriter extends FieldsConsumer {
write(FIELD);
write(field.name);
out.writeByte(NEWLINE);
return new SimpleTextTermsWriter();
return new SimpleTextTermsWriter(field);
}
private class SimpleTextTermsWriter extends TermsConsumer {
private final SimpleTextPostingsWriter postingsWriter = new SimpleTextPostingsWriter();
private final SimpleTextPostingsWriter postingsWriter;
public SimpleTextTermsWriter(FieldInfo field) {
postingsWriter = new SimpleTextPostingsWriter(field);
}
@Override
public PostingsConsumer startTerm(BytesRef term) throws IOException {
@ -101,7 +107,12 @@ class SimpleTextFieldsWriter extends FieldsConsumer {
private class SimpleTextPostingsWriter extends PostingsConsumer {
private BytesRef term;
private boolean wroteTerm;
private IndexOptions indexOptions;
public SimpleTextPostingsWriter(FieldInfo field) {
this.indexOptions = field.indexOptions;
}
@Override
public void startDoc(int docID, int termDocFreq) throws IOException {
if (!wroteTerm) {
@ -115,7 +126,14 @@ class SimpleTextFieldsWriter extends FieldsConsumer {
write(DOC);
write(Integer.toString(docID));
newline();
if (indexOptions != IndexOptions.DOCS_ONLY) {
write(FREQ);
write(Integer.toString(termDocFreq));
newline();
}
}
public PostingsConsumer reset(BytesRef term) {
this.term = term;

View File

@ -23,6 +23,7 @@ import java.util.Collection;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.TermState;
@ -190,7 +191,7 @@ public class StandardPostingsReader extends PostingsReaderBase {
// undefined
}
if (!fieldInfo.omitTermFreqAndPositions) {
if (fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
if (isFirstTerm) {
termState.proxOffset = termState.bytesReader.readVLong();
} else {
@ -219,7 +220,7 @@ public class StandardPostingsReader extends PostingsReaderBase {
@Override
public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
if (fieldInfo.omitTermFreqAndPositions) {
if (fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
return null;
}
@ -282,7 +283,7 @@ public class StandardPostingsReader extends PostingsReaderBase {
}
public SegmentDocsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits liveDocs) throws IOException {
omitTF = fieldInfo.omitTermFreqAndPositions;
omitTF = fieldInfo.indexOptions == IndexOptions.DOCS_ONLY;
if (omitTF) {
freq = 1;
}
@ -455,7 +456,7 @@ public class StandardPostingsReader extends PostingsReaderBase {
}
public SegmentDocsAndPositionsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits liveDocs) throws IOException {
assert !fieldInfo.omitTermFreqAndPositions;
assert fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
assert !fieldInfo.storePayloads;
this.liveDocs = liveDocs;
@ -649,7 +650,7 @@ public class StandardPostingsReader extends PostingsReaderBase {
}
public SegmentDocsAndPositionsAndPayloadsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits liveDocs) throws IOException {
assert !fieldInfo.omitTermFreqAndPositions;
assert fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
assert fieldInfo.storePayloads;
if (payload == null) {
payload = new BytesRef();

View File

@ -25,6 +25,7 @@ import java.io.IOException;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.codecs.PostingsWriterBase;
@ -66,7 +67,7 @@ public final class StandardPostingsWriter extends PostingsWriterBase {
final int totalNumDocs;
IndexOutput termsOut;
boolean omitTermFreqAndPositions;
IndexOptions indexOptions;
boolean storePayloads;
// Starts a new term
long lastFreqStart;
@ -144,7 +145,7 @@ public final class StandardPostingsWriter extends PostingsWriterBase {
public void setField(FieldInfo fieldInfo) {
//System.out.println("SPW: setField");
this.fieldInfo = fieldInfo;
omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
indexOptions = fieldInfo.indexOptions;
storePayloads = fieldInfo.storePayloads;
//System.out.println(" set init blockFreqStart=" + freqStart);
//System.out.println(" set init blockProxStart=" + proxStart);
@ -173,7 +174,7 @@ public final class StandardPostingsWriter extends PostingsWriterBase {
assert docID < totalNumDocs: "docID=" + docID + " totalNumDocs=" + totalNumDocs;
lastDocID = docID;
if (omitTermFreqAndPositions) {
if (indexOptions == IndexOptions.DOCS_ONLY) {
freqOut.writeVInt(delta);
} else if (1 == termDocFreq) {
freqOut.writeVInt((delta<<1) | 1);
@ -189,7 +190,7 @@ public final class StandardPostingsWriter extends PostingsWriterBase {
@Override
public void addPosition(int position, BytesRef payload) throws IOException {
//System.out.println("StandardW: addPos pos=" + position + " payload=" + (payload == null ? "null" : (payload.length + " bytes")) + " proxFP=" + proxOut.getFilePointer());
assert !omitTermFreqAndPositions: "omitTermFreqAndPositions is true";
assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS: "invalid indexOptions: " + indexOptions;
assert proxOut != null;
final int delta = position - lastPosition;
@ -246,7 +247,7 @@ public final class StandardPostingsWriter extends PostingsWriterBase {
bytesWriter.writeVInt((int) (skipListWriter.writeSkip(freqOut)-freqStart));
}
if (!omitTermFreqAndPositions) {
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
//System.out.println(" proxFP=" + proxStart);
if (isFirstTerm) {
bytesWriter.writeVLong(proxStart);

View File

@ -195,7 +195,7 @@ public class MultiPhraseQuery extends Query {
if (postingsEnum == null) {
if (reader.termDocsEnum(liveDocs, term.field(), term.bytes()) != null) {
// term does exist, but has no positions
throw new IllegalStateException("field \"" + term.field() + "\" was indexed with Field.omitTermFreqAndPositions=true; cannot run PhraseQuery (term=" + term.text() + ")");
throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")");
} else {
// term does not exist
return null;
@ -443,7 +443,7 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum {
} else {
if (indexReader.termDocsEnum(liveDocs, terms[i].field(), terms[i].bytes()) != null) {
// term does exist, but has no positions
throw new IllegalStateException("field \"" + terms[i].field() + "\" was indexed with Field.omitTermFreqAndPositions=true; cannot run PhraseQuery (term=" + terms[i].text() + ")");
throw new IllegalStateException("field \"" + terms[i].field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + terms[i].text() + ")");
}
}
}

View File

@ -229,7 +229,7 @@ public class PhraseQuery extends Query {
if (postingsEnum == null) {
assert (reader.termDocsEnum(liveDocs, t.field(), t.bytes(), state) != null) : "termstate found but no term exists in reader";
// term does exist, but has no positions
throw new IllegalStateException("field \"" + t.field() + "\" was indexed with Field.omitTermFreqAndPositions=true; cannot run PhraseQuery (term=" + t.text() + ")");
throw new IllegalStateException("field \"" + t.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + t.text() + ")");
}
// get the docFreq without seeking
TermsEnum te = reader.fields().terms(field).getThreadTermsEnum();

View File

@ -92,7 +92,7 @@ public class SpanTermQuery extends SpanQuery {
} else {
if (reader.termDocsEnum(reader.getLiveDocs(), term.field(), term.bytes()) != null) {
// term does exist, but has no positions
throw new IllegalStateException("field \"" + term.field() + "\" was indexed with Field.omitTermFreqAndPositions=true; cannot run SpanTermQuery (term=" + term.text() + ")");
throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run SpanTermQuery (term=" + term.text() + ")");
} else {
// term does not exist
return TermSpans.EMPTY_TERM_SPANS;

View File

@ -99,6 +99,10 @@
to stored fields file, previously they were stored in
text format only.
</p>
<p>
In version 3.4, fields can omit position data while
still indexing term frequencies.
</p>
</section>
<section id="Definitions"><title>Definitions</title>
@ -276,7 +280,7 @@
<p>Term Frequency
data. For each term in the dictionary, the numbers of all the
documents that contain that term, and the frequency of the term in
that document if omitTf is false.
that document, unless frequencies are omitted (IndexOptions.DOCS_ONLY).
</p>
</li>
@ -284,8 +288,7 @@
<p>Term Proximity
data. For each term in the dictionary, the positions that the term
occurs in each document. Note that this will
not exist if all fields in all documents set
omitTf to true.
not exist if all fields in all documents omit position data.
</p>
</li>
@ -1080,7 +1083,7 @@
<p>
HasProx is 1 if any fields in this segment have
omitTf set to false; else, it's 0.
position data (IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); else, it's 0.
</p>
<p>
@ -1217,11 +1220,13 @@
<li>If the fourth lowest-order bit is set (0x08), term offsets are stored with the term vectors.</li>
<li>If the fifth lowest-order bit is set (0x10), norms are omitted for the indexed field.</li>
<li>If the sixth lowest-order bit is set (0x20), payloads are stored for the indexed field.</li>
<li>If the seventh lowest-order bit is set (0x40), term frequencies and positions are omitted for the indexed field.</li>
<li>If the eighth lowest-order bit is set (0x80), positions are omitted for the indexed field.</li>
</ul>
</p>
<p>
FNMVersion (added in 2.9) is always -2.
FNMVersion (added in 2.9) is -2 for indexes from Lucene 2.9 through 3.3, and -3 for indexes from Lucene 3.4 onward.
</p>
<p>
@ -1419,7 +1424,7 @@
file. In particular, it is the difference between the position of
this term's data in that file and the position of the previous
term's data (or zero, for the first term in the file). For fields
with omitTf true, this will be 0 since
that omit position data, this will be 0 since
prox information is not stored.
</p>
<p>SkipDelta determines the position of this
@ -1494,7 +1499,7 @@
<p>
The .frq file contains the lists of documents
which contain each term, along with the frequency of the term in that
document (if omitTf is false).
document (except when frequencies are omitted: IndexOptions.DOCS_ONLY).
</p>
<p>FreqFile (.frq) --&gt;
&lt;TermFreqs, SkipData&gt;
@ -1531,26 +1536,26 @@
<p>TermFreq
entries are ordered by increasing document number.
</p>
<p>DocDelta: if omitTf is false, this determines both
<p>DocDelta: if frequencies are indexed, this determines both
the document number and the frequency. In
particular, DocDelta/2 is the difference between
this document number and the previous document
number (or zero when this is the first document in
a TermFreqs). When DocDelta is odd, the frequency
is one. When DocDelta is even, the frequency is
read as another VInt. If omitTf is true, DocDelta
read as another VInt. If frequencies are omitted, DocDelta
contains the gap (not multiplied by 2) between
document numbers and no frequency information is
stored.
</p>
<p>For example, the TermFreqs for a term which occurs
once in document seven and three times in document
eleven, with omitTf false, would be the following
eleven, with frequencies indexed, would be the following
sequence of VInts:
</p>
<p>15, 8, 3
</p>
<p> If omitTf were true it would be this sequence
<p> If frequencies were omitted (IndexOptions.DOCS_ONLY) it would be this sequence
of VInts instead:
</p>
<p>
@ -1621,9 +1626,9 @@
<p>
The .prx file contains the lists of positions that
each term occurs at within documents. Note that
fields with omitTf true do not store
fields omitting positional data do not store
anything into this file, and if all fields in the
index have omitTf true then the .prx file will not
index omit positional data then the .prx file will not
exist.
</p>
<p>ProxFile (.prx) --&gt;

View File

@ -29,6 +29,7 @@ import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
@ -67,7 +68,7 @@ class DocHelper {
public static Field noTFField = new Field(NO_TF_KEY, NO_TF_TEXT,
Field.Store.YES, Field.Index.ANALYZED);
static {
noTFField.setOmitTermFreqAndPositions(true);
noTFField.setIndexOptions(IndexOptions.DOCS_ONLY);
}
public static final String UNINDEXED_FIELD_TEXT = "unindexed field text";
@ -173,7 +174,7 @@ class DocHelper {
if (f.isStored()) add(stored,f);
else add(unstored,f);
if (f.getOmitNorms()) add(noNorms,f);
if (f.getOmitTermFreqAndPositions()) add(noTf,f);
if (f.getIndexOptions() == IndexOptions.DOCS_ONLY) add(noTf,f);
if (f.isLazy()) add(lazy, f);
}
}

View File

@ -22,6 +22,7 @@ import java.util.Comparator;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.codecs.FieldsConsumer;
@ -90,7 +91,7 @@ class PreFlexFieldsWriter extends FieldsConsumer {
public PreFlexTermsWriter(FieldInfo fieldInfo) {
this.fieldInfo = fieldInfo;
omitTF = fieldInfo.omitTermFreqAndPositions;
omitTF = fieldInfo.indexOptions == IndexOptions.DOCS_ONLY;
storePayloads = fieldInfo.storePayloads;
}

View File

@ -422,7 +422,7 @@ public class _TestUtil {
List<Fieldable> fields = doc.getFields();
for (Fieldable field : fields) {
fieldInfos.addOrUpdate(field.name(), field.isIndexed(), field.isTermVectorStored(), field.isStorePositionWithTermVector(),
field.isStoreOffsetWithTermVector(), field.getOmitNorms(), false, field.getOmitTermFreqAndPositions(), field.docValuesType());
field.isStoreOffsetWithTermVector(), field.getOmitNorms(), false, field.getIndexOptions(), field.docValuesType());
}
}
@ -507,7 +507,7 @@ public class _TestUtil {
field1.isStored() ? Field.Store.YES : Field.Store.NO,
field1.isIndexed() ? (field1.isTokenized() ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED) : Field.Index.NO);
field2.setOmitNorms(field1.getOmitNorms());
field2.setOmitTermFreqAndPositions(field1.getOmitTermFreqAndPositions());
field2.setIndexOptions(field1.getIndexOptions());
doc2.add(field2);
}

View File

@ -24,6 +24,7 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.LuceneTestCase;
@ -62,7 +63,7 @@ public class Test2BPostings extends LuceneTestCase {
Document doc = new Document();
Field field = new Field("field", new MyTokenStream());
field.setOmitTermFreqAndPositions(true);
field.setIndexOptions(IndexOptions.DOCS_ONLY);
field.setOmitNorms(true);
doc.add(field);

View File

@ -23,6 +23,7 @@ import org.apache.lucene.search.*;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.tokenattributes.*;
import org.apache.lucene.document.*;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.codecs.CodecProvider;
import java.io.File;
import java.io.IOException;
@ -177,7 +178,7 @@ public class Test2BTerms extends LuceneTestCase {
Document doc = new Document();
final MyTokenStream ts = new MyTokenStream(random, TERMS_PER_DOC);
Field field = new Field("field", ts);
field.setOmitTermFreqAndPositions(true);
field.setIndexOptions(IndexOptions.DOCS_ONLY);
field.setOmitNorms(true);
doc.add(field);
//w.setInfoStream(System.out);

View File

@ -31,6 +31,7 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.DocIdSetIterator;
@ -606,10 +607,10 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
private void addNoProxDoc(IndexWriter writer) throws IOException {
Document doc = new Document();
Field f = new Field("content3", "aaa", Field.Store.YES, Field.Index.ANALYZED);
f.setOmitTermFreqAndPositions(true);
f.setIndexOptions(IndexOptions.DOCS_ONLY);
doc.add(f);
f = new Field("content4", "aaa", Field.Store.YES, Field.Index.NO);
f.setOmitTermFreqAndPositions(true);
f.setIndexOptions(IndexOptions.DOCS_ONLY);
doc.add(f);
writer.addDocument(doc);
}

View File

@ -25,6 +25,7 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.index.codecs.FieldsProducer;
@ -84,7 +85,8 @@ public class TestCodecs extends LuceneTestCase {
this.storePayloads = storePayloads;
fieldInfos.addOrUpdate(name, true);
fieldInfo = fieldInfos.fieldInfo(name);
fieldInfo.omitTermFreqAndPositions = omitTF;
// TODO: change this test to use all three
fieldInfo.indexOptions = omitTF ? IndexOptions.DOCS_ONLY : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
fieldInfo.storePayloads = storePayloads;
this.terms = terms;
for(int i=0;i<terms.length;i++)

View File

@ -34,6 +34,7 @@ import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IOContext.Context;
@ -303,7 +304,7 @@ public class TestDocumentWriter extends LuceneTestCase {
doc.add(newField("f1", "v2", Store.YES, Index.NO));
// f2 has no TF
Field f = newField("f2", "v1", Store.NO, Index.ANALYZED);
f.setOmitTermFreqAndPositions(true);
f.setIndexOptions(IndexOptions.DOCS_ONLY);
doc.add(f);
doc.add(newField("f2", "v2", Store.YES, Index.NO));
@ -319,10 +320,10 @@ public class TestDocumentWriter extends LuceneTestCase {
FieldInfos fi = reader.fieldInfos();
// f1
assertFalse("f1 should have no norms", reader.hasNorms("f1"));
assertFalse("omitTermFreqAndPositions field bit should not be set for f1", fi.fieldInfo("f1").omitTermFreqAndPositions);
assertEquals("omitTermFreqAndPositions field bit should not be set for f1", IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, fi.fieldInfo("f1").indexOptions);
// f2
assertTrue("f2 should have norms", reader.hasNorms("f2"));
assertTrue("omitTermFreqAndPositions field bit should be set for f2", fi.fieldInfo("f2").omitTermFreqAndPositions);
assertEquals("omitTermFreqAndPositions field bit should be set for f2", IndexOptions.DOCS_ONLY, fi.fieldInfo("f2").indexOptions);
reader.close();
}
}

View File

@ -20,6 +20,7 @@ package org.apache.lucene.index;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;
@ -137,7 +138,7 @@ public class TestFieldInfos extends LuceneTestCase {
try {
readOnly.addOrUpdate("bogus", random.nextBoolean(), random.nextBoolean(),
random.nextBoolean(), random.nextBoolean(), random.nextBoolean(),
random.nextBoolean(), random.nextBoolean(), null);
random.nextBoolean(), random.nextBoolean() ? IndexOptions.DOCS_ONLY : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, null);
fail("instance should be read only");
} catch (IllegalStateException e) {
// expected

View File

@ -30,6 +30,7 @@ import org.apache.lucene.document.FieldSelectorResult;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.LoadFirstFieldSelector;
import org.apache.lucene.document.SetBasedFieldSelector;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.store.AlreadyClosedException;
@ -91,7 +92,7 @@ public class TestFieldsReader extends LuceneTestCase {
assertTrue(field.isStoreOffsetWithTermVector() == true);
assertTrue(field.isStorePositionWithTermVector() == true);
assertTrue(field.getOmitNorms() == false);
assertTrue(field.getOmitTermFreqAndPositions() == false);
assertTrue(field.getIndexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
field = doc.getField(DocHelper.TEXT_FIELD_3_KEY);
assertTrue(field != null);
@ -99,7 +100,7 @@ public class TestFieldsReader extends LuceneTestCase {
assertTrue(field.isStoreOffsetWithTermVector() == false);
assertTrue(field.isStorePositionWithTermVector() == false);
assertTrue(field.getOmitNorms() == true);
assertTrue(field.getOmitTermFreqAndPositions() == false);
assertTrue(field.getIndexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
field = doc.getField(DocHelper.NO_TF_KEY);
assertTrue(field != null);
@ -107,7 +108,7 @@ public class TestFieldsReader extends LuceneTestCase {
assertTrue(field.isStoreOffsetWithTermVector() == false);
assertTrue(field.isStorePositionWithTermVector() == false);
assertTrue(field.getOmitNorms() == false);
assertTrue(field.getOmitTermFreqAndPositions() == true);
assertTrue(field.getIndexOptions() == IndexOptions.DOCS_ONLY);
reader.close();
}

View File

@ -26,6 +26,7 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
@ -63,8 +64,8 @@ public class TestLongPostings extends LuceneTestCase {
}
public void testLongPostings() throws Exception {
assumeFalse("Too slow with SimpleText codec", CodecProvider.getDefault().getFieldCodec("field").equals("SimpleText"));
assumeFalse("Too slow with Memory codec", CodecProvider.getDefault().getFieldCodec("field").equals("Memory"));
assumeFalse("Too slow with SimpleText codec at night", TEST_NIGHTLY && CodecProvider.getDefault().getFieldCodec("field").equals("SimpleText"));
assumeFalse("Too slow with Memory codec at night", TEST_NIGHTLY && CodecProvider.getDefault().getFieldCodec("field").equals("Memory"));
// Don't use _TestUtil.getTempDir so that we own the
// randomness (ie same seed will point to same dir):
@ -250,4 +251,187 @@ public class TestLongPostings extends LuceneTestCase {
r.close();
dir.close();
}
/**
 * A weaker form of testLongPostings that doesn't check positions: runs the
 * positionless long-postings check once for each index option that omits
 * positions (docs only, then docs and freqs).
 */
public void testLongPostingsNoPositions() throws Exception {
  final IndexOptions[] positionlessOptions = {
    IndexOptions.DOCS_ONLY,
    IndexOptions.DOCS_AND_FREQS
  };
  for (final IndexOptions positionless : positionlessOptions) {
    doTestLongPostingsNoPositions(positionless);
  }
}
/**
 * Indexes atLeast(2000) documents, each containing exactly one of two random
 * terms (s1 or s2) in "field" with the supplied index options, then runs
 * atLeast(1000) rounds in which the postings of a randomly chosen term are
 * walked with a random mix of nextDoc() and advance() calls. Every returned
 * doc ID is compared against the value predicted from an in-memory bit set,
 * and freq() is occasionally sampled and required to be in [1, 4].
 *
 * The test is skipped (via assumeFalse) on nightly runs when the codec for
 * "field" is SimpleText or Memory.
 *
 * @param options the index options applied to every indexed field instance;
 *                callers in this file pass only options that omit positions
 */
public void doTestLongPostingsNoPositions(IndexOptions options) throws Exception {
assumeFalse("Too slow with SimpleText codec at night", TEST_NIGHTLY && CodecProvider.getDefault().getFieldCodec("field").equals("SimpleText"));
assumeFalse("Too slow with Memory codec at night", TEST_NIGHTLY && CodecProvider.getDefault().getFieldCodec("field").equals("Memory"));
// Don't use _TestUtil.getTempDir so that we own the
// randomness (ie same seed will point to same dir):
// NOTE(review): the line below does call _TestUtil.getTempDir; the
// seed-dependent part is the random.nextLong() suffix in the name.
Directory dir = newFSDirectory(_TestUtil.getTempDir("longpostings" + "." + random.nextLong()));
final int NUM_DOCS = atLeast(2000);
if (VERBOSE) {
System.out.println("TEST: NUM_DOCS=" + NUM_DOCS);
}
// Two terms; s1 is passed to the second call, presumably so that s2 is
// guaranteed to differ from it (getRandomTerm is defined elsewhere).
final String s1 = getRandomTerm(null);
final String s2 = getRandomTerm(s1);
if (VERBOSE) {
System.out.println("\nTEST: s1=" + s1 + " s2=" + s2);
/*
for(int idx=0;idx<s1.length();idx++) {
System.out.println(" s1 ch=0x" + Integer.toHexString(s1.charAt(idx)));
}
for(int idx=0;idx<s2.length();idx++) {
System.out.println(" s2 ch=0x" + Integer.toHexString(s2.charAt(idx)));
}
*/
}
// Ground truth: bit idx is set when document idx is given term s1,
// clear when it is given s2. Each bit is chosen by a coin flip.
final FixedBitSet isS1 = new FixedBitSet(NUM_DOCS);
for(int idx=0;idx<NUM_DOCS;idx++) {
if (random.nextBoolean()) {
isS1.set(idx);
}
}
final IndexReader r;
// The condition is the constant true, so the index is always rebuilt;
// the else branch (re-opening an existing index in dir) is never taken.
if (true) {
final IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
.setMergePolicy(newLogMergePolicy());
// RAM buffer is randomized between 16 MB and 32 MB.
iwc.setRAMBufferSizeMB(16.0 + 16.0 * random.nextDouble());
// presumably -1 disables flushing by document count; confirm against IndexWriterConfig
iwc.setMaxBufferedDocs(-1);
final RandomIndexWriter riw = new RandomIndexWriter(random, dir, iwc);
for(int idx=0;idx<NUM_DOCS;idx++) {
final Document doc = new Document();
String s = isS1.get(idx) ? s1 : s2;
final Field f = newField("field", s, Field.Index.ANALYZED);
f.setIndexOptions(options);
// The same Field instance is added `count` times; count is drawn from
// _TestUtil.nextInt(random, 1, 4), which the freq assertions below
// treat as the range 1..4.
final int count = _TestUtil.nextInt(random, 1, 4);
for(int ct=0;ct<count;ct++) {
doc.add(f);
}
riw.addDocument(doc);
}
r = riw.getReader();
riw.close();
} else {
r = IndexReader.open(dir);
}
/*
if (VERBOSE) {
System.out.println("TEST: terms");
TermEnum termEnum = r.terms();
while(termEnum.next()) {
System.out.println(" term=" + termEnum.term() + " len=" + termEnum.term().text().length());
assertTrue(termEnum.docFreq() > 0);
System.out.println(" s1?=" + (termEnum.term().text().equals(s1)) + " s1len=" + s1.length());
System.out.println(" s2?=" + (termEnum.term().text().equals(s2)) + " s2len=" + s2.length());
final String s = termEnum.term().text();
for(int idx=0;idx<s.length();idx++) {
System.out.println(" ch=0x" + Integer.toHexString(s.charAt(idx)));
}
}
}
*/
// Sanity checks: every document made it in, and both terms occur somewhere.
assertEquals(NUM_DOCS, r.numDocs());
assertTrue(r.docFreq(new Term("field", s1)) > 0);
assertTrue(r.docFreq(new Term("field", s2)) > 0);
int num = atLeast(1000);
for(int iter=0;iter<num;iter++) {
// Pick which term's postings to walk in this round.
final String term;
final boolean doS1;
if (random.nextBoolean()) {
term = s1;
doS1 = true;
} else {
term = s2;
doS1 = false;
}
if (VERBOSE) {
System.out.println("\nTEST: iter=" + iter + " doS1=" + doS1);
}
final DocsEnum postings = MultiFields.getTermDocsEnum(r, null, "field", new BytesRef(term));
// -1 means the enum has not been positioned yet.
int docID = -1;
while(docID < DocsEnum.NO_MORE_DOCS) {
// random.nextInt(3) == 0 selects nextDoc(); the other two outcomes
// select advance().
final int what = random.nextInt(3);
if (what == 0) {
if (VERBOSE) {
System.out.println("TEST: docID=" + docID + "; do next()");
}
// nextDoc
// Predict the result: the first doc after docID whose bit matches the
// chosen term, or Integer.MAX_VALUE once the scan runs past the last
// doc (the assertion below compares this against the enum's return
// value, which is then checked against NO_MORE_DOCS).
int expected = docID+1;
while(true) {
if (expected == NUM_DOCS) {
expected = Integer.MAX_VALUE;
break;
} else if (isS1.get(expected) == doS1) {
break;
} else {
expected++;
}
}
docID = postings.nextDoc();
if (VERBOSE) {
System.out.println(" got docID=" + docID);
}
assertEquals(expected, docID);
if (docID == DocsEnum.NO_MORE_DOCS) {
break;
}
// Sample freq() on roughly one in six positioned docs; it must lie
// within the 1..4 range used when the field was added.
if (random.nextInt(6) == 3) {
final int freq = postings.freq();
assertTrue(freq >=1 && freq <= 4);
}
} else {
// advance
// Choose a target strictly beyond the current doc (or anywhere in
// 0..NUM_DOCS when the enum is still unpositioned).
final int targetDocID;
if (docID == -1) {
targetDocID = random.nextInt(NUM_DOCS+1);
} else {
targetDocID = docID + _TestUtil.nextInt(random, 1, NUM_DOCS - docID);
}
if (VERBOSE) {
System.out.println("TEST: docID=" + docID + "; do advance(" + targetDocID + ")");
}
// Predict the result: the first doc at or after the target whose bit
// matches the chosen term, or Integer.MAX_VALUE if there is none.
int expected = targetDocID;
while(true) {
if (expected == NUM_DOCS) {
expected = Integer.MAX_VALUE;
break;
} else if (isS1.get(expected) == doS1) {
break;
} else {
expected++;
}
}
docID = postings.advance(targetDocID);
if (VERBOSE) {
System.out.println(" got docID=" + docID);
}
assertEquals(expected, docID);
if (docID == DocsEnum.NO_MORE_DOCS) {
break;
}
// Same occasional freq() sampling as in the nextDoc branch.
if (random.nextInt(6) == 3) {
final int freq = postings.freq();
assertTrue(freq >=1 && freq <= 4);
}
}
}
}
r.close();
dir.close();
}
}

View File

@ -0,0 +1,232 @@
package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
/**
*
* @lucene.experimental
*/
public class TestOmitPositions extends LuceneTestCase {
/**
 * Indexes 100 copies of a document whose "foo" field is indexed with
 * DOCS_AND_FREQS, then checks that no positions enum is available for the
 * term "test" while the docs enum still reports a frequency of 2 (the term
 * occurs twice in the field text) for every matching document.
 */
public void testBasic() throws Exception {
  final Directory directory = newDirectory();
  final RandomIndexWriter indexWriter = new RandomIndexWriter(random, directory);

  final Document document = new Document();
  final Field fooField = newField("foo", "this is a test test", Field.Index.ANALYZED);
  fooField.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  document.add(fooField);

  int remaining = 100;
  while (remaining > 0) {
    indexWriter.addDocument(document);
    remaining--;
  }

  final IndexReader indexReader = indexWriter.getReader();
  indexWriter.close();

  // Positions were not indexed, so asking for them must yield null.
  assertNull(MultiFields.getTermPositionsEnum(indexReader, null, "foo", new BytesRef("test")));

  // Frequencies were indexed and must still be readable.
  final DocsEnum docsEnum = MultiFields.getTermDocsEnum(indexReader, null, "foo", new BytesRef("test"));
  for (int docID = docsEnum.nextDoc();
       docID != DocIdSetIterator.NO_MORE_DOCS;
       docID = docsEnum.nextDoc()) {
    assertEquals(2, docsEnum.freq());
  }

  indexReader.close();
  directory.close();
}
/**
 * Tests that the index options recorded in the FieldInfo are reconciled
 * correctly when the same field name is indexed with different
 * {@link IndexOptions} in different documents.
 *
 * Nine fields (f1..f9) cover every ordered pairing of the three options:
 * the first document fixes the option per group of three (f1-f3 docs only,
 * f4-f6 docs and freqs, f7-f9 full postings) and the second document cycles
 * the option within each group. After the segments are merged, each field
 * is expected to carry the option that indexes the least data of its pair.
 */
public void testPositions() throws Exception {
  Directory ram = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random);
  IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer));

  final String docsOnlyText = "This field has docs only";
  final String docsAndFreqsText = "This field has docs and freqs";
  final String fullPostingsText = "This field has docs and freqs and positions";

  Document d = new Document();

  // f1,f2,f3: docs only
  addFieldWithOptions(d, "f1", docsOnlyText, IndexOptions.DOCS_ONLY);
  addFieldWithOptions(d, "f2", docsOnlyText, IndexOptions.DOCS_ONLY);
  addFieldWithOptions(d, "f3", docsOnlyText, IndexOptions.DOCS_ONLY);

  // f4,f5,f6: docs and freqs
  addFieldWithOptions(d, "f4", docsAndFreqsText, IndexOptions.DOCS_AND_FREQS);
  addFieldWithOptions(d, "f5", docsAndFreqsText, IndexOptions.DOCS_AND_FREQS);
  addFieldWithOptions(d, "f6", docsAndFreqsText, IndexOptions.DOCS_AND_FREQS);

  // f7,f8,f9: docs/freqs/positions
  addFieldWithOptions(d, "f7", fullPostingsText, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
  addFieldWithOptions(d, "f8", fullPostingsText, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
  addFieldWithOptions(d, "f9", fullPostingsText, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);

  writer.addDocument(d);
  writer.optimize();

  // now we add another document which has docs-only for f1, f4, f7, docs/freqs for f2, f5, f8,
  // and docs/freqs/positions for f3, f6, f9
  d = new Document();

  // f1,f4,f7: docs only
  addFieldWithOptions(d, "f1", docsOnlyText, IndexOptions.DOCS_ONLY);
  addFieldWithOptions(d, "f4", docsOnlyText, IndexOptions.DOCS_ONLY);
  addFieldWithOptions(d, "f7", docsOnlyText, IndexOptions.DOCS_ONLY);

  // f2,f5,f8: docs and freqs
  addFieldWithOptions(d, "f2", docsAndFreqsText, IndexOptions.DOCS_AND_FREQS);
  addFieldWithOptions(d, "f5", docsAndFreqsText, IndexOptions.DOCS_AND_FREQS);
  addFieldWithOptions(d, "f8", docsAndFreqsText, IndexOptions.DOCS_AND_FREQS);

  // f3,f6,f9: docs and freqs and positions.
  // f6 must really request full postings here; otherwise the
  // "docs/freqs + docs/freqs/pos" case asserted below is never exercised.
  addFieldWithOptions(d, "f3", fullPostingsText, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
  addFieldWithOptions(d, "f6", fullPostingsText, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
  addFieldWithOptions(d, "f9", fullPostingsText, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);

  writer.addDocument(d);

  // force merge
  writer.optimize();
  // flush
  writer.close();

  SegmentReader reader = getOnlySegmentReader(IndexReader.open(ram, false));
  FieldInfos fi = reader.fieldInfos();
  // docs + docs = docs
  assertEquals(IndexOptions.DOCS_ONLY, fi.fieldInfo("f1").indexOptions);
  // docs + docs/freqs = docs
  assertEquals(IndexOptions.DOCS_ONLY, fi.fieldInfo("f2").indexOptions);
  // docs + docs/freqs/pos = docs
  assertEquals(IndexOptions.DOCS_ONLY, fi.fieldInfo("f3").indexOptions);
  // docs/freqs + docs = docs
  assertEquals(IndexOptions.DOCS_ONLY, fi.fieldInfo("f4").indexOptions);
  // docs/freqs + docs/freqs = docs/freqs
  assertEquals(IndexOptions.DOCS_AND_FREQS, fi.fieldInfo("f5").indexOptions);
  // docs/freqs + docs/freqs/pos = docs/freqs
  assertEquals(IndexOptions.DOCS_AND_FREQS, fi.fieldInfo("f6").indexOptions);
  // docs/freqs/pos + docs = docs
  assertEquals(IndexOptions.DOCS_ONLY, fi.fieldInfo("f7").indexOptions);
  // docs/freqs/pos + docs/freqs = docs/freqs
  assertEquals(IndexOptions.DOCS_AND_FREQS, fi.fieldInfo("f8").indexOptions);
  // docs/freqs/pos + docs/freqs/pos = docs/freqs/pos
  assertEquals(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, fi.fieldInfo("f9").indexOptions);

  reader.close();
  ram.close();
}

/**
 * Creates an analyzed, non-stored field with the given index options and
 * appends it to the document.
 *
 * @param doc     document receiving the new field
 * @param name    field name
 * @param text    field text to analyze
 * @param options index options to set on the new field
 */
private void addFieldWithOptions(Document doc, String name, String text, IndexOptions options) {
  Field field = newField(name, text, Field.Store.NO, Field.Index.ANALYZED);
  field.setIndexOptions(options);
  doc.add(field);
}
/**
 * Asserts that the directory contains no file whose name ends with ".prx"
 * or ".pos".
 *
 * @param dir directory whose file listing is inspected
 */
private void assertNoPrx(Directory dir) throws Throwable {
  for (final String fileName : dir.listAll()) {
    assertFalse(fileName.endsWith(".prx"));
    assertFalse(fileName.endsWith(".pos"));
  }
}
/**
 * Verifies that no *.prx / *.pos file exists when a field omits term
 * positions: first after committing documents indexed with DOCS_AND_FREQS,
 * and again after adding documents for the same field without explicit
 * index options and forcing a merge.
 */
public void testNoPrxFile() throws Throwable {
  final Directory ram = newDirectory();
  final Analyzer analyzer = new MockAnalyzer(random);

  final IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
  config.setMaxBufferedDocs(3);
  config.setMergePolicy(newLogMergePolicy());
  final IndexWriter writer = new IndexWriter(ram, config);

  // Non-compound files so that a positions file, if written, is visible in the listing.
  final LogMergePolicy mergePolicy = (LogMergePolicy) writer.getConfig().getMergePolicy();
  mergePolicy.setMergeFactor(2);
  mergePolicy.setUseCompoundFile(false);

  final Document withFreqs = new Document();
  final Field freqsField = newField("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
  freqsField.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  withFreqs.add(freqsField);

  for (int added = 0; added < 30; added++) {
    writer.addDocument(withFreqs);
  }

  writer.commit();
  assertNoPrx(ram);

  // now add some documents with positions, and check there is no prox after optimization
  final Document withPositions = new Document();
  final Field positionsField = newField("f1", "This field has positions", Field.Store.NO, Field.Index.ANALYZED);
  withPositions.add(positionsField);

  for (int added = 0; added < 30; added++) {
    writer.addDocument(withPositions);
  }

  // force merge
  writer.optimize();
  // flush
  writer.close();

  assertNoPrx(ram);
  ram.close();
}
}

View File

@ -26,6 +26,7 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.*;
import org.apache.lucene.search.BooleanClause.Occur;
@ -65,7 +66,7 @@ public class TestOmitTf extends LuceneTestCase {
// this field will NOT have Tf
Field f2 = newField("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED);
f2.setOmitTermFreqAndPositions(true);
f2.setIndexOptions(IndexOptions.DOCS_ONLY);
d.add(f2);
writer.addDocument(d);
@ -75,10 +76,10 @@ public class TestOmitTf extends LuceneTestCase {
d = new Document();
// Reverse
f1.setOmitTermFreqAndPositions(true);
f1.setIndexOptions(IndexOptions.DOCS_ONLY);
d.add(f1);
f2.setOmitTermFreqAndPositions(false);
f2.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
d.add(f2);
writer.addDocument(d);
@ -90,8 +91,8 @@ public class TestOmitTf extends LuceneTestCase {
SegmentReader reader = getOnlySegmentReader(IndexReader.open(ram, false));
FieldInfos fi = reader.fieldInfos();
assertTrue("OmitTermFreqAndPositions field bit should be set.", fi.fieldInfo("f1").omitTermFreqAndPositions);
assertTrue("OmitTermFreqAndPositions field bit should be set.", fi.fieldInfo("f2").omitTermFreqAndPositions);
assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f1").indexOptions);
assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f2").indexOptions);
reader.close();
ram.close();
@ -117,7 +118,7 @@ public class TestOmitTf extends LuceneTestCase {
// this field will NOT have Tf
Field f2 = newField("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED);
f2.setOmitTermFreqAndPositions(true);
f2.setIndexOptions(IndexOptions.DOCS_ONLY);
d.add(f2);
for(int i=0;i<30;i++)
@ -128,10 +129,10 @@ public class TestOmitTf extends LuceneTestCase {
d = new Document();
// Reverse
f1.setOmitTermFreqAndPositions(true);
f1.setIndexOptions(IndexOptions.DOCS_ONLY);
d.add(f1);
f2.setOmitTermFreqAndPositions(false);
f2.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
d.add(f2);
for(int i=0;i<30;i++)
@ -144,8 +145,8 @@ public class TestOmitTf extends LuceneTestCase {
SegmentReader reader = getOnlySegmentReader(IndexReader.open(ram, false));
FieldInfos fi = reader.fieldInfos();
assertTrue("OmitTermFreqAndPositions field bit should be set.", fi.fieldInfo("f1").omitTermFreqAndPositions);
assertTrue("OmitTermFreqAndPositions field bit should be set.", fi.fieldInfo("f2").omitTermFreqAndPositions);
assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f1").indexOptions);
assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f2").indexOptions);
reader.close();
ram.close();
@ -176,7 +177,7 @@ public class TestOmitTf extends LuceneTestCase {
for(int i=0;i<5;i++)
writer.addDocument(d);
f2.setOmitTermFreqAndPositions(true);
f2.setIndexOptions(IndexOptions.DOCS_ONLY);
for(int i=0;i<20;i++)
writer.addDocument(d);
@ -189,8 +190,8 @@ public class TestOmitTf extends LuceneTestCase {
SegmentReader reader = getOnlySegmentReader(IndexReader.open(ram, false));
FieldInfos fi = reader.fieldInfos();
assertTrue("OmitTermFreqAndPositions field bit should not be set.", !fi.fieldInfo("f1").omitTermFreqAndPositions);
assertTrue("OmitTermFreqAndPositions field bit should be set.", fi.fieldInfo("f2").omitTermFreqAndPositions);
assertEquals("OmitTermFreqAndPositions field bit should not be set.", IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, fi.fieldInfo("f1").indexOptions);
assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f2").indexOptions);
reader.close();
ram.close();
@ -198,8 +199,10 @@ public class TestOmitTf extends LuceneTestCase {
private void assertNoPrx(Directory dir) throws Throwable {
final String[] files = dir.listAll();
for(int i=0;i<files.length;i++)
for(int i=0;i<files.length;i++) {
assertFalse(files[i].endsWith(".prx"));
assertFalse(files[i].endsWith(".pos"));
}
}
// Verifies no *.prx exists when all fields omit term freq:
@ -213,8 +216,8 @@ public class TestOmitTf extends LuceneTestCase {
lmp.setUseCompoundFile(false);
Document d = new Document();
Field f1 = newField("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
f1.setOmitTermFreqAndPositions(true);
Field f1 = newField("f1", "This field has no term freqs", Field.Store.NO, Field.Index.ANALYZED);
f1.setIndexOptions(IndexOptions.DOCS_ONLY);
d.add(f1);
for(int i=0;i<30;i++)
@ -223,7 +226,15 @@ public class TestOmitTf extends LuceneTestCase {
writer.commit();
assertNoPrx(ram);
// now add some documents with positions, and check there is no prox after optimization
d = new Document();
f1 = newField("f1", "This field has positions", Field.Store.NO, Field.Index.ANALYZED);
d.add(f1);
for(int i=0;i<30;i++)
writer.addDocument(d);
// force merge
writer.optimize();
// flush
@ -253,7 +264,7 @@ public class TestOmitTf extends LuceneTestCase {
sb.append(term).append(" ");
String content = sb.toString();
Field noTf = newField("noTf", content + (i%2==0 ? "" : " notf"), Field.Store.NO, Field.Index.ANALYZED);
noTf.setOmitTermFreqAndPositions(true);
noTf.setIndexOptions(IndexOptions.DOCS_ONLY);
d.add(noTf);
Field tf = newField("tf", content + (i%2==0 ? " tf" : ""), Field.Store.NO, Field.Index.ANALYZED);

View File

@ -21,6 +21,7 @@ import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
@ -181,7 +182,7 @@ public class LuceneTaxonomyWriter implements TaxonomyWriter {
parentStreamField = new Field(Consts.FIELD_PAYLOADS, parentStream);
parentStreamField.setOmitNorms(true);
fullPathField = new Field(Consts.FULL, "", Store.YES, Index.NOT_ANALYZED_NO_NORMS);
fullPathField.setOmitTermFreqAndPositions(true);
fullPathField.setIndexOptions(IndexOptions.DOCS_ONLY);
this.nextID = indexWriter.maxDoc();

View File

@ -24,6 +24,7 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
@ -395,7 +396,7 @@ public class TestGrouping extends LuceneTestCase {
}
// So we can pull filter marking last doc in block:
final Field groupEnd = newField("groupend", "x", Field.Index.NOT_ANALYZED);
groupEnd.setOmitTermFreqAndPositions(true);
groupEnd.setIndexOptions(IndexOptions.DOCS_ONLY);
groupEnd.setOmitNorms(true);
docs.get(docs.size()-1).add(groupEnd);
// Add as a doc block:

View File

@ -28,6 +28,7 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
@ -618,7 +619,7 @@ public class SpellChecker implements java.io.Closeable {
// the word field is never queried on... its indexed so it can be quickly
// checked for rebuild (and stored for retrieval). Doesn't need norms or TF/pos
Field f = new Field(F_WORD, text, Field.Store.YES, Field.Index.NOT_ANALYZED);
f.setOmitTermFreqAndPositions(true);
f.setIndexOptions(IndexOptions.DOCS_ONLY);
f.setOmitNorms(true);
doc.add(f); // orig term
addGram(text, doc, ng1, ng2);
@ -636,7 +637,7 @@ public class SpellChecker implements java.io.Closeable {
if (i == 0) {
// only one term possible in the startXXField, TF/pos and norms aren't needed.
Field startField = new Field("start" + ng, gram, Field.Store.NO, Field.Index.NOT_ANALYZED);
startField.setOmitTermFreqAndPositions(true);
startField.setIndexOptions(IndexOptions.DOCS_ONLY);
startField.setOmitNorms(true);
doc.add(startField);
}
@ -645,7 +646,7 @@ public class SpellChecker implements java.io.Closeable {
if (end != null) { // may not be present if len==ng1
// only one term possible in the endXXField, TF/pos and norms aren't needed.
Field endField = new Field("end" + ng, end, Field.Store.NO, Field.Index.NOT_ANALYZED);
endField.setOmitTermFreqAndPositions(true);
endField.setIndexOptions(IndexOptions.DOCS_ONLY);
endField.setOmitNorms(true);
doc.add(endField);
}

View File

@ -321,6 +321,8 @@ New Features
before adding to the index. Fix a null pointer exception in logging
when there was no unique key. (David Smiley via yonik)
* LUCENE-2048: Added omitPositions to the schema, so you can omit position
information while still indexing term frequencies. (rmuir)
Optimizations
----------------------

View File

@ -35,6 +35,7 @@ import org.slf4j.LoggerFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Fields;
@ -202,7 +203,10 @@ public class LukeRequestHandler extends RequestHandlerBase
flags.append( (f != null && f.storeTermOffsets() ) ? FieldFlag.TERM_VECTOR_OFFSET.getAbbreviation() : '-' );
flags.append( (f != null && f.storeTermPositions() ) ? FieldFlag.TERM_VECTOR_POSITION.getAbbreviation() : '-' );
flags.append( (f != null && f.omitNorms()) ? FieldFlag.OMIT_NORMS.getAbbreviation() : '-' );
flags.append( (f != null && f.omitTf()) ? FieldFlag.OMIT_TF.getAbbreviation() : '-' );
flags.append( (f != null &&
f.indexOptions() == IndexOptions.DOCS_ONLY) ? FieldFlag.OMIT_TF.getAbbreviation() : '-' );
flags.append( (f != null &&
f.indexOptions() == IndexOptions.DOCS_AND_FREQS) ? FieldFlag.OMIT_POSITIONS.getAbbreviation() : '-' );
flags.append( (lazy) ? FieldFlag.LAZY.getAbbreviation() : '-' );
flags.append( (binary) ? FieldFlag.BINARY.getAbbreviation() : '-' );
flags.append( (f != null && f.sortMissingFirst() ) ? FieldFlag.SORT_MISSING_FIRST.getAbbreviation() : '-' );

View File

@ -47,13 +47,14 @@ public abstract class FieldProperties {
protected final static int SORT_MISSING_LAST = 0x00000800;
protected final static int REQUIRED = 0x00001000;
protected final static int OMIT_POSITIONS = 0x00002000;
static final String[] propertyNames = {
"indexed", "tokenized", "stored",
"binary", "omitNorms", "omitTermFreqAndPositions",
"termVectors", "termPositions", "termOffsets",
"multiValued",
"sortMissingFirst","sortMissingLast","required"
"sortMissingFirst","sortMissingLast","required", "omitPositions"
};
static final Map<String,Integer> propertyMap = new HashMap<String,Integer>();

View File

@ -23,6 +23,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.Query;
@ -251,7 +252,7 @@ public abstract class FieldType extends FieldProperties {
return createField(field.getName(), val, getFieldStore(field, val),
getFieldIndex(field, val), getFieldTermVec(field, val), field.omitNorms(),
field.omitTf(), boost);
field.indexOptions(), boost);
}
@ -269,14 +270,14 @@ public abstract class FieldType extends FieldProperties {
* @return the {@link org.apache.lucene.document.Fieldable}.
*/
protected Fieldable createField(String name, String val, Field.Store storage, Field.Index index,
Field.TermVector vec, boolean omitNorms, boolean omitTFPos, float boost){
Field.TermVector vec, boolean omitNorms, IndexOptions options, float boost){
Field f = new Field(name,
val,
storage,
index,
vec);
f.setOmitNorms(omitNorms);
f.setOmitTermFreqAndPositions(omitTFPos);
f.setIndexOptions(options);
f.setBoost(boost);
return f;
}

View File

@ -18,6 +18,7 @@ package org.apache.solr.schema;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.queries.function.DocValues;
@ -77,7 +78,7 @@ public class LatLonType extends AbstractSubTypeFieldType implements SpatialQuery
if (field.stored()) {
f[f.length - 1] = createField(field.getName(), externalVal,
getFieldStore(field, externalVal), Field.Index.NO, Field.TermVector.NO,
false, false, boost);
false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, boost);
}
return f;
}

View File

@ -19,6 +19,7 @@ package org.apache.solr.schema;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.VectorValueSource;
import org.apache.lucene.search.BooleanClause;
@ -90,7 +91,7 @@ public class PointType extends CoordinateFieldType implements SpatialQueryable {
String storedVal = externalVal; // normalize or not?
f[f.length - 1] = createField(field.getName(), storedVal,
getFieldStore(field, storedVal), Field.Index.NO, Field.TermVector.NO,
false, false, boost);
false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, boost);
}
return f;

View File

@ -20,6 +20,7 @@ package org.apache.solr.schema;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.search.SortField;
import org.apache.solr.search.QParser;
@ -81,7 +82,17 @@ public final class SchemaField extends FieldProperties {
public boolean storeTermPositions() { return (properties & STORE_TERMPOSITIONS)!=0; }
public boolean storeTermOffsets() { return (properties & STORE_TERMOFFSETS)!=0; }
public boolean omitNorms() { return (properties & OMIT_NORMS)!=0; }
public boolean omitTf() { return (properties & OMIT_TF_POSITIONS)!=0; }
/**
 * Maps this field's property bits to the postings index options.
 * OMIT_TF_POSITIONS takes precedence and yields DOCS_ONLY; otherwise
 * OMIT_POSITIONS yields DOCS_AND_FREQS; with neither bit set the field
 * gets DOCS_AND_FREQS_AND_POSITIONS.
 *
 * @return the index options implied by the property bits
 */
public IndexOptions indexOptions() {
  final boolean omitsFreqsAndPositions = (properties & OMIT_TF_POSITIONS) != 0;
  if (omitsFreqsAndPositions) {
    return IndexOptions.DOCS_ONLY;
  }
  final boolean omitsPositions = (properties & OMIT_POSITIONS) != 0;
  return omitsPositions
      ? IndexOptions.DOCS_AND_FREQS
      : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
}
public boolean multiValued() { return (properties & MULTIVALUED)!=0; }
public boolean sortMissingFirst() { return (properties & SORT_MISSING_FIRST)!=0; }
public boolean sortMissingLast() { return (properties & SORT_MISSING_LAST)!=0; }
@ -215,7 +226,7 @@ public final class SchemaField extends FieldProperties {
}
if (on(falseProps,INDEXED)) {
int pp = (INDEXED | OMIT_NORMS | OMIT_TF_POSITIONS
int pp = (INDEXED | OMIT_NORMS | OMIT_TF_POSITIONS | OMIT_POSITIONS
| STORE_TERMVECTORS | STORE_TERMPOSITIONS | STORE_TERMOFFSETS
| SORT_MISSING_FIRST | SORT_MISSING_LAST);
if (on(pp,trueProps)) {
@ -225,6 +236,14 @@ public final class SchemaField extends FieldProperties {
}
if (on(falseProps,OMIT_TF_POSITIONS)) {
int pp = (OMIT_POSITIONS | OMIT_TF_POSITIONS);
if (on(pp, trueProps)) {
throw new RuntimeException("SchemaField: " + name + " conflicting indexed field options:" + props);
}
p &= ~pp;
}
if (on(falseProps,STORE_TERMVECTORS)) {
int pp = (STORE_TERMVECTORS | STORE_TERMPOSITIONS | STORE_TERMOFFSETS);
if (on(pp,trueProps)) {

View File

@ -19,6 +19,7 @@ package org.apache.solr.schema;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.DoubleFieldSource;
import org.apache.lucene.queries.function.valuesource.FloatFieldSource;
@ -521,7 +522,7 @@ public class TrieField extends FieldType {
}
f.setOmitNorms(field.omitNorms());
f.setOmitTermFreqAndPositions(field.omitTf());
f.setIndexOptions(field.indexOptions());
f.setBoost(boost);
return f;
}

View File

@ -417,6 +417,13 @@
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
</analyzer>
</fieldType>
<!-- omitPositions example -->
<fieldType name="nopositions" class="solr.TextField" omitPositions="true">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
</analyzer>
</fieldType>
</types>
@ -525,6 +532,8 @@
<field name="sim1text" type="sim1" indexed="true" stored="true"/>
<field name="sim2text" type="sim2" indexed="true" stored="true"/>
<field name="sim3text" type="sim3" indexed="true" stored="true"/>
<field name="nopositionstext" type="nopositions" indexed="true" stored="true"/>
<field name="tlong" type="tlong" indexed="true" stored="true" />

View File

@ -0,0 +1,60 @@
package org.apache.solr.schema;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.solr.SolrTestCaseJ4;
import org.junit.BeforeClass;
/**
 * Tests a schema field type declared with omitPositions="true"
 * (the "nopositionstext" field): term frequencies must still influence
 * ranking, while phrase queries, which need positions, must not match.
 */
public class TestOmitPositions extends SolrTestCaseJ4 {

  /** Starts the test core and indexes two documents shared by all tests. */
  @BeforeClass
  public static void beforeClass() throws Exception {
    initCore("solrconfig.xml","schema.xml");
    // add some docs
    assertU(adoc("id", "1", "nopositionstext", "this is a test this is only a test", "text", "just another test"));
    assertU(adoc("id", "2", "nopositionstext", "test test test test test test test test test test test test test", "text", "have a nice day"));
    assertU(commit());
  }

  /** A term query on the positionless field must still rank by term frequency. */
  public void testFrequencies() {
    // doc 2 should be ranked above doc 1
    assertQ("term query: ",
        req("fl", "id", "q", "nopositionstext:test"),
        "//*[@numFound='2']",
        "//result/doc[1]/int[@name='id'][.=2]",
        "//result/doc[2]/int[@name='id'][.=1]"
    );
  }

  /**
   * A phrase query on the positionless field must either return no results
   * or fail with an exception caused by an IllegalStateException.
   */
  public void testPositions() {
    // no results should be found:
    // lucene 3.x: silent failure
    // lucene 4.x: illegal state exception, field was indexed without positions

    ignoreException("was indexed without position data");
    try {
      assertQ("phrase query: ",
          req("fl", "id", "q", "nopositionstext:\"test test\""),
          "//*[@numFound='0']"
      );
    } catch (Exception expected) {
      assertTrue(expected.getCause() instanceof IllegalStateException);
      // in lucene 4.0, queries don't silently fail
    } finally {
      // Always remove the ignore pattern, even when the assertion above
      // fails, so it cannot leak into later tests.
      resetExceptionIgnores();
    }
  }
}

View File

@ -31,6 +31,7 @@ public enum FieldFlag {
TERM_VECTOR_POSITION('p', "Store Position With TermVector"),
OMIT_NORMS('O', "Omit Norms"),
OMIT_TF('F', "Omit Tf"),
OMIT_POSITIONS('P', "Omit Positions"),
LAZY('L', "Lazy"),
BINARY('B', "Binary"),
SORT_MISSING_FIRST('f', "Sort Missing First"),