LUCENE-2048: omit positions but keep term freq

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1145594 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2011-07-12 13:31:22 +00:00
parent dfc5ce1cff
commit 1c646d24c9
68 changed files with 999 additions and 289 deletions

View File

@ -523,6 +523,15 @@ New Features
(grow on demand if you set/get/clear too-large indices). (Mike (grow on demand if you set/get/clear too-large indices). (Mike
McCandless) McCandless)
* LUCENE-2048: Added the ability to omit positions but still index
term frequencies, you can now control what is indexed into
the postings via AbstractField.setIndexOptions:
DOCS_ONLY: only documents are indexed: term frequencies and positions are omitted
DOCS_AND_FREQS: only documents and term frequencies are indexed: positions are omitted
DOCS_AND_FREQS_AND_POSITIONS: full postings: documents, frequencies, and positions
AbstractField.setOmitTermFrequenciesAndPositions is deprecated,
you should use DOCS_ONLY instead. (Robert Muir)
Optimizations Optimizations
* LUCENE-3201, LUCENE-3218: CompoundFileSystem code has been consolidated * LUCENE-3201, LUCENE-3218: CompoundFileSystem code has been consolidated

View File

@ -22,6 +22,7 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField; import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig;
@ -173,7 +174,7 @@ public class IndexFiles {
// the field into separate words and don't index term frequency // the field into separate words and don't index term frequency
// or positional information: // or positional information:
Field pathField = new Field("path", file.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); Field pathField = new Field("path", file.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
pathField.setOmitTermFreqAndPositions(true); pathField.setIndexOptions(IndexOptions.DOCS_ONLY);
doc.add(pathField); doc.add(pathField);
// Add the last modified date of the file a field named "modified". // Add the last modified date of the file a field named "modified".

View File

@ -87,9 +87,7 @@ public class TestNRTManager extends LuceneTestCase {
if (field1.getOmitNorms()) { if (field1.getOmitNorms()) {
field2.setOmitNorms(true); field2.setOmitNorms(true);
} }
if (field1.getOmitTermFreqAndPositions()) { field2.setIndexOptions(field1.getIndexOptions());
field2.setOmitTermFreqAndPositions(true);
}
doc2.add(field2); doc2.add(field2);
} }

View File

@ -49,7 +49,7 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
* <pre> * <pre>
* Field field = new Field(name, new NumericTokenStream(precisionStep).setIntValue(value)); * Field field = new Field(name, new NumericTokenStream(precisionStep).setIntValue(value));
* field.setOmitNorms(true); * field.setOmitNorms(true);
* field.setOmitTermFreqAndPositions(true); * field.setIndexOptions(IndexOptions.DOCS_ONLY);
* document.add(field); * document.add(field);
* </pre> * </pre>
* *
@ -60,7 +60,7 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
* NumericTokenStream stream = new NumericTokenStream(precisionStep); * NumericTokenStream stream = new NumericTokenStream(precisionStep);
* Field field = new Field(name, stream); * Field field = new Field(name, stream);
* field.setOmitNorms(true); * field.setOmitNorms(true);
* field.setOmitTermFreqAndPositions(true); * field.setIndexOptions(IndexOptions.DOCS_ONLY);
* Document document = new Document(); * Document document = new Document();
* document.add(field); * document.add(field);
* *

View File

@ -18,6 +18,7 @@ package org.apache.lucene.document;
import org.apache.lucene.search.PhraseQuery; // for javadocs import org.apache.lucene.search.PhraseQuery; // for javadocs
import org.apache.lucene.search.spans.SpanQuery; // for javadocs import org.apache.lucene.search.spans.SpanQuery; // for javadocs
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInvertState; // for javadocs import org.apache.lucene.index.FieldInvertState; // for javadocs
import org.apache.lucene.index.values.PerDocFieldValues; import org.apache.lucene.index.values.PerDocFieldValues;
import org.apache.lucene.index.values.ValueType; import org.apache.lucene.index.values.ValueType;
@ -39,7 +40,7 @@ public abstract class AbstractField implements Fieldable {
protected boolean isTokenized = true; protected boolean isTokenized = true;
protected boolean isBinary = false; protected boolean isBinary = false;
protected boolean lazy = false; protected boolean lazy = false;
protected boolean omitTermFreqAndPositions = false; protected IndexOptions indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
protected float boost = 1.0f; protected float boost = 1.0f;
// the data object for all different kind of field values // the data object for all different kind of field values
protected Object fieldsData = null; protected Object fieldsData = null;
@ -50,7 +51,6 @@ public abstract class AbstractField implements Fieldable {
protected int binaryOffset; protected int binaryOffset;
protected PerDocFieldValues docValues; protected PerDocFieldValues docValues;
protected AbstractField() protected AbstractField()
{ {
} }
@ -208,8 +208,8 @@ public abstract class AbstractField implements Fieldable {
/** True if norms are omitted for this indexed field */ /** True if norms are omitted for this indexed field */
public boolean getOmitNorms() { return omitNorms; } public boolean getOmitNorms() { return omitNorms; }
/** @see #setOmitTermFreqAndPositions */ /** @see #setIndexOptions */
public boolean getOmitTermFreqAndPositions() { return omitTermFreqAndPositions; } public IndexOptions getIndexOptions() { return indexOptions; }
/** Expert: /** Expert:
* *
@ -220,7 +220,7 @@ public abstract class AbstractField implements Fieldable {
/** Expert: /** Expert:
* *
* If set, omit term freq, positions and payloads from * If set, omit term freq, and optionally also positions and payloads from
* postings for this field. * postings for this field.
* *
* <p><b>NOTE</b>: While this option reduces storage space * <p><b>NOTE</b>: While this option reduces storage space
@ -229,7 +229,7 @@ public abstract class AbstractField implements Fieldable {
* PhraseQuery} or {@link SpanQuery} subclasses will * PhraseQuery} or {@link SpanQuery} subclasses will
* silently fail to find results. * silently fail to find results.
*/ */
public void setOmitTermFreqAndPositions(boolean omitTermFreqAndPositions) { this.omitTermFreqAndPositions=omitTermFreqAndPositions; } public void setIndexOptions(IndexOptions indexOptions) { this.indexOptions=indexOptions; }
public boolean isLazy() { public boolean isLazy() {
return lazy; return lazy;
@ -275,8 +275,9 @@ public abstract class AbstractField implements Fieldable {
if (omitNorms) { if (omitNorms) {
result.append(",omitNorms"); result.append(",omitNorms");
} }
if (omitTermFreqAndPositions) { if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
result.append(",omitTermFreqAndPositions"); result.append(",indexOptions=");
result.append(indexOptions);
} }
if (lazy){ if (lazy){
result.append(",lazy"); result.append(",lazy");

View File

@ -20,6 +20,7 @@ package org.apache.lucene.document;
import java.io.Reader; import java.io.Reader;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter;
/** /**
@ -389,7 +390,8 @@ public final class Field extends AbstractField implements Fieldable {
this.isTokenized = index.isAnalyzed(); this.isTokenized = index.isAnalyzed();
this.omitNorms = index.omitNorms(); this.omitNorms = index.omitNorms();
if (index == Index.NO) { if (index == Index.NO) {
this.omitTermFreqAndPositions = false; // note: now this reads even weirder than before
this.indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
} }
this.isBinary = false; this.isBinary = false;
@ -520,7 +522,7 @@ public final class Field extends AbstractField implements Fieldable {
isStored = true; isStored = true;
isIndexed = false; isIndexed = false;
isTokenized = false; isTokenized = false;
omitTermFreqAndPositions = false; indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
omitNorms = true; omitNorms = true;
isBinary = true; isBinary = true;

View File

@ -17,6 +17,7 @@ package org.apache.lucene.document;
*/ */
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInvertState; // for javadocs import org.apache.lucene.index.FieldInvertState; // for javadocs
import org.apache.lucene.index.values.IndexDocValues; import org.apache.lucene.index.values.IndexDocValues;
import org.apache.lucene.index.values.PerDocFieldValues; import org.apache.lucene.index.values.PerDocFieldValues;
@ -194,12 +195,12 @@ public interface Fieldable {
*/ */
abstract byte[] getBinaryValue(byte[] result); abstract byte[] getBinaryValue(byte[] result);
/** @see #setOmitTermFreqAndPositions */ /** @see #setIndexOptions */
boolean getOmitTermFreqAndPositions(); IndexOptions getIndexOptions();
/** Expert: /** Expert:
* *
* If set, omit term freq, positions and payloads from * If set, omit term freq, and optionally positions and payloads from
* postings for this field. * postings for this field.
* *
* <p><b>NOTE</b>: While this option reduces storage space * <p><b>NOTE</b>: While this option reduces storage space
@ -208,7 +209,7 @@ public interface Fieldable {
* PhraseQuery} or {@link SpanQuery} subclasses will * PhraseQuery} or {@link SpanQuery} subclasses will
* fail with an exception. * fail with an exception.
*/ */
void setOmitTermFreqAndPositions(boolean omitTermFreqAndPositions); void setIndexOptions(IndexOptions indexOptions);
/** /**
* Returns the {@link PerDocFieldValues} * Returns the {@link PerDocFieldValues}

View File

@ -21,6 +21,7 @@ import java.io.Reader;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.NumericTokenStream; import org.apache.lucene.analysis.NumericTokenStream;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.search.NumericRangeQuery; // javadocs import org.apache.lucene.search.NumericRangeQuery; // javadocs
import org.apache.lucene.search.NumericRangeFilter; // javadocs import org.apache.lucene.search.NumericRangeFilter; // javadocs
@ -192,7 +193,7 @@ public final class NumericField extends AbstractField {
public NumericField(String name, int precisionStep, Field.Store store, boolean index) { public NumericField(String name, int precisionStep, Field.Store store, boolean index) {
super(name, store, index ? Field.Index.ANALYZED_NO_NORMS : Field.Index.NO, Field.TermVector.NO); super(name, store, index ? Field.Index.ANALYZED_NO_NORMS : Field.Index.NO, Field.TermVector.NO);
this.precisionStep = precisionStep; this.precisionStep = precisionStep;
setOmitTermFreqAndPositions(true); setIndexOptions(IndexOptions.DOCS_ONLY);
} }
/** Returns a {@link NumericTokenStream} for indexing the numeric value. */ /** Returns a {@link NumericTokenStream} for indexing the numeric value. */

View File

@ -186,8 +186,8 @@ public class CheckIndex {
int numFields; int numFields;
/** True if at least one of the fields in this segment /** True if at least one of the fields in this segment
* does not omitTermFreqAndPositions. * has position data.
* @see AbstractField#setOmitTermFreqAndPositions */ * @see AbstractField#setIndexOptions(org.apache.lucene.index.FieldInfo.IndexOptions) */
public boolean hasProx; public boolean hasProx;
/** Map that includes certain /** Map that includes certain

View File

@ -233,7 +233,7 @@ final class DocFieldProcessor extends DocConsumer {
// easily add it // easily add it
FieldInfo fi = fieldInfos.addOrUpdate(fieldName, field.isIndexed(), field.isTermVectorStored(), FieldInfo fi = fieldInfos.addOrUpdate(fieldName, field.isIndexed(), field.isTermVectorStored(),
field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(), field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
field.getOmitNorms(), false, field.getOmitTermFreqAndPositions(), field.docValuesType()); field.getOmitNorms(), false, field.getIndexOptions(), field.docValuesType());
fp = new DocFieldProcessorPerField(this, fi); fp = new DocFieldProcessorPerField(this, fi);
fp.next = fieldHash[hashPos]; fp.next = fieldHash[hashPos];
@ -245,7 +245,7 @@ final class DocFieldProcessor extends DocConsumer {
} else { } else {
fieldInfos.addOrUpdate(fp.fieldInfo.name, field.isIndexed(), field.isTermVectorStored(), fieldInfos.addOrUpdate(fp.fieldInfo.name, field.isIndexed(), field.isTermVectorStored(),
field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(), field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
field.getOmitNorms(), false, field.getOmitTermFreqAndPositions(), field.docValuesType()); field.getOmitNorms(), false, field.getIndexOptions(), field.docValuesType());
} }
if (thisFieldGen != fp.lastGen) { if (thisFieldGen != fp.lastGen) {

View File

@ -35,14 +35,27 @@ public final class FieldInfo {
boolean storePositionWithTermVector; boolean storePositionWithTermVector;
public boolean omitNorms; // omit norms associated with indexed fields public boolean omitNorms; // omit norms associated with indexed fields
public boolean omitTermFreqAndPositions; public IndexOptions indexOptions;
public boolean storePayloads; // whether this field stores payloads together with term positions public boolean storePayloads; // whether this field stores payloads together with term positions
private int codecId = UNASSIGNED_CODEC_ID; // set inside SegmentCodecs#build() during segment flush - this is used to identify the codec used to write this field private int codecId = UNASSIGNED_CODEC_ID; // set inside SegmentCodecs#build() during segment flush - this is used to identify the codec used to write this field
/**
* Controls how much information is stored in the postings lists.
* @lucene.experimental
*/
public static enum IndexOptions {
/** only documents are indexed: term frequencies and positions are omitted */
DOCS_ONLY,
/** only documents and term frequencies are indexed: positions are omitted */
DOCS_AND_FREQS,
/** full postings: documents, frequencies, and positions */
DOCS_AND_FREQS_AND_POSITIONS
};
FieldInfo(String na, boolean tk, int nu, boolean storeTermVector, FieldInfo(String na, boolean tk, int nu, boolean storeTermVector,
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions, ValueType docValues) { boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, ValueType docValues) {
name = na; name = na;
isIndexed = tk; isIndexed = tk;
number = nu; number = nu;
@ -53,16 +66,16 @@ public final class FieldInfo {
this.storePositionWithTermVector = storePositionWithTermVector; this.storePositionWithTermVector = storePositionWithTermVector;
this.storePayloads = storePayloads; this.storePayloads = storePayloads;
this.omitNorms = omitNorms; this.omitNorms = omitNorms;
this.omitTermFreqAndPositions = omitTermFreqAndPositions; this.indexOptions = indexOptions;
} else { // for non-indexed fields, leave defaults } else { // for non-indexed fields, leave defaults
this.storeTermVector = false; this.storeTermVector = false;
this.storeOffsetWithTermVector = false; this.storeOffsetWithTermVector = false;
this.storePositionWithTermVector = false; this.storePositionWithTermVector = false;
this.storePayloads = false; this.storePayloads = false;
this.omitNorms = false; this.omitNorms = false;
this.omitTermFreqAndPositions = false; this.indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
} }
assert !omitTermFreqAndPositions || !storePayloads; assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !storePayloads;
} }
void setCodecId(int codecId) { void setCodecId(int codecId) {
@ -77,14 +90,14 @@ public final class FieldInfo {
@Override @Override
public Object clone() { public Object clone() {
FieldInfo clone = new FieldInfo(name, isIndexed, number, storeTermVector, storePositionWithTermVector, FieldInfo clone = new FieldInfo(name, isIndexed, number, storeTermVector, storePositionWithTermVector,
storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions, docValues); storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions, docValues);
clone.codecId = this.codecId; clone.codecId = this.codecId;
return clone; return clone;
} }
// should only be called by FieldInfos#addOrUpdate // should only be called by FieldInfos#addOrUpdate
void update(boolean isIndexed, boolean storeTermVector, boolean storePositionWithTermVector, void update(boolean isIndexed, boolean storeTermVector, boolean storePositionWithTermVector,
boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions) { boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, IndexOptions indexOptions) {
if (this.isIndexed != isIndexed) { if (this.isIndexed != isIndexed) {
this.isIndexed = true; // once indexed, always index this.isIndexed = true; // once indexed, always index
@ -105,12 +118,13 @@ public final class FieldInfo {
if (this.omitNorms != omitNorms) { if (this.omitNorms != omitNorms) {
this.omitNorms = true; // if one require omitNorms at least once, it remains off for life this.omitNorms = true; // if one require omitNorms at least once, it remains off for life
} }
if (this.omitTermFreqAndPositions != omitTermFreqAndPositions) { if (this.indexOptions != indexOptions) {
this.omitTermFreqAndPositions = true; // if one require omitTermFreqAndPositions at least once, it remains off for life // downgrade
this.indexOptions = this.indexOptions.compareTo(indexOptions) < 0 ? this.indexOptions : indexOptions;
this.storePayloads = false; this.storePayloads = false;
} }
} }
assert !this.omitTermFreqAndPositions || !this.storePayloads; assert this.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !this.storePayloads;
} }
void setDocValues(ValueType v) { void setDocValues(ValueType v) {
if (docValues == null) { if (docValues == null) {

View File

@ -28,6 +28,7 @@ import java.util.SortedMap;
import java.util.TreeMap; import java.util.TreeMap;
import java.util.Map.Entry; import java.util.Map.Entry;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.SegmentCodecs; // Required for Java 1.5 javadocs import org.apache.lucene.index.SegmentCodecs; // Required for Java 1.5 javadocs
import org.apache.lucene.index.SegmentCodecs.SegmentCodecsBuilder; import org.apache.lucene.index.SegmentCodecs.SegmentCodecsBuilder;
import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.index.codecs.CodecProvider;
@ -201,13 +202,13 @@ public final class FieldInfos implements Iterable<FieldInfo> {
// First used in 2.9; prior to 2.9 there was no format header // First used in 2.9; prior to 2.9 there was no format header
public static final int FORMAT_START = -2; public static final int FORMAT_START = -2;
public static final int FORMAT_PER_FIELD_CODEC = -3; // First used in 3.4: omit only positional information
public static final int FORMAT_OMIT_POSITIONS = -3;
// Records index values for this field // per-field codec support, records index values for fields
public static final int FORMAT_INDEX_VALUES = -3; public static final int FORMAT_FLEX = -4;
// whenever you add a new format, make it 1 smaller (negative version logic)! // whenever you add a new format, make it 1 smaller (negative version logic)!
static final int FORMAT_CURRENT = FORMAT_PER_FIELD_CODEC; static final int FORMAT_CURRENT = FORMAT_FLEX;
static final int FORMAT_MINIMUM = FORMAT_START; static final int FORMAT_MINIMUM = FORMAT_START;
@ -218,8 +219,10 @@ public final class FieldInfos implements Iterable<FieldInfo> {
static final byte OMIT_NORMS = 0x10; static final byte OMIT_NORMS = 0x10;
static final byte STORE_PAYLOADS = 0x20; static final byte STORE_PAYLOADS = 0x20;
static final byte OMIT_TERM_FREQ_AND_POSITIONS = 0x40; static final byte OMIT_TERM_FREQ_AND_POSITIONS = 0x40;
static final byte OMIT_POSITIONS = -128;
private int format; private int format;
private boolean hasFreq; // only set if readonly
private boolean hasProx; // only set if readonly private boolean hasProx; // only set if readonly
private boolean hasVectors; // only set if readonly private boolean hasVectors; // only set if readonly
private long version; // internal use to track changes private long version; // internal use to track changes
@ -308,6 +311,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
synchronized public Object clone() { synchronized public Object clone() {
FieldInfos fis = new FieldInfos(globalFieldNumbers, segmentCodecsBuilder); FieldInfos fis = new FieldInfos(globalFieldNumbers, segmentCodecsBuilder);
fis.format = format; fis.format = format;
fis.hasFreq = hasFreq;
fis.hasProx = hasProx; fis.hasProx = hasProx;
fis.hasVectors = hasVectors; fis.hasVectors = hasVectors;
for (FieldInfo fi : this) { for (FieldInfo fi : this) {
@ -317,14 +321,28 @@ public final class FieldInfos implements Iterable<FieldInfo> {
return fis; return fis;
} }
/** Returns true if any fields do not omitTermFreqAndPositions */ /** Returns true if any fields have positions */
public boolean hasProx() { public boolean hasProx() {
if (isReadOnly()) { if (isReadOnly()) {
return hasProx; return hasProx;
} }
// mutable FIs must check! // mutable FIs must check!
for (FieldInfo fi : this) { for (FieldInfo fi : this) {
if (fi.isIndexed && !fi.omitTermFreqAndPositions) { if (fi.isIndexed && fi.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
return true;
}
}
return false;
}
/** Returns true if any fields have freqs */
public boolean hasFreq() {
if (isReadOnly()) {
return hasFreq;
}
// mutable FIs must check!
for (FieldInfo fi : this) {
if (fi.isIndexed && fi.indexOptions != IndexOptions.DOCS_ONLY) {
return true; return true;
} }
} }
@ -414,7 +432,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
synchronized public void addOrUpdate(String name, boolean isIndexed, boolean storeTermVector, synchronized public void addOrUpdate(String name, boolean isIndexed, boolean storeTermVector,
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, boolean omitNorms) { boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, boolean omitNorms) {
addOrUpdate(name, isIndexed, storeTermVector, storePositionWithTermVector, addOrUpdate(name, isIndexed, storeTermVector, storePositionWithTermVector,
storeOffsetWithTermVector, omitNorms, false, false, null); storeOffsetWithTermVector, omitNorms, false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, null);
} }
/** If the field is not yet known, adds it. If it is known, checks to make /** If the field is not yet known, adds it. If it is known, checks to make
@ -429,18 +447,18 @@ public final class FieldInfos implements Iterable<FieldInfo> {
* @param storeOffsetWithTermVector true if the term vector with offsets should be stored * @param storeOffsetWithTermVector true if the term vector with offsets should be stored
* @param omitNorms true if the norms for the indexed field should be omitted * @param omitNorms true if the norms for the indexed field should be omitted
* @param storePayloads true if payloads should be stored for this field * @param storePayloads true if payloads should be stored for this field
* @param omitTermFreqAndPositions true if term freqs should be omitted for this field * @param indexOptions controls what is indexed into the postings for this field (whether term freqs and/or positions are omitted)
*/ */
synchronized public FieldInfo addOrUpdate(String name, boolean isIndexed, boolean storeTermVector, synchronized public FieldInfo addOrUpdate(String name, boolean isIndexed, boolean storeTermVector,
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions, ValueType docValues) { boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, ValueType docValues) {
return addOrUpdateInternal(name, -1, isIndexed, storeTermVector, storePositionWithTermVector, return addOrUpdateInternal(name, -1, isIndexed, storeTermVector, storePositionWithTermVector,
storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions, docValues); storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions, docValues);
} }
synchronized private FieldInfo addOrUpdateInternal(String name, int preferredFieldNumber, boolean isIndexed, synchronized private FieldInfo addOrUpdateInternal(String name, int preferredFieldNumber, boolean isIndexed,
boolean storeTermVector, boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, boolean storeTermVector, boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions, ValueType docValues) { boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, ValueType docValues) {
if (globalFieldNumbers == null) { if (globalFieldNumbers == null) {
throw new IllegalStateException("FieldInfos are read-only, create a new instance with a global field map to make modifications to FieldInfos"); throw new IllegalStateException("FieldInfos are read-only, create a new instance with a global field map to make modifications to FieldInfos");
} }
@ -448,9 +466,9 @@ public final class FieldInfos implements Iterable<FieldInfo> {
FieldInfo fi = fieldInfo(name); FieldInfo fi = fieldInfo(name);
if (fi == null) { if (fi == null) {
final int fieldNumber = nextFieldNumber(name, preferredFieldNumber); final int fieldNumber = nextFieldNumber(name, preferredFieldNumber);
fi = addInternal(name, fieldNumber, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions, docValues); fi = addInternal(name, fieldNumber, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions, docValues);
} else { } else {
fi.update(isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions); fi.update(isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions);
fi.setDocValues(docValues); fi.setDocValues(docValues);
} }
if ((fi.isIndexed || fi.hasDocValues()) && fi.getCodecId() == FieldInfo.UNASSIGNED_CODEC_ID) { if ((fi.isIndexed || fi.hasDocValues()) && fi.getCodecId() == FieldInfo.UNASSIGNED_CODEC_ID) {
@ -465,7 +483,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
return addOrUpdateInternal(fi.name, fi.number, fi.isIndexed, fi.storeTermVector, return addOrUpdateInternal(fi.name, fi.number, fi.isIndexed, fi.storeTermVector,
fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector,
fi.omitNorms, fi.storePayloads, fi.omitNorms, fi.storePayloads,
fi.omitTermFreqAndPositions, fi.docValues); fi.indexOptions, fi.docValues);
} }
/* /*
@ -473,13 +491,13 @@ public final class FieldInfos implements Iterable<FieldInfo> {
*/ */
private FieldInfo addInternal(String name, int fieldNumber, boolean isIndexed, private FieldInfo addInternal(String name, int fieldNumber, boolean isIndexed,
boolean storeTermVector, boolean storePositionWithTermVector, boolean storeTermVector, boolean storePositionWithTermVector,
boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions, ValueType docValuesType) { boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, ValueType docValuesType) {
// don't check modifiable here since we use that to initially build up FIs // don't check modifiable here since we use that to initially build up FIs
if (globalFieldNumbers != null) { if (globalFieldNumbers != null) {
globalFieldNumbers.setIfNotSet(fieldNumber, name); globalFieldNumbers.setIfNotSet(fieldNumber, name);
} }
final FieldInfo fi = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, storePositionWithTermVector, final FieldInfo fi = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, storePositionWithTermVector,
storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions, docValuesType); storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions, docValuesType);
putInternal(fi); putInternal(fi);
return fi; return fi;
} }
@ -590,7 +608,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
output.writeVInt(FORMAT_CURRENT); output.writeVInt(FORMAT_CURRENT);
output.writeVInt(size()); output.writeVInt(size());
for (FieldInfo fi : this) { for (FieldInfo fi : this) {
assert !fi.omitTermFreqAndPositions || !fi.storePayloads; assert fi.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !fi.storePayloads;
byte bits = 0x0; byte bits = 0x0;
if (fi.isIndexed) bits |= IS_INDEXED; if (fi.isIndexed) bits |= IS_INDEXED;
if (fi.storeTermVector) bits |= STORE_TERMVECTOR; if (fi.storeTermVector) bits |= STORE_TERMVECTOR;
@ -598,7 +616,10 @@ public final class FieldInfos implements Iterable<FieldInfo> {
if (fi.storeOffsetWithTermVector) bits |= STORE_OFFSET_WITH_TERMVECTOR; if (fi.storeOffsetWithTermVector) bits |= STORE_OFFSET_WITH_TERMVECTOR;
if (fi.omitNorms) bits |= OMIT_NORMS; if (fi.omitNorms) bits |= OMIT_NORMS;
if (fi.storePayloads) bits |= STORE_PAYLOADS; if (fi.storePayloads) bits |= STORE_PAYLOADS;
if (fi.omitTermFreqAndPositions) bits |= OMIT_TERM_FREQ_AND_POSITIONS; if (fi.indexOptions == IndexOptions.DOCS_ONLY)
bits |= OMIT_TERM_FREQ_AND_POSITIONS;
else if (fi.indexOptions == IndexOptions.DOCS_AND_FREQS)
bits |= OMIT_POSITIONS;
output.writeString(fi.name); output.writeString(fi.name);
output.writeInt(fi.number); output.writeInt(fi.number);
output.writeInt(fi.getCodecId()); output.writeInt(fi.getCodecId());
@ -673,8 +694,8 @@ public final class FieldInfos implements Iterable<FieldInfo> {
for (int i = 0; i < size; i++) { for (int i = 0; i < size; i++) {
String name = input.readString(); String name = input.readString();
// if this is a previous format codec 0 will be preflex! // if this is a previous format codec 0 will be preflex!
final int fieldNumber = format <= FORMAT_PER_FIELD_CODEC? input.readInt():i; final int fieldNumber = format <= FORMAT_FLEX? input.readInt():i;
final int codecId = format <= FORMAT_PER_FIELD_CODEC? input.readInt():0; final int codecId = format <= FORMAT_FLEX? input.readInt():0;
byte bits = input.readByte(); byte bits = input.readByte();
boolean isIndexed = (bits & IS_INDEXED) != 0; boolean isIndexed = (bits & IS_INDEXED) != 0;
boolean storeTermVector = (bits & STORE_TERMVECTOR) != 0; boolean storeTermVector = (bits & STORE_TERMVECTOR) != 0;
@ -682,18 +703,30 @@ public final class FieldInfos implements Iterable<FieldInfo> {
boolean storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0; boolean storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
boolean omitNorms = (bits & OMIT_NORMS) != 0; boolean omitNorms = (bits & OMIT_NORMS) != 0;
boolean storePayloads = (bits & STORE_PAYLOADS) != 0; boolean storePayloads = (bits & STORE_PAYLOADS) != 0;
boolean omitTermFreqAndPositions = (bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0; final IndexOptions indexOptions;
if ((bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0) {
indexOptions = IndexOptions.DOCS_ONLY;
} else if ((bits & OMIT_POSITIONS) != 0) {
if (format <= FORMAT_OMIT_POSITIONS) {
indexOptions = IndexOptions.DOCS_AND_FREQS;
} else {
throw new CorruptIndexException("Corrupt fieldinfos, OMIT_POSITIONS set but format=" + format);
}
} else {
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
}
// LUCENE-3027: past indices were able to write // LUCENE-3027: past indices were able to write
// storePayloads=true when omitTFAP is also true, // storePayloads=true when omitTFAP is also true,
// which is invalid. We correct that, here: // which is invalid. We correct that, here:
if (omitTermFreqAndPositions) { if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
storePayloads = false; storePayloads = false;
} }
hasVectors |= storeTermVector; hasVectors |= storeTermVector;
hasProx |= isIndexed && !omitTermFreqAndPositions; hasProx |= isIndexed && indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
hasFreq |= isIndexed && indexOptions != IndexOptions.DOCS_ONLY;
ValueType docValuesType = null; ValueType docValuesType = null;
if (format <= FORMAT_INDEX_VALUES) { if (format <= FORMAT_FLEX) {
final byte b = input.readByte(); final byte b = input.readByte();
switch(b) { switch(b) {
case 0: case 0:
@ -743,7 +776,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
throw new IllegalStateException("unhandled indexValues type " + b); throw new IllegalStateException("unhandled indexValues type " + b);
} }
} }
final FieldInfo addInternal = addInternal(name, fieldNumber, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions, docValuesType); final FieldInfo addInternal = addInternal(name, fieldNumber, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions, docValuesType);
addInternal.setCodecId(codecId); addInternal.setCodecId(codecId);
} }
@ -771,7 +804,8 @@ public final class FieldInfos implements Iterable<FieldInfo> {
FieldInfo clone = (FieldInfo) (fieldInfo).clone(); FieldInfo clone = (FieldInfo) (fieldInfo).clone();
roFis.putInternal(clone); roFis.putInternal(clone);
roFis.hasVectors |= clone.storeTermVector; roFis.hasVectors |= clone.storeTermVector;
roFis.hasProx |= clone.isIndexed && !clone.omitTermFreqAndPositions; roFis.hasProx |= clone.isIndexed && clone.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
roFis.hasFreq |= clone.isIndexed && clone.indexOptions != IndexOptions.DOCS_ONLY;
} }
return roFis; return roFis;
} }

View File

@ -340,7 +340,7 @@ public final class FieldsReader implements Cloneable, Closeable {
} }
f.setOmitNorms(fi.omitNorms); f.setOmitNorms(fi.omitNorms);
f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions); f.setIndexOptions(fi.indexOptions);
doc.add(f); doc.add(f);
} }
@ -364,7 +364,7 @@ public final class FieldsReader implements Cloneable, Closeable {
termVector); termVector);
} }
f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions); f.setIndexOptions(fi.indexOptions);
f.setOmitNorms(fi.omitNorms); f.setOmitNorms(fi.omitNorms);
doc.add(f); doc.add(f);
} }

View File

@ -22,6 +22,7 @@ import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.codecs.FieldsConsumer; import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CollectionUtil; import org.apache.lucene.util.CollectionUtil;
@ -79,7 +80,7 @@ final class FreqProxTermsWriter extends TermsHashConsumer {
// Aggregate the storePayload as seen by the same // Aggregate the storePayload as seen by the same
// field across multiple threads // field across multiple threads
if (!fieldInfo.omitTermFreqAndPositions) { if (fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
fieldInfo.storePayloads |= fieldWriter.hasPayloads; fieldInfo.storePayloads |= fieldWriter.hasPayloads;
} }

View File

@ -23,6 +23,7 @@ import java.util.Map;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.document.Fieldable; import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.codecs.FieldsConsumer; import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.index.codecs.PostingsConsumer; import org.apache.lucene.index.codecs.PostingsConsumer;
import org.apache.lucene.index.codecs.TermStats; import org.apache.lucene.index.codecs.TermStats;
@ -41,7 +42,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
final FieldInfo fieldInfo; final FieldInfo fieldInfo;
final DocumentsWriterPerThread.DocState docState; final DocumentsWriterPerThread.DocState docState;
final FieldInvertState fieldState; final FieldInvertState fieldState;
boolean omitTermFreqAndPositions; IndexOptions indexOptions;
PayloadAttribute payloadAttribute; PayloadAttribute payloadAttribute;
public FreqProxTermsWriterPerField(TermsHashPerField termsHashPerField, FreqProxTermsWriter parent, FieldInfo fieldInfo) { public FreqProxTermsWriterPerField(TermsHashPerField termsHashPerField, FreqProxTermsWriter parent, FieldInfo fieldInfo) {
@ -50,12 +51,12 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
this.fieldInfo = fieldInfo; this.fieldInfo = fieldInfo;
docState = termsHashPerField.docState; docState = termsHashPerField.docState;
fieldState = termsHashPerField.fieldState; fieldState = termsHashPerField.fieldState;
omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions; indexOptions = fieldInfo.indexOptions;
} }
@Override @Override
int getStreamCount() { int getStreamCount() {
if (fieldInfo.omitTermFreqAndPositions) if (fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
return 1; return 1;
else else
return 2; return 2;
@ -76,7 +77,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
void reset() { void reset() {
// Record, up front, whether our in-RAM format will be // Record, up front, whether our in-RAM format will be
// with or without term freqs: // with or without term freqs:
omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions; indexOptions = fieldInfo.indexOptions;
payloadAttribute = null; payloadAttribute = null;
} }
@ -126,12 +127,14 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray; FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
postings.lastDocIDs[termID] = docState.docID; postings.lastDocIDs[termID] = docState.docID;
if (omitTermFreqAndPositions) { if (indexOptions == IndexOptions.DOCS_ONLY) {
postings.lastDocCodes[termID] = docState.docID; postings.lastDocCodes[termID] = docState.docID;
} else { } else {
postings.lastDocCodes[termID] = docState.docID << 1; postings.lastDocCodes[termID] = docState.docID << 1;
postings.docFreqs[termID] = 1; postings.docFreqs[termID] = 1;
writeProx(termID, fieldState.position); if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
writeProx(termID, fieldState.position);
}
} }
fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency); fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
fieldState.uniqueTermCount++; fieldState.uniqueTermCount++;
@ -144,9 +147,9 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray; FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
assert omitTermFreqAndPositions || postings.docFreqs[termID] > 0; assert indexOptions == IndexOptions.DOCS_ONLY || postings.docFreqs[termID] > 0;
if (omitTermFreqAndPositions) { if (indexOptions == IndexOptions.DOCS_ONLY) {
if (docState.docID != postings.lastDocIDs[termID]) { if (docState.docID != postings.lastDocIDs[termID]) {
assert docState.docID > postings.lastDocIDs[termID]; assert docState.docID > postings.lastDocIDs[termID];
termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]); termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]);
@ -172,11 +175,15 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency); fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
postings.lastDocCodes[termID] = (docState.docID - postings.lastDocIDs[termID]) << 1; postings.lastDocCodes[termID] = (docState.docID - postings.lastDocIDs[termID]) << 1;
postings.lastDocIDs[termID] = docState.docID; postings.lastDocIDs[termID] = docState.docID;
writeProx(termID, fieldState.position); if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
writeProx(termID, fieldState.position);
}
fieldState.uniqueTermCount++; fieldState.uniqueTermCount++;
} else { } else {
fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, ++postings.docFreqs[termID]); fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, ++postings.docFreqs[termID]);
writeProx(termID, fieldState.position-postings.lastPositions[termID]); if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
writeProx(termID, fieldState.position-postings.lastPositions[termID]);
}
} }
} }
} }
@ -237,7 +244,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
final TermsConsumer termsConsumer = consumer.addField(fieldInfo); final TermsConsumer termsConsumer = consumer.addField(fieldInfo);
final Comparator<BytesRef> termComp = termsConsumer.getComparator(); final Comparator<BytesRef> termComp = termsConsumer.getComparator();
final boolean currentFieldOmitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions; final IndexOptions currentFieldIndexOptions = fieldInfo.indexOptions;
final Map<Term,Integer> segDeletes; final Map<Term,Integer> segDeletes;
if (state.segDeletes != null && state.segDeletes.terms.size() > 0) { if (state.segDeletes != null && state.segDeletes.terms.size() > 0) {
@ -263,7 +270,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
termsHashPerField.bytePool.setBytesRef(text, textStart); termsHashPerField.bytePool.setBytesRef(text, textStart);
termsHashPerField.initReader(freq, termID, 0); termsHashPerField.initReader(freq, termID, 0);
if (!fieldInfo.omitTermFreqAndPositions) { if (fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
termsHashPerField.initReader(prox, termID, 1); termsHashPerField.initReader(prox, termID, 1);
} }
@ -300,7 +307,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
if (postings.lastDocCodes[termID] != -1) { if (postings.lastDocCodes[termID] != -1) {
// Return last doc // Return last doc
docID = postings.lastDocIDs[termID]; docID = postings.lastDocIDs[termID];
if (!omitTermFreqAndPositions) { if (indexOptions != IndexOptions.DOCS_ONLY) {
termFreq = postings.docFreqs[termID]; termFreq = postings.docFreqs[termID];
} }
postings.lastDocCodes[termID] = -1; postings.lastDocCodes[termID] = -1;
@ -310,7 +317,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
} }
} else { } else {
final int code = freq.readVInt(); final int code = freq.readVInt();
if (omitTermFreqAndPositions) { if (indexOptions == IndexOptions.DOCS_ONLY) {
docID += code; docID += code;
} else { } else {
docID += code >>> 1; docID += code >>> 1;
@ -351,14 +358,17 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
state.liveDocs.clear(docID); state.liveDocs.clear(docID);
} }
if (currentFieldIndexOptions != IndexOptions.DOCS_ONLY) {
totTF += termDocFreq;
}
// Carefully copy over the prox + payload info, // Carefully copy over the prox + payload info,
// changing the format to match Lucene's segment // changing the format to match Lucene's segment
// format. // format.
if (!currentFieldOmitTermFreqAndPositions) {
// omitTermFreqAndPositions == false so we do write positions & if (currentFieldIndexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
// payload // we do write positions & payload
int position = 0; int position = 0;
totTF += termDocFreq;
for(int j=0;j<termDocFreq;j++) { for(int j=0;j<termDocFreq;j++) {
final int code = prox.readVInt(); final int code = prox.readVInt();
position += code >> 1; position += code >> 1;

View File

@ -153,6 +153,8 @@ public abstract class IndexReader implements Cloneable,Closeable {
STORES_PAYLOADS, STORES_PAYLOADS,
/** All fields that omit tf */ /** All fields that omit tf */
OMIT_TERM_FREQ_AND_POSITIONS, OMIT_TERM_FREQ_AND_POSITIONS,
/** All fields that omit positions */
OMIT_POSITIONS,
/** All fields which are not indexed */ /** All fields which are not indexed */
UNINDEXED, UNINDEXED,
/** All fields which are indexed with termvectors enabled */ /** All fields which are indexed with termvectors enabled */

View File

@ -91,7 +91,7 @@ public final class SegmentInfo implements Cloneable {
//TODO: remove when we don't have to support old indexes anymore that had this field //TODO: remove when we don't have to support old indexes anymore that had this field
private int hasVectors = CHECK_FIELDINFO; private int hasVectors = CHECK_FIELDINFO;
//TODO: remove when we don't have to support old indexes anymore that had this field //TODO: remove when we don't have to support old indexes anymore that had this field
private int hasProx = CHECK_FIELDINFO; // True if this segment has any fields with omitTermFreqAndPositions==false private int hasProx = CHECK_FIELDINFO; // True if this segment has any fields with positional information
private FieldInfos fieldInfos; private FieldInfos fieldInfos;

View File

@ -24,6 +24,7 @@ import java.util.Collection;
import java.util.List; import java.util.List;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexReader.FieldOption; import org.apache.lucene.index.IndexReader.FieldOption;
import org.apache.lucene.index.MergePolicy.MergeAbortedException; import org.apache.lucene.index.MergePolicy.MergeAbortedException;
import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.Codec;
@ -158,12 +159,12 @@ final class SegmentMerger {
private static void addIndexed(IndexReader reader, FieldInfos fInfos, private static void addIndexed(IndexReader reader, FieldInfos fInfos,
Collection<String> names, boolean storeTermVectors, Collection<String> names, boolean storeTermVectors,
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
boolean storePayloads, boolean omitTFAndPositions) boolean storePayloads, IndexOptions indexOptions)
throws IOException { throws IOException {
for (String field : names) { for (String field : names) {
fInfos.addOrUpdate(field, true, storeTermVectors, fInfos.addOrUpdate(field, true, storeTermVectors,
storePositionWithTermVector, storeOffsetWithTermVector, !reader storePositionWithTermVector, storeOffsetWithTermVector, !reader
.hasNorms(field), storePayloads, omitTFAndPositions, null); .hasNorms(field), storePayloads, indexOptions, null);
} }
} }
@ -223,13 +224,14 @@ final class SegmentMerger {
fieldInfos.add(fi); fieldInfos.add(fi);
} }
} else { } else {
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false); addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false); addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false); addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR), true, false, false, false, false); addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR), true, false, false, false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.OMIT_TERM_FREQ_AND_POSITIONS), false, false, false, false, true); addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.OMIT_POSITIONS), false, false, false, false, IndexOptions.DOCS_AND_FREQS);
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.STORES_PAYLOADS), false, false, false, true, false); addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.OMIT_TERM_FREQ_AND_POSITIONS), false, false, false, false, IndexOptions.DOCS_ONLY);
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.INDEXED), false, false, false, false, false); addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.STORES_PAYLOADS), false, false, false, true, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.INDEXED), false, false, false, false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
fieldInfos.addOrUpdate(reader.getFieldNames(FieldOption.UNINDEXED), false); fieldInfos.addOrUpdate(reader.getFieldNames(FieldOption.UNINDEXED), false);
fieldInfos.addOrUpdate(reader.getFieldNames(FieldOption.DOC_VALUES), false); fieldInfos.addOrUpdate(reader.getFieldNames(FieldOption.DOC_VALUES), false);
} }

View File

@ -29,6 +29,7 @@ import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector; import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.codecs.PerDocValues; import org.apache.lucene.index.codecs.PerDocValues;
import org.apache.lucene.store.BufferedIndexInput; import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
@ -513,7 +514,10 @@ public class SegmentReader extends IndexReader implements Cloneable {
else if (!fi.isIndexed && fieldOption == IndexReader.FieldOption.UNINDEXED) { else if (!fi.isIndexed && fieldOption == IndexReader.FieldOption.UNINDEXED) {
fieldSet.add(fi.name); fieldSet.add(fi.name);
} }
else if (fi.omitTermFreqAndPositions && fieldOption == IndexReader.FieldOption.OMIT_TERM_FREQ_AND_POSITIONS) { else if (fi.indexOptions == IndexOptions.DOCS_ONLY && fieldOption == IndexReader.FieldOption.OMIT_TERM_FREQ_AND_POSITIONS) {
fieldSet.add(fi.name);
}
else if (fi.indexOptions == IndexOptions.DOCS_AND_FREQS && fieldOption == IndexReader.FieldOption.OMIT_POSITIONS) {
fieldSet.add(fi.name); fieldSet.add(fi.name);
} }
else if (fi.storePayloads && fieldOption == IndexReader.FieldOption.STORES_PAYLOADS) { else if (fi.storePayloads && fieldOption == IndexReader.FieldOption.STORES_PAYLOADS) {

View File

@ -27,6 +27,7 @@ import java.util.TreeMap;
import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FieldsEnum; import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.IndexFileNames;
@ -136,7 +137,7 @@ public class BlockTermsReader extends FieldsProducer {
assert numTerms >= 0; assert numTerms >= 0;
final long termsStartPointer = in.readVLong(); final long termsStartPointer = in.readVLong();
final FieldInfo fieldInfo = fieldInfos.fieldInfo(field); final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
final long sumTotalTermFreq = fieldInfo.omitTermFreqAndPositions ? -1 : in.readVLong(); final long sumTotalTermFreq = fieldInfo.indexOptions == IndexOptions.DOCS_ONLY ? -1 : in.readVLong();
final long sumDocFreq = in.readVLong(); final long sumDocFreq = in.readVLong();
assert !fields.containsKey(fieldInfo.name); assert !fields.containsKey(fieldInfo.name);
fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq)); fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq));
@ -709,7 +710,7 @@ public class BlockTermsReader extends FieldsProducer {
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException { public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
//System.out.println("BTR.d&p this=" + this); //System.out.println("BTR.d&p this=" + this);
decodeMetaData(); decodeMetaData();
if (fieldInfo.omitTermFreqAndPositions) { if (fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
return null; return null;
} else { } else {
DocsAndPositionsEnum dpe = postingsReader.docsAndPositions(fieldInfo, state, liveDocs, reuse); DocsAndPositionsEnum dpe = postingsReader.docsAndPositions(fieldInfo, state, liveDocs, reuse);
@ -867,7 +868,7 @@ public class BlockTermsReader extends FieldsProducer {
// just skipN here: // just skipN here:
state.docFreq = freqReader.readVInt(); state.docFreq = freqReader.readVInt();
//System.out.println(" dF=" + state.docFreq); //System.out.println(" dF=" + state.docFreq);
if (!fieldInfo.omitTermFreqAndPositions) { if (fieldInfo.indexOptions != IndexOptions.DOCS_ONLY) {
state.totalTermFreq = state.docFreq + freqReader.readVLong(); state.totalTermFreq = state.docFreq + freqReader.readVLong();
//System.out.println(" totTF=" + state.totalTermFreq); //System.out.println(" totTF=" + state.totalTermFreq);
} }

View File

@ -23,6 +23,7 @@ import java.util.Comparator;
import java.util.List; import java.util.List;
import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.SegmentWriteState;
@ -129,7 +130,7 @@ public class BlockTermsWriter extends FieldsConsumer {
out.writeVInt(field.fieldInfo.number); out.writeVInt(field.fieldInfo.number);
out.writeVLong(field.numTerms); out.writeVLong(field.numTerms);
out.writeVLong(field.termsStartPointer); out.writeVLong(field.termsStartPointer);
if (!field.fieldInfo.omitTermFreqAndPositions) { if (field.fieldInfo.indexOptions != IndexOptions.DOCS_ONLY) {
out.writeVLong(field.sumTotalTermFreq); out.writeVLong(field.sumTotalTermFreq);
} }
out.writeVLong(field.sumDocFreq); out.writeVLong(field.sumDocFreq);
@ -298,7 +299,7 @@ public class BlockTermsWriter extends FieldsConsumer {
final TermStats stats = pendingTerms[termCount].stats; final TermStats stats = pendingTerms[termCount].stats;
assert stats != null; assert stats != null;
bytesWriter.writeVInt(stats.docFreq); bytesWriter.writeVInt(stats.docFreq);
if (!fieldInfo.omitTermFreqAndPositions) { if (fieldInfo.indexOptions != IndexOptions.DOCS_ONLY) {
bytesWriter.writeVLong(stats.totalTermFreq-stats.docFreq); bytesWriter.writeVLong(stats.totalTermFreq-stats.docFreq);
} }
} }

View File

@ -21,6 +21,7 @@ import java.io.IOException;
import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
@ -60,16 +61,17 @@ public abstract class PostingsConsumer {
int df = 0; int df = 0;
long totTF = 0; long totTF = 0;
if (mergeState.fieldInfo.omitTermFreqAndPositions) { if (mergeState.fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
while(true) { while(true) {
final int doc = postings.nextDoc(); final int doc = postings.nextDoc();
if (doc == DocIdSetIterator.NO_MORE_DOCS) { if (doc == DocIdSetIterator.NO_MORE_DOCS) {
break; break;
} }
this.startDoc(doc, postings.freq()); final int freq = postings.freq();
this.startDoc(doc, freq);
this.finishDoc(); this.finishDoc();
df++; df++;
totTF++; totTF += freq;
} }
} else { } else {
final DocsAndPositionsEnum postingsEnum = (DocsAndPositionsEnum) postings; final DocsAndPositionsEnum postingsEnum = (DocsAndPositionsEnum) postings;

View File

@ -20,6 +20,7 @@ package org.apache.lucene.index.codecs;
import java.io.IOException; import java.io.IOException;
import java.util.Comparator; import java.util.Comparator;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.MultiDocsEnum; import org.apache.lucene.index.MultiDocsEnum;
import org.apache.lucene.index.MultiDocsAndPositionsEnum; import org.apache.lucene.index.MultiDocsAndPositionsEnum;
@ -59,7 +60,7 @@ public abstract class TermsConsumer {
long sumDocFreq = 0; long sumDocFreq = 0;
long sumDFsinceLastAbortCheck = 0; long sumDFsinceLastAbortCheck = 0;
if (mergeState.fieldInfo.omitTermFreqAndPositions) { if (mergeState.fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
if (docsEnum == null) { if (docsEnum == null) {
docsEnum = new MappingMultiDocsEnum(); docsEnum = new MappingMultiDocsEnum();
} }
@ -75,6 +76,7 @@ public abstract class TermsConsumer {
final TermStats stats = postingsConsumer.merge(mergeState, docsEnum); final TermStats stats = postingsConsumer.merge(mergeState, docsEnum);
if (stats.docFreq > 0) { if (stats.docFreq > 0) {
finishTerm(term, stats); finishTerm(term, stats);
sumTotalTermFreq += stats.totalTermFreq;
sumDFsinceLastAbortCheck += stats.docFreq; sumDFsinceLastAbortCheck += stats.docFreq;
sumDocFreq += stats.docFreq; sumDocFreq += stats.docFreq;
if (sumDFsinceLastAbortCheck > 60000) { if (sumDFsinceLastAbortCheck > 60000) {

View File

@ -27,6 +27,7 @@ import java.util.TreeMap;
import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FieldsEnum; import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.IndexFileNames;
@ -118,7 +119,7 @@ public class MemoryCodec extends Codec {
lastDocID = docID; lastDocID = docID;
docCount++; docCount++;
if (field.omitTermFreqAndPositions) { if (field.indexOptions == IndexOptions.DOCS_ONLY) {
buffer.writeVInt(delta); buffer.writeVInt(delta);
} else if (termDocFreq == 1) { } else if (termDocFreq == 1) {
buffer.writeVInt((delta<<1) | 1); buffer.writeVInt((delta<<1) | 1);
@ -192,7 +193,7 @@ public class MemoryCodec extends Codec {
assert buffer2.getFilePointer() == 0; assert buffer2.getFilePointer() == 0;
buffer2.writeVInt(stats.docFreq); buffer2.writeVInt(stats.docFreq);
if (!field.omitTermFreqAndPositions) { if (field.indexOptions != IndexOptions.DOCS_ONLY) {
buffer2.writeVLong(stats.totalTermFreq-stats.docFreq); buffer2.writeVLong(stats.totalTermFreq-stats.docFreq);
} }
int pos = (int) buffer2.getFilePointer(); int pos = (int) buffer2.getFilePointer();
@ -223,7 +224,7 @@ public class MemoryCodec extends Codec {
if (termCount > 0) { if (termCount > 0) {
out.writeVInt(termCount); out.writeVInt(termCount);
out.writeVInt(field.number); out.writeVInt(field.number);
if (!field.omitTermFreqAndPositions) { if (field.indexOptions != IndexOptions.DOCS_ONLY) {
out.writeVLong(sumTotalTermFreq); out.writeVLong(sumTotalTermFreq);
} }
out.writeVLong(sumDocFreq); out.writeVLong(sumDocFreq);
@ -266,7 +267,7 @@ public class MemoryCodec extends Codec {
} }
private final static class FSTDocsEnum extends DocsEnum { private final static class FSTDocsEnum extends DocsEnum {
private final boolean omitTFAP; private final IndexOptions indexOptions;
private final boolean storePayloads; private final boolean storePayloads;
private byte[] buffer = new byte[16]; private byte[] buffer = new byte[16];
private final ByteArrayDataInput in = new ByteArrayDataInput(buffer); private final ByteArrayDataInput in = new ByteArrayDataInput(buffer);
@ -278,13 +279,13 @@ public class MemoryCodec extends Codec {
private int payloadLen; private int payloadLen;
private int numDocs; private int numDocs;
public FSTDocsEnum(boolean omitTFAP, boolean storePayloads) { public FSTDocsEnum(IndexOptions indexOptions, boolean storePayloads) {
this.omitTFAP = omitTFAP; this.indexOptions = indexOptions;
this.storePayloads = storePayloads; this.storePayloads = storePayloads;
} }
public boolean canReuse(boolean omitTFAP, boolean storePayloads) { public boolean canReuse(IndexOptions indexOptions, boolean storePayloads) {
return omitTFAP == this.omitTFAP && storePayloads == this.storePayloads; return indexOptions == this.indexOptions && storePayloads == this.storePayloads;
} }
public FSTDocsEnum reset(BytesRef bufferIn, Bits liveDocs, int numDocs) { public FSTDocsEnum reset(BytesRef bufferIn, Bits liveDocs, int numDocs) {
@ -313,7 +314,7 @@ public class MemoryCodec extends Codec {
return docID = NO_MORE_DOCS; return docID = NO_MORE_DOCS;
} }
docUpto++; docUpto++;
if (omitTFAP) { if (indexOptions == IndexOptions.DOCS_ONLY) {
docID += in.readVInt(); docID += in.readVInt();
freq = 1; freq = 1;
} else { } else {
@ -327,16 +328,18 @@ public class MemoryCodec extends Codec {
assert freq > 0; assert freq > 0;
} }
// Skip positions if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
for(int posUpto=0;posUpto<freq;posUpto++) { // Skip positions
if (!storePayloads) { for(int posUpto=0;posUpto<freq;posUpto++) {
in.readVInt(); if (!storePayloads) {
} else { in.readVInt();
final int posCode = in.readVInt(); } else {
if ((posCode & 1) != 0) { final int posCode = in.readVInt();
payloadLen = in.readVInt(); if ((posCode & 1) != 0) {
payloadLen = in.readVInt();
}
in.skipBytes(payloadLen);
} }
in.skipBytes(payloadLen);
} }
} }
} }
@ -432,7 +435,7 @@ public class MemoryCodec extends Codec {
return docID = NO_MORE_DOCS; return docID = NO_MORE_DOCS;
} }
docUpto++; docUpto++;
final int code = in.readVInt(); final int code = in.readVInt();
docID += code >>> 1; docID += code >>> 1;
if ((code & 1) != 0) { if ((code & 1) != 0) {
@ -454,8 +457,8 @@ public class MemoryCodec extends Codec {
if (!storePayloads) { if (!storePayloads) {
in.readVInt(); in.readVInt();
} else { } else {
final int codeSkip = in.readVInt(); final int skipCode = in.readVInt();
if ((codeSkip & 1) != 0) { if ((skipCode & 1) != 0) {
payloadLength = in.readVInt(); payloadLength = in.readVInt();
if (VERBOSE) System.out.println(" new payloadLen=" + payloadLength); if (VERBOSE) System.out.println(" new payloadLen=" + payloadLength);
} }
@ -548,7 +551,7 @@ public class MemoryCodec extends Codec {
if (!didDecode) { if (!didDecode) {
buffer.reset(current.output.bytes, 0, current.output.length); buffer.reset(current.output.bytes, 0, current.output.length);
docFreq = buffer.readVInt(); docFreq = buffer.readVInt();
if (!field.omitTermFreqAndPositions) { if (field.indexOptions != IndexOptions.DOCS_ONLY) {
totalTermFreq = docFreq + buffer.readVLong(); totalTermFreq = docFreq + buffer.readVLong();
} else { } else {
totalTermFreq = 0; totalTermFreq = 0;
@ -598,11 +601,11 @@ public class MemoryCodec extends Codec {
decodeMetaData(); decodeMetaData();
FSTDocsEnum docsEnum; FSTDocsEnum docsEnum;
if (reuse == null || !(reuse instanceof FSTDocsEnum)) { if (reuse == null || !(reuse instanceof FSTDocsEnum)) {
docsEnum = new FSTDocsEnum(field.omitTermFreqAndPositions, field.storePayloads); docsEnum = new FSTDocsEnum(field.indexOptions, field.storePayloads);
} else { } else {
docsEnum = (FSTDocsEnum) reuse; docsEnum = (FSTDocsEnum) reuse;
if (!docsEnum.canReuse(field.omitTermFreqAndPositions, field.storePayloads)) { if (!docsEnum.canReuse(field.indexOptions, field.storePayloads)) {
docsEnum = new FSTDocsEnum(field.omitTermFreqAndPositions, field.storePayloads); docsEnum = new FSTDocsEnum(field.indexOptions, field.storePayloads);
} }
} }
return docsEnum.reset(current.output, liveDocs, docFreq); return docsEnum.reset(current.output, liveDocs, docFreq);
@ -610,7 +613,7 @@ public class MemoryCodec extends Codec {
@Override @Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException { public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
if (field.omitTermFreqAndPositions) { if (field.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
return null; return null;
} }
decodeMetaData(); decodeMetaData();
@ -686,7 +689,7 @@ public class MemoryCodec extends Codec {
public TermsReader(FieldInfos fieldInfos, IndexInput in) throws IOException { public TermsReader(FieldInfos fieldInfos, IndexInput in) throws IOException {
final int fieldNumber = in.readVInt(); final int fieldNumber = in.readVInt();
field = fieldInfos.fieldInfo(fieldNumber); field = fieldInfos.fieldInfo(fieldNumber);
if (!field.omitTermFreqAndPositions) { if (field.indexOptions != IndexOptions.DOCS_ONLY) {
sumTotalTermFreq = in.readVLong(); sumTotalTermFreq = in.readVLong();
} else { } else {
sumTotalTermFreq = 0; sumTotalTermFreq = 0;

View File

@ -25,9 +25,11 @@ import java.util.Iterator;
import java.util.Map; import java.util.Map;
import java.util.TreeMap; import java.util.TreeMap;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FieldsEnum; import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.IndexFileNames;
@ -99,7 +101,7 @@ public class PreFlexFields extends FieldsProducer {
if (fi.isIndexed) { if (fi.isIndexed) {
fields.put(fi.name, fi); fields.put(fi.name, fi);
preTerms.put(fi.name, new PreTerms(fi)); preTerms.put(fi.name, new PreTerms(fi));
if (!fi.omitTermFreqAndPositions) { if (fi.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
anyProx = true; anyProx = true;
} }
} }
@ -973,7 +975,7 @@ public class PreFlexFields extends FieldsProducer {
@Override @Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException { public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
PreDocsAndPositionsEnum docsPosEnum; PreDocsAndPositionsEnum docsPosEnum;
if (fieldInfo.omitTermFreqAndPositions) { if (fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
return null; return null;
} else if (reuse == null || !(reuse instanceof PreDocsAndPositionsEnum)) { } else if (reuse == null || !(reuse instanceof PreDocsAndPositionsEnum)) {
docsPosEnum = new PreDocsAndPositionsEnum(); docsPosEnum = new PreDocsAndPositionsEnum();

View File

@ -20,6 +20,7 @@ package org.apache.lucene.index.codecs.preflex;
import java.io.IOException; import java.io.IOException;
import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.index.codecs.standard.DefaultSkipListReader; import org.apache.lucene.index.codecs.standard.DefaultSkipListReader;
@ -51,7 +52,7 @@ public class SegmentTermDocs {
private boolean haveSkipped; private boolean haveSkipped;
protected boolean currentFieldStoresPayloads; protected boolean currentFieldStoresPayloads;
protected boolean currentFieldOmitTermFreqAndPositions; protected IndexOptions indexOptions;
public SegmentTermDocs(IndexInput freqStream, TermInfosReader tis, FieldInfos fieldInfos) { public SegmentTermDocs(IndexInput freqStream, TermInfosReader tis, FieldInfos fieldInfos) {
this.freqStream = (IndexInput) freqStream.clone(); this.freqStream = (IndexInput) freqStream.clone();
@ -89,7 +90,7 @@ public class SegmentTermDocs {
void seek(TermInfo ti, Term term) throws IOException { void seek(TermInfo ti, Term term) throws IOException {
count = 0; count = 0;
FieldInfo fi = fieldInfos.fieldInfo(term.field()); FieldInfo fi = fieldInfos.fieldInfo(term.field());
currentFieldOmitTermFreqAndPositions = (fi != null) ? fi.omitTermFreqAndPositions : false; this.indexOptions = (fi != null) ? fi.indexOptions : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
currentFieldStoresPayloads = (fi != null) ? fi.storePayloads : false; currentFieldStoresPayloads = (fi != null) ? fi.storePayloads : false;
if (ti == null) { if (ti == null) {
df = 0; df = 0;
@ -122,7 +123,7 @@ public class SegmentTermDocs {
return false; return false;
final int docCode = freqStream.readVInt(); final int docCode = freqStream.readVInt();
if (currentFieldOmitTermFreqAndPositions) { if (indexOptions == IndexOptions.DOCS_ONLY) {
doc += docCode; doc += docCode;
freq = 1; freq = 1;
} else { } else {
@ -149,7 +150,7 @@ public class SegmentTermDocs {
public int read(final int[] docs, final int[] freqs) public int read(final int[] docs, final int[] freqs)
throws IOException { throws IOException {
final int length = docs.length; final int length = docs.length;
if (currentFieldOmitTermFreqAndPositions) { if (indexOptions == IndexOptions.DOCS_ONLY) {
return readNoTf(docs, freqs, length); return readNoTf(docs, freqs, length);
} else { } else {
int i = 0; int i = 0;

View File

@ -21,6 +21,7 @@ import java.io.IOException;
import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexInput;
/** /**
@ -77,8 +78,8 @@ extends SegmentTermDocs {
} }
public final int nextPosition() throws IOException { public final int nextPosition() throws IOException {
if (currentFieldOmitTermFreqAndPositions) if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
// This field does not store term freq, positions, payloads // This field does not store positions, payloads
return 0; return 0;
// perform lazy skips if necessary // perform lazy skips if necessary
lazySkip(); lazySkip();
@ -140,7 +141,7 @@ extends SegmentTermDocs {
} }
private void skipPositions(int n) throws IOException { private void skipPositions(int n) throws IOException {
assert !currentFieldOmitTermFreqAndPositions; assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
for (int f = n; f > 0; f--) { // skip unread positions for (int f = n; f > 0; f--) { // skip unread positions
readDeltaPosition(); readDeltaPosition();
skipPayload(); skipPayload();

View File

@ -22,6 +22,7 @@ import java.io.IOException;
import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermState;
import org.apache.lucene.index.codecs.PostingsReaderBase; import org.apache.lucene.index.codecs.PostingsReaderBase;
import org.apache.lucene.index.codecs.BlockTermState; import org.apache.lucene.index.codecs.BlockTermState;
@ -134,8 +135,8 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
//System.out.println("PR nextTerm"); //System.out.println("PR nextTerm");
PulsingTermState termState = (PulsingTermState) _termState; PulsingTermState termState = (PulsingTermState) _termState;
// total TF, but in the omitTFAP case its computed based on docFreq. // if we have positions, its total TF, otherwise its computed based on docFreq.
long count = fieldInfo.omitTermFreqAndPositions ? termState.docFreq : termState.totalTermFreq; long count = fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS ? termState.totalTermFreq : termState.docFreq;
//System.out.println(" count=" + count + " threshold=" + maxPositions); //System.out.println(" count=" + count + " threshold=" + maxPositions);
if (count <= maxPositions) { if (count <= maxPositions) {
@ -193,7 +194,7 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
// TODO: -- not great that we can't always reuse // TODO: -- not great that we can't always reuse
@Override @Override
public DocsAndPositionsEnum docsAndPositions(FieldInfo field, BlockTermState _termState, Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException { public DocsAndPositionsEnum docsAndPositions(FieldInfo field, BlockTermState _termState, Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
if (field.omitTermFreqAndPositions) { if (field.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
return null; return null;
} }
//System.out.println("D&P: field=" + field.name); //System.out.println("D&P: field=" + field.name);
@ -223,7 +224,7 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
private static class PulsingDocsEnum extends DocsEnum { private static class PulsingDocsEnum extends DocsEnum {
private final ByteArrayDataInput postings = new ByteArrayDataInput(); private final ByteArrayDataInput postings = new ByteArrayDataInput();
private final boolean omitTF; private final IndexOptions indexOptions;
private final boolean storePayloads; private final boolean storePayloads;
private Bits liveDocs; private Bits liveDocs;
private int docID; private int docID;
@ -231,7 +232,7 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
private int payloadLength; private int payloadLength;
public PulsingDocsEnum(FieldInfo fieldInfo) { public PulsingDocsEnum(FieldInfo fieldInfo) {
omitTF = fieldInfo.omitTermFreqAndPositions; indexOptions = fieldInfo.indexOptions;
storePayloads = fieldInfo.storePayloads; storePayloads = fieldInfo.storePayloads;
} }
@ -249,7 +250,7 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
} }
boolean canReuse(FieldInfo fieldInfo) { boolean canReuse(FieldInfo fieldInfo) {
return omitTF == fieldInfo.omitTermFreqAndPositions && storePayloads == fieldInfo.storePayloads; return indexOptions == fieldInfo.indexOptions && storePayloads == fieldInfo.storePayloads;
} }
@Override @Override
@ -262,7 +263,7 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
} }
final int code = postings.readVInt(); final int code = postings.readVInt();
if (omitTF) { if (indexOptions == IndexOptions.DOCS_ONLY) {
docID += code; docID += code;
} else { } else {
docID += code >>> 1; // shift off low bit docID += code >>> 1; // shift off low bit
@ -272,22 +273,24 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
freq = postings.readVInt(); // else read freq freq = postings.readVInt(); // else read freq
} }
// Skip positions if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
if (storePayloads) { // Skip positions
for(int pos=0;pos<freq;pos++) { if (storePayloads) {
final int posCode = postings.readVInt(); for(int pos=0;pos<freq;pos++) {
if ((posCode & 1) != 0) { final int posCode = postings.readVInt();
payloadLength = postings.readVInt(); if ((posCode & 1) != 0) {
payloadLength = postings.readVInt();
}
if (payloadLength != 0) {
postings.skipBytes(payloadLength);
}
} }
if (payloadLength != 0) { } else {
postings.skipBytes(payloadLength); for(int pos=0;pos<freq;pos++) {
// TODO: skipVInt
postings.readVInt();
} }
} }
} else {
for(int pos=0;pos<freq;pos++) {
// TODO: skipVInt
postings.readVInt();
}
} }
} }

View File

@ -20,6 +20,7 @@ package org.apache.lucene.index.codecs.pulsing;
import java.io.IOException; import java.io.IOException;
import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.codecs.PostingsWriterBase; import org.apache.lucene.index.codecs.PostingsWriterBase;
import org.apache.lucene.index.codecs.TermStats; import org.apache.lucene.index.codecs.TermStats;
import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.IndexOutput;
@ -46,7 +47,7 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase {
private IndexOutput termsOut; private IndexOutput termsOut;
private boolean omitTF; private IndexOptions indexOptions;
private boolean storePayloads; private boolean storePayloads;
// one entry per position // one entry per position
@ -102,7 +103,7 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase {
// our parent calls setField whenever the field changes // our parent calls setField whenever the field changes
@Override @Override
public void setField(FieldInfo fieldInfo) { public void setField(FieldInfo fieldInfo) {
omitTF = fieldInfo.omitTermFreqAndPositions; this.indexOptions = fieldInfo.indexOptions;
//System.out.println("PW field=" + fieldInfo.name + " omitTF=" + omitTF); //System.out.println("PW field=" + fieldInfo.name + " omitTF=" + omitTF);
storePayloads = fieldInfo.storePayloads; storePayloads = fieldInfo.storePayloads;
wrappedPostingsWriter.setField(fieldInfo); wrappedPostingsWriter.setField(fieldInfo);
@ -123,8 +124,11 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase {
assert pendingCount < pending.length; assert pendingCount < pending.length;
currentDoc = pending[pendingCount]; currentDoc = pending[pendingCount];
currentDoc.docID = docID; currentDoc.docID = docID;
if (omitTF) { if (indexOptions == IndexOptions.DOCS_ONLY) {
pendingCount++; pendingCount++;
} else if (indexOptions == IndexOptions.DOCS_AND_FREQS) {
pendingCount++;
currentDoc.termFreq = termDocFreq;
} else { } else {
currentDoc.termFreq = termDocFreq; currentDoc.termFreq = termDocFreq;
} }
@ -196,7 +200,7 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase {
// given codec wants to store other interesting // given codec wants to store other interesting
// stuff, it could use this pulsing codec to do so // stuff, it could use this pulsing codec to do so
if (!omitTF) { if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
int lastDocID = 0; int lastDocID = 0;
int pendingIDX = 0; int pendingIDX = 0;
int lastPayloadLength = -1; int lastPayloadLength = -1;
@ -239,7 +243,20 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase {
} }
} }
} }
} else { } else if (indexOptions == IndexOptions.DOCS_AND_FREQS) {
int lastDocID = 0;
for(int posIDX=0;posIDX<pendingCount;posIDX++) {
final Position doc = pending[posIDX];
final int delta = doc.docID - lastDocID;
if (doc.termFreq == 1) {
buffer.writeVInt((delta<<1)|1);
} else {
buffer.writeVInt(delta<<1);
buffer.writeVInt(doc.termFreq);
}
lastDocID = doc.docID;
}
} else if (indexOptions == IndexOptions.DOCS_ONLY) {
int lastDocID = 0; int lastDocID = 0;
for(int posIDX=0;posIDX<pendingCount;posIDX++) { for(int posIDX=0;posIDX<pendingCount;posIDX++) {
final Position doc = pending[posIDX]; final Position doc = pending[posIDX];
@ -282,7 +299,7 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase {
wrappedPostingsWriter.startTerm(); wrappedPostingsWriter.startTerm();
// Flush all buffered docs // Flush all buffered docs
if (!omitTF) { if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
Position doc = null; Position doc = null;
for(Position pos : pending) { for(Position pos : pending) {
if (doc == null) { if (doc == null) {
@ -303,7 +320,7 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase {
//wrappedPostingsWriter.finishDoc(); //wrappedPostingsWriter.finishDoc();
} else { } else {
for(Position doc : pending) { for(Position doc : pending) {
wrappedPostingsWriter.startDoc(doc.docID, 0); wrappedPostingsWriter.startDoc(doc.docID, indexOptions == IndexOptions.DOCS_ONLY ? 0 : doc.termFreq);
} }
} }
pendingCount = -1; pendingCount = -1;

View File

@ -23,6 +23,7 @@ import java.util.Collection;
import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermState;
@ -68,14 +69,17 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
skipIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.SKIP_EXTENSION), context); skipIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.SKIP_EXTENSION), context);
if (segmentInfo.getFieldInfos().hasFreq()) {
freqIn = intFactory.openInput(dir, IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.FREQ_EXTENSION), context);
} else {
freqIn = null;
}
if (segmentInfo.getHasProx()) { if (segmentInfo.getHasProx()) {
freqIn = intFactory.openInput(dir, IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.FREQ_EXTENSION), context);
posIn = intFactory.openInput(dir, IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.POS_EXTENSION), context); posIn = intFactory.openInput(dir, IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.POS_EXTENSION), context);
payloadIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.PAYLOAD_EXTENSION), context); payloadIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.PAYLOAD_EXTENSION), context);
} else { } else {
posIn = null; posIn = null;
payloadIn = null; payloadIn = null;
freqIn = null;
} }
success = true; success = true;
} finally { } finally {
@ -89,8 +93,11 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.DOC_EXTENSION)); files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.DOC_EXTENSION));
files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.SKIP_EXTENSION)); files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.SKIP_EXTENSION));
if (segmentInfo.getHasProx()) { if (segmentInfo.getFieldInfos().hasFreq()) {
files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.FREQ_EXTENSION)); files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.FREQ_EXTENSION));
}
if (segmentInfo.getHasProx()) {
files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.POS_EXTENSION)); files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.POS_EXTENSION));
files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.PAYLOAD_EXTENSION)); files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.PAYLOAD_EXTENSION));
} }
@ -229,8 +236,11 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
final boolean isFirstTerm = termState.termCount == 0; final boolean isFirstTerm = termState.termCount == 0;
termState.docIndex.read(termState.bytesReader, isFirstTerm); termState.docIndex.read(termState.bytesReader, isFirstTerm);
//System.out.println(" docIndex=" + termState.docIndex); //System.out.println(" docIndex=" + termState.docIndex);
if (!fieldInfo.omitTermFreqAndPositions) { if (fieldInfo.indexOptions != IndexOptions.DOCS_ONLY) {
termState.freqIndex.read(termState.bytesReader, isFirstTerm); termState.freqIndex.read(termState.bytesReader, isFirstTerm);
}
if (fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
//System.out.println(" freqIndex=" + termState.freqIndex); //System.out.println(" freqIndex=" + termState.freqIndex);
termState.posIndex.read(termState.bytesReader, isFirstTerm); termState.posIndex.read(termState.bytesReader, isFirstTerm);
//System.out.println(" posIndex=" + termState.posIndex); //System.out.println(" posIndex=" + termState.posIndex);
@ -277,7 +287,7 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
@Override @Override
public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState _termState, Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException { public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState _termState, Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
assert !fieldInfo.omitTermFreqAndPositions; assert fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
final SepTermState termState = (SepTermState) _termState; final SepTermState termState = (SepTermState) _termState;
SepDocsAndPositionsEnum postingsEnum; SepDocsAndPositionsEnum postingsEnum;
if (reuse == null || !(reuse instanceof SepDocsAndPositionsEnum)) { if (reuse == null || !(reuse instanceof SepDocsAndPositionsEnum)) {
@ -304,6 +314,7 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
// TODO: -- should we do omitTF with 2 different enum classes? // TODO: -- should we do omitTF with 2 different enum classes?
private boolean omitTF; private boolean omitTF;
private IndexOptions indexOptions;
private boolean storePayloads; private boolean storePayloads;
private Bits liveDocs; private Bits liveDocs;
private final IntIndexInput.Reader docReader; private final IntIndexInput.Reader docReader;
@ -340,7 +351,8 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
SepDocsEnum init(FieldInfo fieldInfo, SepTermState termState, Bits liveDocs) throws IOException { SepDocsEnum init(FieldInfo fieldInfo, SepTermState termState, Bits liveDocs) throws IOException {
this.liveDocs = liveDocs; this.liveDocs = liveDocs;
omitTF = fieldInfo.omitTermFreqAndPositions; this.indexOptions = fieldInfo.indexOptions;
omitTF = indexOptions == IndexOptions.DOCS_ONLY;
storePayloads = fieldInfo.storePayloads; storePayloads = fieldInfo.storePayloads;
// TODO: can't we only do this if consumer // TODO: can't we only do this if consumer
@ -456,7 +468,7 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
0, 0,
docFreq, docFreq,
storePayloads); storePayloads);
skipper.setOmitTF(omitTF); skipper.setIndexOptions(indexOptions);
skipped = true; skipped = true;
} }
@ -633,7 +645,7 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
payloadFP, payloadFP,
docFreq, docFreq,
storePayloads); storePayloads);
skipper.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
skipped = true; skipped = true;
} }
final int newCount = skipper.skipTo(target); final int newCount = skipper.skipTo(target);

View File

@ -23,6 +23,7 @@ import java.util.Set;
import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.codecs.PostingsWriterBase; import org.apache.lucene.index.codecs.PostingsWriterBase;
@ -86,7 +87,7 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
final int totalNumDocs; final int totalNumDocs;
boolean storePayloads; boolean storePayloads;
boolean omitTF; IndexOptions indexOptions;
long lastSkipFP; long lastSkipFP;
@ -121,11 +122,13 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
docOut = factory.createOutput(state.directory, docFileName, state.context); docOut = factory.createOutput(state.directory, docFileName, state.context);
docIndex = docOut.index(); docIndex = docOut.index();
if (state.fieldInfos.hasProx()) { if (state.fieldInfos.hasFreq()) {
final String frqFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, FREQ_EXTENSION); final String frqFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, FREQ_EXTENSION);
freqOut = factory.createOutput(state.directory, frqFileName, state.context); freqOut = factory.createOutput(state.directory, frqFileName, state.context);
freqIndex = freqOut.index(); freqIndex = freqOut.index();
}
if (state.fieldInfos.hasProx()) {
final String posFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, POS_EXTENSION); final String posFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, POS_EXTENSION);
posOut = factory.createOutput(state.directory, posFileName, state.context); posOut = factory.createOutput(state.directory, posFileName, state.context);
posIndex = posOut.index(); posIndex = posOut.index();
@ -168,12 +171,17 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
@Override @Override
public void startTerm() throws IOException { public void startTerm() throws IOException {
docIndex.mark(); docIndex.mark();
if (!omitTF) {
if (indexOptions != IndexOptions.DOCS_ONLY) {
freqIndex.mark(); freqIndex.mark();
}
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
posIndex.mark(); posIndex.mark();
payloadStart = payloadOut.getFilePointer(); payloadStart = payloadOut.getFilePointer();
lastPayloadLength = -1; lastPayloadLength = -1;
} }
skipListWriter.resetSkip(docIndex, freqIndex, posIndex); skipListWriter.resetSkip(docIndex, freqIndex, posIndex);
} }
@ -182,9 +190,9 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
@Override @Override
public void setField(FieldInfo fieldInfo) { public void setField(FieldInfo fieldInfo) {
this.fieldInfo = fieldInfo; this.fieldInfo = fieldInfo;
omitTF = fieldInfo.omitTermFreqAndPositions; this.indexOptions = fieldInfo.indexOptions;
skipListWriter.setOmitTF(omitTF); skipListWriter.setIndexOptions(indexOptions);
storePayloads = !omitTF && fieldInfo.storePayloads; storePayloads = indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS && fieldInfo.storePayloads;
} }
/** Adds a new doc in this term. If this returns null /** Adds a new doc in this term. If this returns null
@ -209,7 +217,7 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
lastDocID = docID; lastDocID = docID;
docOut.write(delta); docOut.write(delta);
if (!omitTF) { if (indexOptions != IndexOptions.DOCS_ONLY) {
//System.out.println(" sepw startDoc: write freq=" + termDocFreq); //System.out.println(" sepw startDoc: write freq=" + termDocFreq);
freqOut.write(termDocFreq); freqOut.write(termDocFreq);
} }
@ -227,7 +235,7 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
/** Add a new position & payload */ /** Add a new position & payload */
@Override @Override
public void addPosition(int position, BytesRef payload) throws IOException { public void addPosition(int position, BytesRef payload) throws IOException {
assert !omitTF; assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
final int delta = position - lastPosition; final int delta = position - lastPosition;
assert delta >= 0: "position=" + position + " lastPosition=" + lastPosition; // not quite right (if pos=0 is repeated twice we don't catch it) assert delta >= 0: "position=" + position + " lastPosition=" + lastPosition; // not quite right (if pos=0 is repeated twice we don't catch it)
@ -274,10 +282,12 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
docIndex.write(indexBytesWriter, isFirstTerm); docIndex.write(indexBytesWriter, isFirstTerm);
//System.out.println(" docIndex=" + docIndex); //System.out.println(" docIndex=" + docIndex);
if (!omitTF) { if (indexOptions != IndexOptions.DOCS_ONLY) {
freqIndex.write(indexBytesWriter, isFirstTerm); freqIndex.write(indexBytesWriter, isFirstTerm);
//System.out.println(" freqIndex=" + freqIndex); //System.out.println(" freqIndex=" + freqIndex);
}
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
posIndex.write(indexBytesWriter, isFirstTerm); posIndex.write(indexBytesWriter, isFirstTerm);
//System.out.println(" posIndex=" + posIndex); //System.out.println(" posIndex=" + posIndex);
if (storePayloads) { if (storePayloads) {

View File

@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.Arrays; import java.util.Arrays;
import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexInput;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.codecs.MultiLevelSkipListReader; import org.apache.lucene.index.codecs.MultiLevelSkipListReader;
/** /**
@ -87,10 +88,10 @@ class SepSkipListReader extends MultiLevelSkipListReader {
} }
} }
boolean omitTF; IndexOptions indexOptions;
void setOmitTF(boolean v) { void setIndexOptions(IndexOptions v) {
omitTF = v; indexOptions = v;
} }
void init(long skipPointer, void init(long skipPointer,
@ -177,7 +178,7 @@ class SepSkipListReader extends MultiLevelSkipListReader {
@Override @Override
protected int readSkipData(int level, IndexInput skipStream) throws IOException { protected int readSkipData(int level, IndexInput skipStream) throws IOException {
int delta; int delta;
assert !omitTF || !currentFieldStoresPayloads; assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !currentFieldStoresPayloads;
if (currentFieldStoresPayloads) { if (currentFieldStoresPayloads) {
// the current field stores payloads. // the current field stores payloads.
// if the doc delta is odd then we have // if the doc delta is odd then we have
@ -192,11 +193,11 @@ class SepSkipListReader extends MultiLevelSkipListReader {
} else { } else {
delta = skipStream.readVInt(); delta = skipStream.readVInt();
} }
if (!omitTF) { if (indexOptions != IndexOptions.DOCS_ONLY) {
freqIndex[level].read(skipStream, false); freqIndex[level].read(skipStream, false);
} }
docIndex[level].read(skipStream, false); docIndex[level].read(skipStream, false);
if (!omitTF) { if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
posIndex[level].read(skipStream, false); posIndex[level].read(skipStream, false);
if (currentFieldStoresPayloads) { if (currentFieldStoresPayloads) {
payloadPointer[level] += skipStream.readVInt(); payloadPointer[level] += skipStream.readVInt();

View File

@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.Arrays; import java.util.Arrays;
import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.codecs.MultiLevelSkipListWriter; import org.apache.lucene.index.codecs.MultiLevelSkipListWriter;
// TODO: -- skip data should somehow be more local to the // TODO: -- skip data should somehow be more local to the
@ -84,10 +85,10 @@ class SepSkipListWriter extends MultiLevelSkipListWriter {
} }
} }
boolean omitTF; IndexOptions indexOptions;
void setOmitTF(boolean v) { void setIndexOptions(IndexOptions v) {
omitTF = v; indexOptions = v;
} }
void setPosOutput(IntIndexOutput posOutput) throws IOException { void setPosOutput(IntIndexOutput posOutput) throws IOException {
@ -159,7 +160,7 @@ class SepSkipListWriter extends MultiLevelSkipListWriter {
// current payload length equals the length at the previous // current payload length equals the length at the previous
// skip point // skip point
assert !omitTF || !curStorePayloads; assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !curStorePayloads;
if (curStorePayloads) { if (curStorePayloads) {
int delta = curDoc - lastSkipDoc[level]; int delta = curDoc - lastSkipDoc[level];
@ -179,13 +180,13 @@ class SepSkipListWriter extends MultiLevelSkipListWriter {
skipBuffer.writeVInt(curDoc - lastSkipDoc[level]); skipBuffer.writeVInt(curDoc - lastSkipDoc[level]);
} }
if (!omitTF) { if (indexOptions != IndexOptions.DOCS_ONLY) {
freqIndex[level].mark(); freqIndex[level].mark();
freqIndex[level].write(skipBuffer, false); freqIndex[level].write(skipBuffer, false);
} }
docIndex[level].mark(); docIndex[level].mark();
docIndex[level].write(skipBuffer, false); docIndex[level].write(skipBuffer, false);
if (!omitTF) { if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
posIndex[level].mark(); posIndex[level].mark();
posIndex[level].write(skipBuffer, false); posIndex[level].write(skipBuffer, false);
if (curStorePayloads) { if (curStorePayloads) {

View File

@ -19,6 +19,7 @@ package org.apache.lucene.index.codecs.simpletext;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.index.codecs.FieldsProducer; import org.apache.lucene.index.codecs.FieldsProducer;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.FieldsEnum; import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.Terms; import org.apache.lucene.index.Terms;
@ -53,6 +54,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
final static BytesRef FIELD = SimpleTextFieldsWriter.FIELD; final static BytesRef FIELD = SimpleTextFieldsWriter.FIELD;
final static BytesRef TERM = SimpleTextFieldsWriter.TERM; final static BytesRef TERM = SimpleTextFieldsWriter.TERM;
final static BytesRef DOC = SimpleTextFieldsWriter.DOC; final static BytesRef DOC = SimpleTextFieldsWriter.DOC;
final static BytesRef FREQ = SimpleTextFieldsWriter.FREQ;
final static BytesRef POS = SimpleTextFieldsWriter.POS; final static BytesRef POS = SimpleTextFieldsWriter.POS;
final static BytesRef PAYLOAD = SimpleTextFieldsWriter.PAYLOAD; final static BytesRef PAYLOAD = SimpleTextFieldsWriter.PAYLOAD;
@ -114,16 +116,16 @@ class SimpleTextFieldsReader extends FieldsProducer {
private class SimpleTextTermsEnum extends TermsEnum { private class SimpleTextTermsEnum extends TermsEnum {
private final IndexInput in; private final IndexInput in;
private final boolean omitTF; private final IndexOptions indexOptions;
private int docFreq; private int docFreq;
private long totalTermFreq; private long totalTermFreq;
private long docsStart; private long docsStart;
private boolean ended; private boolean ended;
private final BytesRefFSTEnum<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> fstEnum; private final BytesRefFSTEnum<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> fstEnum;
public SimpleTextTermsEnum(FST<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> fst, boolean omitTF) throws IOException { public SimpleTextTermsEnum(FST<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> fst, IndexOptions indexOptions) throws IOException {
this.in = (IndexInput) SimpleTextFieldsReader.this.in.clone(); this.in = (IndexInput) SimpleTextFieldsReader.this.in.clone();
this.omitTF = omitTF; this.indexOptions = indexOptions;
fstEnum = new BytesRefFSTEnum<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>>(fst); fstEnum = new BytesRefFSTEnum<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>>(fst);
} }
@ -218,12 +220,12 @@ class SimpleTextFieldsReader extends FieldsProducer {
} else { } else {
docsEnum = new SimpleTextDocsEnum(); docsEnum = new SimpleTextDocsEnum();
} }
return docsEnum.reset(docsStart, liveDocs, omitTF); return docsEnum.reset(docsStart, liveDocs, indexOptions == IndexOptions.DOCS_ONLY);
} }
@Override @Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException { public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
if (omitTF) { if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
return null; return null;
} }
@ -303,8 +305,11 @@ class SimpleTextFieldsReader extends FieldsProducer {
docID = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length); docID = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
termFreq = 0; termFreq = 0;
first = false; first = false;
} else if (scratch.startsWith(FREQ)) {
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+FREQ.length, scratch.length-FREQ.length, scratchUTF16);
termFreq = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
} else if (scratch.startsWith(POS)) { } else if (scratch.startsWith(POS)) {
termFreq++; // skip termFreq++;
} else if (scratch.startsWith(PAYLOAD)) { } else if (scratch.startsWith(PAYLOAD)) {
// skip // skip
} else { } else {
@ -384,10 +389,13 @@ class SimpleTextFieldsReader extends FieldsProducer {
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+DOC.length, scratch.length-DOC.length, scratchUTF16); UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+DOC.length, scratch.length-DOC.length, scratchUTF16);
docID = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length); docID = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
tf = 0; tf = 0;
posStart = in.getFilePointer();
first = false; first = false;
} else if (scratch.startsWith(FREQ)) {
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+FREQ.length, scratch.length-FREQ.length, scratchUTF16);
tf = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
posStart = in.getFilePointer();
} else if (scratch.startsWith(POS)) { } else if (scratch.startsWith(POS)) {
tf++; // skip
} else if (scratch.startsWith(PAYLOAD)) { } else if (scratch.startsWith(PAYLOAD)) {
// skip // skip
} else { } else {
@ -461,7 +469,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
private class SimpleTextTerms extends Terms { private class SimpleTextTerms extends Terms {
private final long termsStart; private final long termsStart;
private final boolean omitTF; private final IndexOptions indexOptions;
private long sumTotalTermFreq; private long sumTotalTermFreq;
private long sumDocFreq; private long sumDocFreq;
private FST<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> fst; private FST<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> fst;
@ -470,7 +478,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
public SimpleTextTerms(String field, long termsStart) throws IOException { public SimpleTextTerms(String field, long termsStart) throws IOException {
this.termsStart = termsStart; this.termsStart = termsStart;
omitTF = fieldInfos.fieldInfo(field).omitTermFreqAndPositions; indexOptions = fieldInfos.fieldInfo(field).indexOptions;
loadTerms(); loadTerms();
} }
@ -533,7 +541,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
@Override @Override
public TermsEnum iterator() throws IOException { public TermsEnum iterator() throws IOException {
if (fst != null) { if (fst != null) {
return new SimpleTextTermsEnum(fst, omitTF); return new SimpleTextTermsEnum(fst, indexOptions);
} else { } else {
return TermsEnum.EMPTY; return TermsEnum.EMPTY;
} }

View File

@ -24,6 +24,7 @@ import org.apache.lucene.index.codecs.TermsConsumer;
import org.apache.lucene.index.codecs.PostingsConsumer; import org.apache.lucene.index.codecs.PostingsConsumer;
import org.apache.lucene.index.codecs.TermStats; import org.apache.lucene.index.codecs.TermStats;
import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.IndexOutput;
@ -41,6 +42,7 @@ class SimpleTextFieldsWriter extends FieldsConsumer {
final static BytesRef FIELD = new BytesRef("field "); final static BytesRef FIELD = new BytesRef("field ");
final static BytesRef TERM = new BytesRef(" term "); final static BytesRef TERM = new BytesRef(" term ");
final static BytesRef DOC = new BytesRef(" doc "); final static BytesRef DOC = new BytesRef(" doc ");
final static BytesRef FREQ = new BytesRef(" freq ");
final static BytesRef POS = new BytesRef(" pos "); final static BytesRef POS = new BytesRef(" pos ");
final static BytesRef PAYLOAD = new BytesRef(" payload "); final static BytesRef PAYLOAD = new BytesRef(" payload ");
@ -73,11 +75,15 @@ class SimpleTextFieldsWriter extends FieldsConsumer {
write(FIELD); write(FIELD);
write(field.name); write(field.name);
out.writeByte(NEWLINE); out.writeByte(NEWLINE);
return new SimpleTextTermsWriter(); return new SimpleTextTermsWriter(field);
} }
private class SimpleTextTermsWriter extends TermsConsumer { private class SimpleTextTermsWriter extends TermsConsumer {
private final SimpleTextPostingsWriter postingsWriter = new SimpleTextPostingsWriter(); private final SimpleTextPostingsWriter postingsWriter;
public SimpleTextTermsWriter(FieldInfo field) {
postingsWriter = new SimpleTextPostingsWriter(field);
}
@Override @Override
public PostingsConsumer startTerm(BytesRef term) throws IOException { public PostingsConsumer startTerm(BytesRef term) throws IOException {
@ -101,7 +107,12 @@ class SimpleTextFieldsWriter extends FieldsConsumer {
private class SimpleTextPostingsWriter extends PostingsConsumer { private class SimpleTextPostingsWriter extends PostingsConsumer {
private BytesRef term; private BytesRef term;
private boolean wroteTerm; private boolean wroteTerm;
private IndexOptions indexOptions;
public SimpleTextPostingsWriter(FieldInfo field) {
this.indexOptions = field.indexOptions;
}
@Override @Override
public void startDoc(int docID, int termDocFreq) throws IOException { public void startDoc(int docID, int termDocFreq) throws IOException {
if (!wroteTerm) { if (!wroteTerm) {
@ -115,7 +126,14 @@ class SimpleTextFieldsWriter extends FieldsConsumer {
write(DOC); write(DOC);
write(Integer.toString(docID)); write(Integer.toString(docID));
newline(); newline();
if (indexOptions != IndexOptions.DOCS_ONLY) {
write(FREQ);
write(Integer.toString(termDocFreq));
newline();
}
} }
public PostingsConsumer reset(BytesRef term) { public PostingsConsumer reset(BytesRef term) {
this.term = term; this.term = term;

View File

@ -23,6 +23,7 @@ import java.util.Collection;
import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermState;
@ -190,7 +191,7 @@ public class StandardPostingsReader extends PostingsReaderBase {
// undefined // undefined
} }
if (!fieldInfo.omitTermFreqAndPositions) { if (fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
if (isFirstTerm) { if (isFirstTerm) {
termState.proxOffset = termState.bytesReader.readVLong(); termState.proxOffset = termState.bytesReader.readVLong();
} else { } else {
@ -219,7 +220,7 @@ public class StandardPostingsReader extends PostingsReaderBase {
@Override @Override
public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException { public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
if (fieldInfo.omitTermFreqAndPositions) { if (fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
return null; return null;
} }
@ -282,7 +283,7 @@ public class StandardPostingsReader extends PostingsReaderBase {
} }
public SegmentDocsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits liveDocs) throws IOException { public SegmentDocsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits liveDocs) throws IOException {
omitTF = fieldInfo.omitTermFreqAndPositions; omitTF = fieldInfo.indexOptions == IndexOptions.DOCS_ONLY;
if (omitTF) { if (omitTF) {
freq = 1; freq = 1;
} }
@ -455,7 +456,7 @@ public class StandardPostingsReader extends PostingsReaderBase {
} }
public SegmentDocsAndPositionsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits liveDocs) throws IOException { public SegmentDocsAndPositionsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits liveDocs) throws IOException {
assert !fieldInfo.omitTermFreqAndPositions; assert fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
assert !fieldInfo.storePayloads; assert !fieldInfo.storePayloads;
this.liveDocs = liveDocs; this.liveDocs = liveDocs;
@ -649,7 +650,7 @@ public class StandardPostingsReader extends PostingsReaderBase {
} }
public SegmentDocsAndPositionsAndPayloadsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits liveDocs) throws IOException { public SegmentDocsAndPositionsAndPayloadsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits liveDocs) throws IOException {
assert !fieldInfo.omitTermFreqAndPositions; assert fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
assert fieldInfo.storePayloads; assert fieldInfo.storePayloads;
if (payload == null) { if (payload == null) {
payload = new BytesRef(); payload = new BytesRef();

View File

@ -25,6 +25,7 @@ import java.io.IOException;
import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.codecs.PostingsWriterBase; import org.apache.lucene.index.codecs.PostingsWriterBase;
@ -66,7 +67,7 @@ public final class StandardPostingsWriter extends PostingsWriterBase {
final int totalNumDocs; final int totalNumDocs;
IndexOutput termsOut; IndexOutput termsOut;
boolean omitTermFreqAndPositions; IndexOptions indexOptions;
boolean storePayloads; boolean storePayloads;
// Starts a new term // Starts a new term
long lastFreqStart; long lastFreqStart;
@ -144,7 +145,7 @@ public final class StandardPostingsWriter extends PostingsWriterBase {
public void setField(FieldInfo fieldInfo) { public void setField(FieldInfo fieldInfo) {
//System.out.println("SPW: setField"); //System.out.println("SPW: setField");
this.fieldInfo = fieldInfo; this.fieldInfo = fieldInfo;
omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions; indexOptions = fieldInfo.indexOptions;
storePayloads = fieldInfo.storePayloads; storePayloads = fieldInfo.storePayloads;
//System.out.println(" set init blockFreqStart=" + freqStart); //System.out.println(" set init blockFreqStart=" + freqStart);
//System.out.println(" set init blockProxStart=" + proxStart); //System.out.println(" set init blockProxStart=" + proxStart);
@ -173,7 +174,7 @@ public final class StandardPostingsWriter extends PostingsWriterBase {
assert docID < totalNumDocs: "docID=" + docID + " totalNumDocs=" + totalNumDocs; assert docID < totalNumDocs: "docID=" + docID + " totalNumDocs=" + totalNumDocs;
lastDocID = docID; lastDocID = docID;
if (omitTermFreqAndPositions) { if (indexOptions == IndexOptions.DOCS_ONLY) {
freqOut.writeVInt(delta); freqOut.writeVInt(delta);
} else if (1 == termDocFreq) { } else if (1 == termDocFreq) {
freqOut.writeVInt((delta<<1) | 1); freqOut.writeVInt((delta<<1) | 1);
@ -189,7 +190,7 @@ public final class StandardPostingsWriter extends PostingsWriterBase {
@Override @Override
public void addPosition(int position, BytesRef payload) throws IOException { public void addPosition(int position, BytesRef payload) throws IOException {
//System.out.println("StandardW: addPos pos=" + position + " payload=" + (payload == null ? "null" : (payload.length + " bytes")) + " proxFP=" + proxOut.getFilePointer()); //System.out.println("StandardW: addPos pos=" + position + " payload=" + (payload == null ? "null" : (payload.length + " bytes")) + " proxFP=" + proxOut.getFilePointer());
assert !omitTermFreqAndPositions: "omitTermFreqAndPositions is true"; assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS: "invalid indexOptions: " + indexOptions;
assert proxOut != null; assert proxOut != null;
final int delta = position - lastPosition; final int delta = position - lastPosition;
@ -246,7 +247,7 @@ public final class StandardPostingsWriter extends PostingsWriterBase {
bytesWriter.writeVInt((int) (skipListWriter.writeSkip(freqOut)-freqStart)); bytesWriter.writeVInt((int) (skipListWriter.writeSkip(freqOut)-freqStart));
} }
if (!omitTermFreqAndPositions) { if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
//System.out.println(" proxFP=" + proxStart); //System.out.println(" proxFP=" + proxStart);
if (isFirstTerm) { if (isFirstTerm) {
bytesWriter.writeVLong(proxStart); bytesWriter.writeVLong(proxStart);

View File

@ -195,7 +195,7 @@ public class MultiPhraseQuery extends Query {
if (postingsEnum == null) { if (postingsEnum == null) {
if (reader.termDocsEnum(liveDocs, term.field(), term.bytes()) != null) { if (reader.termDocsEnum(liveDocs, term.field(), term.bytes()) != null) {
// term does exist, but has no positions // term does exist, but has no positions
throw new IllegalStateException("field \"" + term.field() + "\" was indexed with Field.omitTermFreqAndPositions=true; cannot run PhraseQuery (term=" + term.text() + ")"); throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")");
} else { } else {
// term does not exist // term does not exist
return null; return null;
@ -443,7 +443,7 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum {
} else { } else {
if (indexReader.termDocsEnum(liveDocs, terms[i].field(), terms[i].bytes()) != null) { if (indexReader.termDocsEnum(liveDocs, terms[i].field(), terms[i].bytes()) != null) {
// term does exist, but has no positions // term does exist, but has no positions
throw new IllegalStateException("field \"" + terms[i].field() + "\" was indexed with Field.omitTermFreqAndPositions=true; cannot run PhraseQuery (term=" + terms[i].text() + ")"); throw new IllegalStateException("field \"" + terms[i].field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + terms[i].text() + ")");
} }
} }
} }

View File

@ -229,7 +229,7 @@ public class PhraseQuery extends Query {
if (postingsEnum == null) { if (postingsEnum == null) {
assert (reader.termDocsEnum(liveDocs, t.field(), t.bytes(), state) != null) : "termstate found but no term exists in reader"; assert (reader.termDocsEnum(liveDocs, t.field(), t.bytes(), state) != null) : "termstate found but no term exists in reader";
// term does exist, but has no positions // term does exist, but has no positions
throw new IllegalStateException("field \"" + t.field() + "\" was indexed with Field.omitTermFreqAndPositions=true; cannot run PhraseQuery (term=" + t.text() + ")"); throw new IllegalStateException("field \"" + t.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + t.text() + ")");
} }
// get the docFreq without seeking // get the docFreq without seeking
TermsEnum te = reader.fields().terms(field).getThreadTermsEnum(); TermsEnum te = reader.fields().terms(field).getThreadTermsEnum();

View File

@ -92,7 +92,7 @@ public class SpanTermQuery extends SpanQuery {
} else { } else {
if (reader.termDocsEnum(reader.getLiveDocs(), term.field(), term.bytes()) != null) { if (reader.termDocsEnum(reader.getLiveDocs(), term.field(), term.bytes()) != null) {
// term does exist, but has no positions // term does exist, but has no positions
throw new IllegalStateException("field \"" + term.field() + "\" was indexed with Field.omitTermFreqAndPositions=true; cannot run SpanTermQuery (term=" + term.text() + ")"); throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run SpanTermQuery (term=" + term.text() + ")");
} else { } else {
// term does not exist // term does not exist
return TermSpans.EMPTY_TERM_SPANS; return TermSpans.EMPTY_TERM_SPANS;

View File

@ -99,6 +99,10 @@
to stored fields file, previously they were stored in to stored fields file, previously they were stored in
text format only. text format only.
</p> </p>
<p>
In version 3.4, fields can omit position data while
still indexing term frequencies.
</p>
</section> </section>
<section id="Definitions"><title>Definitions</title> <section id="Definitions"><title>Definitions</title>
@ -276,7 +280,7 @@
<p>Term Frequency <p>Term Frequency
data. For each term in the dictionary, the numbers of all the data. For each term in the dictionary, the numbers of all the
documents that contain that term, and the frequency of the term in documents that contain that term, and the frequency of the term in
that document if omitTf is false. that document, unless frequencies are omitted (IndexOptions.DOCS_ONLY)
</p> </p>
</li> </li>
@ -284,8 +288,7 @@
<p>Term Proximity <p>Term Proximity
data. For each term in the dictionary, the positions that the term data. For each term in the dictionary, the positions that the term
occurs in each document. Note that this will occurs in each document. Note that this will
not exist if all fields in all documents set not exist if all fields in all documents omit position data.
omitTf to true.
</p> </p>
</li> </li>
@ -1080,7 +1083,7 @@
<p> <p>
HasProx is 1 if any fields in this segment have HasProx is 1 if any fields in this segment have
omitTf set to false; else, it's 0. position data (IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); else, it's 0.
</p> </p>
<p> <p>
@ -1217,11 +1220,13 @@
<li>If the fourth lowest-order bit is set (0x08), term offsets are stored with the term vectors.</li> <li>If the fourth lowest-order bit is set (0x08), term offsets are stored with the term vectors.</li>
<li>If the fifth lowest-order bit is set (0x10), norms are omitted for the indexed field.</li> <li>If the fifth lowest-order bit is set (0x10), norms are omitted for the indexed field.</li>
<li>If the sixth lowest-order bit is set (0x20), payloads are stored for the indexed field.</li> <li>If the sixth lowest-order bit is set (0x20), payloads are stored for the indexed field.</li>
<li>If the seventh lowest-order bit is set (0x40), term frequencies and positions omitted for the indexed field.</li>
<li>If the eighth lowest-order bit is set (0x80), positions are omitted for the indexed field.</li>
</ul> </ul>
</p> </p>
<p> <p>
FNMVersion (added in 2.9) is always -2. FNMVersion (added in 2.9) is -2 for indexes from 2.9 - 3.3. It is -3 for indexes in Lucene 3.4+
</p> </p>
<p> <p>
@ -1419,7 +1424,7 @@
file. In particular, it is the difference between the position of file. In particular, it is the difference between the position of
this term's data in that file and the position of the previous this term's data in that file and the position of the previous
term's data (or zero, for the first term in the file. For fields term's data (or zero, for the first term in the file. For fields
with omitTf true, this will be 0 since that omit position data, this will be 0 since
prox information is not stored. prox information is not stored.
</p> </p>
<p>SkipDelta determines the position of this <p>SkipDelta determines the position of this
@ -1494,7 +1499,7 @@
<p> <p>
The .frq file contains the lists of documents The .frq file contains the lists of documents
which contain each term, along with the frequency of the term in that which contain each term, along with the frequency of the term in that
document (if omitTf is false). document (except when frequencies are omitted: IndexOptions.DOCS_ONLY).
</p> </p>
<p>FreqFile (.frq) --&gt; <p>FreqFile (.frq) --&gt;
&lt;TermFreqs, SkipData&gt; &lt;TermFreqs, SkipData&gt;
@ -1531,26 +1536,26 @@
<p>TermFreq <p>TermFreq
entries are ordered by increasing document number. entries are ordered by increasing document number.
</p> </p>
<p>DocDelta: if omitTf is false, this determines both <p>DocDelta: if frequencies are indexed, this determines both
the document number and the frequency. In the document number and the frequency. In
particular, DocDelta/2 is the difference between particular, DocDelta/2 is the difference between
this document number and the previous document this document number and the previous document
number (or zero when this is the first document in number (or zero when this is the first document in
a TermFreqs). When DocDelta is odd, the frequency a TermFreqs). When DocDelta is odd, the frequency
is one. When DocDelta is even, the frequency is is one. When DocDelta is even, the frequency is
read as another VInt. If omitTf is true, DocDelta read as another VInt. If frequencies are omitted, DocDelta
contains the gap (not multiplied by 2) between contains the gap (not multiplied by 2) between
document numbers and no frequency information is document numbers and no frequency information is
stored. stored.
</p> </p>
<p>For example, the TermFreqs for a term which occurs <p>For example, the TermFreqs for a term which occurs
once in document seven and three times in document once in document seven and three times in document
eleven, with omitTf false, would be the following eleven, with frequencies indexed, would be the following
sequence of VInts: sequence of VInts:
</p> </p>
<p>15, 8, 3 <p>15, 8, 3
</p> </p>
<p> If omitTf were true it would be this sequence <p> If frequencies were omitted (IndexOptions.DOCS_ONLY) it would be this sequence
of VInts instead: of VInts instead:
</p> </p>
<p> <p>
@ -1621,9 +1626,9 @@
<p> <p>
The .prx file contains the lists of positions that The .prx file contains the lists of positions that
each term occurs at within documents. Note that each term occurs at within documents. Note that
fields with omitTf true do not store fields omitting positional data do not store
anything into this file, and if all fields in the anything into this file, and if all fields in the
index have omitTf true then the .prx file will not index omit positional data then the .prx file will not
exist. exist.
</p> </p>
<p>ProxFile (.prx) --&gt; <p>ProxFile (.prx) --&gt;

View File

@ -29,6 +29,7 @@ import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable; import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.search.SimilarityProvider; import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
@ -67,7 +68,7 @@ class DocHelper {
public static Field noTFField = new Field(NO_TF_KEY, NO_TF_TEXT, public static Field noTFField = new Field(NO_TF_KEY, NO_TF_TEXT,
Field.Store.YES, Field.Index.ANALYZED); Field.Store.YES, Field.Index.ANALYZED);
static { static {
noTFField.setOmitTermFreqAndPositions(true); noTFField.setIndexOptions(IndexOptions.DOCS_ONLY);
} }
public static final String UNINDEXED_FIELD_TEXT = "unindexed field text"; public static final String UNINDEXED_FIELD_TEXT = "unindexed field text";
@ -173,7 +174,7 @@ class DocHelper {
if (f.isStored()) add(stored,f); if (f.isStored()) add(stored,f);
else add(unstored,f); else add(unstored,f);
if (f.getOmitNorms()) add(noNorms,f); if (f.getOmitNorms()) add(noNorms,f);
if (f.getOmitTermFreqAndPositions()) add(noTf,f); if (f.getIndexOptions() == IndexOptions.DOCS_ONLY) add(noTf,f);
if (f.isLazy()) add(lazy, f); if (f.isLazy()) add(lazy, f);
} }
} }

View File

@ -22,6 +22,7 @@ import java.util.Comparator;
import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.codecs.FieldsConsumer; import org.apache.lucene.index.codecs.FieldsConsumer;
@ -90,7 +91,7 @@ class PreFlexFieldsWriter extends FieldsConsumer {
public PreFlexTermsWriter(FieldInfo fieldInfo) { public PreFlexTermsWriter(FieldInfo fieldInfo) {
this.fieldInfo = fieldInfo; this.fieldInfo = fieldInfo;
omitTF = fieldInfo.omitTermFreqAndPositions; omitTF = fieldInfo.indexOptions == IndexOptions.DOCS_ONLY;
storePayloads = fieldInfo.storePayloads; storePayloads = fieldInfo.storePayloads;
} }

View File

@ -422,7 +422,7 @@ public class _TestUtil {
List<Fieldable> fields = doc.getFields(); List<Fieldable> fields = doc.getFields();
for (Fieldable field : fields) { for (Fieldable field : fields) {
fieldInfos.addOrUpdate(field.name(), field.isIndexed(), field.isTermVectorStored(), field.isStorePositionWithTermVector(), fieldInfos.addOrUpdate(field.name(), field.isIndexed(), field.isTermVectorStored(), field.isStorePositionWithTermVector(),
field.isStoreOffsetWithTermVector(), field.getOmitNorms(), false, field.getOmitTermFreqAndPositions(), field.docValuesType()); field.isStoreOffsetWithTermVector(), field.getOmitNorms(), false, field.getIndexOptions(), field.docValuesType());
} }
} }
@ -507,7 +507,7 @@ public class _TestUtil {
field1.isStored() ? Field.Store.YES : Field.Store.NO, field1.isStored() ? Field.Store.YES : Field.Store.NO,
field1.isIndexed() ? (field1.isTokenized() ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED) : Field.Index.NO); field1.isIndexed() ? (field1.isTokenized() ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED) : Field.Index.NO);
field2.setOmitNorms(field1.getOmitNorms()); field2.setOmitNorms(field1.getOmitNorms());
field2.setOmitTermFreqAndPositions(field1.getOmitTermFreqAndPositions()); field2.setIndexOptions(field1.getIndexOptions());
doc2.add(field2); doc2.add(field2);
} }

View File

@ -24,6 +24,7 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.store.MockDirectoryWrapper; import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
@ -62,7 +63,7 @@ public class Test2BPostings extends LuceneTestCase {
Document doc = new Document(); Document doc = new Document();
Field field = new Field("field", new MyTokenStream()); Field field = new Field("field", new MyTokenStream());
field.setOmitTermFreqAndPositions(true); field.setIndexOptions(IndexOptions.DOCS_ONLY);
field.setOmitNorms(true); field.setOmitNorms(true);
doc.add(field); doc.add(field);

View File

@ -23,6 +23,7 @@ import org.apache.lucene.search.*;
import org.apache.lucene.analysis.*; import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.tokenattributes.*; import org.apache.lucene.analysis.tokenattributes.*;
import org.apache.lucene.document.*; import org.apache.lucene.document.*;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.index.codecs.CodecProvider;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
@ -177,7 +178,7 @@ public class Test2BTerms extends LuceneTestCase {
Document doc = new Document(); Document doc = new Document();
final MyTokenStream ts = new MyTokenStream(random, TERMS_PER_DOC); final MyTokenStream ts = new MyTokenStream(random, TERMS_PER_DOC);
Field field = new Field("field", ts); Field field = new Field("field", ts);
field.setOmitTermFreqAndPositions(true); field.setIndexOptions(IndexOptions.DOCS_ONLY);
field.setOmitNorms(true); field.setOmitNorms(true);
doc.add(field); doc.add(field);
//w.setInfoStream(System.out); //w.setInfoStream(System.out);

View File

@ -31,6 +31,7 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable; import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.NumericField; import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.DocIdSetIterator;
@ -606,10 +607,10 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
private void addNoProxDoc(IndexWriter writer) throws IOException { private void addNoProxDoc(IndexWriter writer) throws IOException {
Document doc = new Document(); Document doc = new Document();
Field f = new Field("content3", "aaa", Field.Store.YES, Field.Index.ANALYZED); Field f = new Field("content3", "aaa", Field.Store.YES, Field.Index.ANALYZED);
f.setOmitTermFreqAndPositions(true); f.setIndexOptions(IndexOptions.DOCS_ONLY);
doc.add(f); doc.add(f);
f = new Field("content4", "aaa", Field.Store.YES, Field.Index.NO); f = new Field("content4", "aaa", Field.Store.YES, Field.Index.NO);
f.setOmitTermFreqAndPositions(true); f.setIndexOptions(IndexOptions.DOCS_ONLY);
doc.add(f); doc.add(f);
writer.addDocument(doc); writer.addDocument(doc);
} }

View File

@ -25,6 +25,7 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.FieldsConsumer; import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.index.codecs.FieldsProducer; import org.apache.lucene.index.codecs.FieldsProducer;
@ -84,7 +85,8 @@ public class TestCodecs extends LuceneTestCase {
this.storePayloads = storePayloads; this.storePayloads = storePayloads;
fieldInfos.addOrUpdate(name, true); fieldInfos.addOrUpdate(name, true);
fieldInfo = fieldInfos.fieldInfo(name); fieldInfo = fieldInfos.fieldInfo(name);
fieldInfo.omitTermFreqAndPositions = omitTF; // TODO: change this test to use all three
fieldInfo.indexOptions = omitTF ? IndexOptions.DOCS_ONLY : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
fieldInfo.storePayloads = storePayloads; fieldInfo.storePayloads = storePayloads;
this.terms = terms; this.terms = terms;
for(int i=0;i<terms.length;i++) for(int i=0;i<terms.length;i++)

View File

@ -34,6 +34,7 @@ import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field.TermVector; import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.document.Fieldable; import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IOContext.Context; import org.apache.lucene.store.IOContext.Context;
@ -303,7 +304,7 @@ public class TestDocumentWriter extends LuceneTestCase {
doc.add(newField("f1", "v2", Store.YES, Index.NO)); doc.add(newField("f1", "v2", Store.YES, Index.NO));
// f2 has no TF // f2 has no TF
Field f = newField("f2", "v1", Store.NO, Index.ANALYZED); Field f = newField("f2", "v1", Store.NO, Index.ANALYZED);
f.setOmitTermFreqAndPositions(true); f.setIndexOptions(IndexOptions.DOCS_ONLY);
doc.add(f); doc.add(f);
doc.add(newField("f2", "v2", Store.YES, Index.NO)); doc.add(newField("f2", "v2", Store.YES, Index.NO));
@ -319,10 +320,10 @@ public class TestDocumentWriter extends LuceneTestCase {
FieldInfos fi = reader.fieldInfos(); FieldInfos fi = reader.fieldInfos();
// f1 // f1
assertFalse("f1 should have no norms", reader.hasNorms("f1")); assertFalse("f1 should have no norms", reader.hasNorms("f1"));
assertFalse("omitTermFreqAndPositions field bit should not be set for f1", fi.fieldInfo("f1").omitTermFreqAndPositions); assertEquals("omitTermFreqAndPositions field bit should not be set for f1", IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, fi.fieldInfo("f1").indexOptions);
// f2 // f2
assertTrue("f2 should have norms", reader.hasNorms("f2")); assertTrue("f2 should have norms", reader.hasNorms("f2"));
assertTrue("omitTermFreqAndPositions field bit should be set for f2", fi.fieldInfo("f2").omitTermFreqAndPositions); assertEquals("omitTermFreqAndPositions field bit should be set for f2", IndexOptions.DOCS_ONLY, fi.fieldInfo("f2").indexOptions);
reader.close(); reader.close();
} }
} }

View File

@ -20,6 +20,7 @@ package org.apache.lucene.index;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil; import org.apache.lucene.util._TestUtil;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.IndexOutput;
@ -137,7 +138,7 @@ public class TestFieldInfos extends LuceneTestCase {
try { try {
readOnly.addOrUpdate("bogus", random.nextBoolean(), random.nextBoolean(), readOnly.addOrUpdate("bogus", random.nextBoolean(), random.nextBoolean(),
random.nextBoolean(), random.nextBoolean(), random.nextBoolean(), random.nextBoolean(), random.nextBoolean(), random.nextBoolean(),
random.nextBoolean(), random.nextBoolean(), null); random.nextBoolean(), random.nextBoolean() ? IndexOptions.DOCS_ONLY : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, null);
fail("instance should be read only"); fail("instance should be read only");
} catch (IllegalStateException e) { } catch (IllegalStateException e) {
// expected // expected

View File

@ -30,6 +30,7 @@ import org.apache.lucene.document.FieldSelectorResult;
import org.apache.lucene.document.Fieldable; import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.LoadFirstFieldSelector; import org.apache.lucene.document.LoadFirstFieldSelector;
import org.apache.lucene.document.SetBasedFieldSelector; import org.apache.lucene.document.SetBasedFieldSelector;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.FieldCache; import org.apache.lucene.search.FieldCache;
import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.AlreadyClosedException;
@ -91,7 +92,7 @@ public class TestFieldsReader extends LuceneTestCase {
assertTrue(field.isStoreOffsetWithTermVector() == true); assertTrue(field.isStoreOffsetWithTermVector() == true);
assertTrue(field.isStorePositionWithTermVector() == true); assertTrue(field.isStorePositionWithTermVector() == true);
assertTrue(field.getOmitNorms() == false); assertTrue(field.getOmitNorms() == false);
assertTrue(field.getOmitTermFreqAndPositions() == false); assertTrue(field.getIndexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
field = doc.getField(DocHelper.TEXT_FIELD_3_KEY); field = doc.getField(DocHelper.TEXT_FIELD_3_KEY);
assertTrue(field != null); assertTrue(field != null);
@ -99,7 +100,7 @@ public class TestFieldsReader extends LuceneTestCase {
assertTrue(field.isStoreOffsetWithTermVector() == false); assertTrue(field.isStoreOffsetWithTermVector() == false);
assertTrue(field.isStorePositionWithTermVector() == false); assertTrue(field.isStorePositionWithTermVector() == false);
assertTrue(field.getOmitNorms() == true); assertTrue(field.getOmitNorms() == true);
assertTrue(field.getOmitTermFreqAndPositions() == false); assertTrue(field.getIndexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
field = doc.getField(DocHelper.NO_TF_KEY); field = doc.getField(DocHelper.NO_TF_KEY);
assertTrue(field != null); assertTrue(field != null);
@ -107,7 +108,7 @@ public class TestFieldsReader extends LuceneTestCase {
assertTrue(field.isStoreOffsetWithTermVector() == false); assertTrue(field.isStoreOffsetWithTermVector() == false);
assertTrue(field.isStorePositionWithTermVector() == false); assertTrue(field.isStorePositionWithTermVector() == false);
assertTrue(field.getOmitNorms() == false); assertTrue(field.getOmitNorms() == false);
assertTrue(field.getOmitTermFreqAndPositions() == true); assertTrue(field.getIndexOptions() == IndexOptions.DOCS_ONLY);
reader.close(); reader.close();
} }

View File

@ -26,6 +26,7 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
@ -63,8 +64,8 @@ public class TestLongPostings extends LuceneTestCase {
} }
public void testLongPostings() throws Exception { public void testLongPostings() throws Exception {
assumeFalse("Too slow with SimpleText codec", CodecProvider.getDefault().getFieldCodec("field").equals("SimpleText")); assumeFalse("Too slow with SimpleText codec at night", TEST_NIGHTLY && CodecProvider.getDefault().getFieldCodec("field").equals("SimpleText"));
assumeFalse("Too slow with Memory codec", CodecProvider.getDefault().getFieldCodec("field").equals("Memory")); assumeFalse("Too slow with Memory codec at night", TEST_NIGHTLY && CodecProvider.getDefault().getFieldCodec("field").equals("Memory"));
// Don't use _TestUtil.getTempDir so that we own the // Don't use _TestUtil.getTempDir so that we own the
// randomness (ie same seed will point to same dir): // randomness (ie same seed will point to same dir):
@ -250,4 +251,187 @@ public class TestLongPostings extends LuceneTestCase {
r.close(); r.close();
dir.close(); dir.close();
} }
// Weaker variant of testLongPostings: exercises long postings without
// checking positions, once for each position-free index option.
public void testLongPostingsNoPositions() throws Exception {
  for (IndexOptions options : new IndexOptions[] {IndexOptions.DOCS_ONLY, IndexOptions.DOCS_AND_FREQS}) {
    doTestLongPostingsNoPositions(options);
  }
}
// Stress-tests postings iteration when positions are omitted: indexes
// NUM_DOCS docs, each containing one of two random terms repeated 1..4
// times, then randomly interleaves nextDoc()/advance() over one term's
// postings, checking docIDs (and freq bounds) but never positions.
public void doTestLongPostingsNoPositions(IndexOptions options) throws Exception {
  assumeFalse("Too slow with SimpleText codec at night", TEST_NIGHTLY && CodecProvider.getDefault().getFieldCodec("field").equals("SimpleText"));
  assumeFalse("Too slow with Memory codec at night", TEST_NIGHTLY && CodecProvider.getDefault().getFieldCodec("field").equals("Memory"));

  // Don't use _TestUtil.getTempDir so that we own the
  // randomness (ie same seed will point to same dir):
  Directory dir = newFSDirectory(_TestUtil.getTempDir("longpostings" + "." + random.nextLong()));

  final int NUM_DOCS = atLeast(2000);

  if (VERBOSE) {
    System.out.println("TEST: NUM_DOCS=" + NUM_DOCS);
  }

  final String s1 = getRandomTerm(null);
  final String s2 = getRandomTerm(s1);

  if (VERBOSE) {
    System.out.println("\nTEST: s1=" + s1 + " s2=" + s2);
    /*
    for(int idx=0;idx<s1.length();idx++) {
      System.out.println("  s1 ch=0x" + Integer.toHexString(s1.charAt(idx)));
    }
    for(int idx=0;idx<s2.length();idx++) {
      System.out.println("  s2 ch=0x" + Integer.toHexString(s2.charAt(idx)));
    }
    */
  }

  // Bit idx set => doc idx contains s1; clear => it contains s2.
  final FixedBitSet isS1 = new FixedBitSet(NUM_DOCS);
  for(int idx=0;idx<NUM_DOCS;idx++) {
    if (random.nextBoolean()) {
      isS1.set(idx);
    }
  }

  final IndexReader r;
  if (true) {
    final IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
      .setOpenMode(IndexWriterConfig.OpenMode.CREATE)
      .setMergePolicy(newLogMergePolicy());
    iwc.setRAMBufferSizeMB(16.0 + 16.0 * random.nextDouble());
    iwc.setMaxBufferedDocs(-1);
    final RandomIndexWriter riw = new RandomIndexWriter(random, dir, iwc);

    for(int idx=0;idx<NUM_DOCS;idx++) {
      final Document doc = new Document();
      String s = isS1.get(idx) ? s1 : s2;
      final Field f = newField("field", s, Field.Index.ANALYZED);
      // Index under the position-free option being tested.
      f.setIndexOptions(options);
      // Add the same field 1..4 times, so freq() below is bounded by [1,4].
      final int count = _TestUtil.nextInt(random, 1, 4);
      for(int ct=0;ct<count;ct++) {
        doc.add(f);
      }
      riw.addDocument(doc);
    }

    r = riw.getReader();
    riw.close();
  } else {
    r = IndexReader.open(dir);
  }

  /*
  if (VERBOSE) {
    System.out.println("TEST: terms");
    TermEnum termEnum = r.terms();
    while(termEnum.next()) {
      System.out.println("  term=" + termEnum.term() + " len=" + termEnum.term().text().length());
      assertTrue(termEnum.docFreq() > 0);
      System.out.println("    s1?=" + (termEnum.term().text().equals(s1)) + " s1len=" + s1.length());
      System.out.println("    s2?=" + (termEnum.term().text().equals(s2)) + " s2len=" + s2.length());
      final String s = termEnum.term().text();
      for(int idx=0;idx<s.length();idx++) {
        System.out.println("    ch=0x" + Integer.toHexString(s.charAt(idx)));
      }
    }
  }
  */

  assertEquals(NUM_DOCS, r.numDocs());
  assertTrue(r.docFreq(new Term("field", s1)) > 0);
  assertTrue(r.docFreq(new Term("field", s2)) > 0);

  int num = atLeast(1000);
  for(int iter=0;iter<num;iter++) {

    // Pick which of the two terms to iterate this round.
    final String term;
    final boolean doS1;

    if (random.nextBoolean()) {
      term = s1;
      doS1 = true;
    } else {
      term = s2;
      doS1 = false;
    }

    if (VERBOSE) {
      System.out.println("\nTEST: iter=" + iter + " doS1=" + doS1);
    }

    final DocsEnum postings = MultiFields.getTermDocsEnum(r, null, "field", new BytesRef(term));

    int docID = -1;
    while(docID < DocsEnum.NO_MORE_DOCS) {
      final int what = random.nextInt(3);
      if (what == 0) {
        if (VERBOSE) {
          System.out.println("TEST: docID=" + docID + "; do next()");
        }
        // nextDoc
        // Compute the next docID (per isS1) that should match this term.
        int expected = docID+1;
        while(true) {
          if (expected == NUM_DOCS) {
            expected = Integer.MAX_VALUE;
            break;
          } else if (isS1.get(expected) == doS1) {
            break;
          } else {
            expected++;
          }
        }
        docID = postings.nextDoc();
        if (VERBOSE) {
          System.out.println("  got docID=" + docID);
        }
        assertEquals(expected, docID);
        if (docID == DocsEnum.NO_MORE_DOCS) {
          break;
        }

        // Occasionally check the freq; each doc added the field 1..4 times.
        if (random.nextInt(6) == 3) {
          final int freq = postings.freq();
          assertTrue(freq >=1 && freq <= 4);
        }
      } else {
        // advance
        final int targetDocID;
        if (docID == -1) {
          targetDocID = random.nextInt(NUM_DOCS+1);
        } else {
          targetDocID = docID + _TestUtil.nextInt(random, 1, NUM_DOCS - docID);
        }
        if (VERBOSE) {
          System.out.println("TEST: docID=" + docID + "; do advance(" + targetDocID + ")");
        }
        // First matching docID at or after the advance target.
        int expected = targetDocID;
        while(true) {
          if (expected == NUM_DOCS) {
            expected = Integer.MAX_VALUE;
            break;
          } else if (isS1.get(expected) == doS1) {
            break;
          } else {
            expected++;
          }
        }

        docID = postings.advance(targetDocID);
        if (VERBOSE) {
          System.out.println("  got docID=" + docID);
        }
        assertEquals(expected, docID);
        if (docID == DocsEnum.NO_MORE_DOCS) {
          break;
        }

        if (random.nextInt(6) == 3) {
          final int freq = postings.freq();
          assertTrue(freq >=1 && freq <= 4);
        }
      }
    }
  }

  r.close();
  dir.close();
}
} }

View File

@ -0,0 +1,232 @@
package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
/**
*
* @lucene.experimental
*/
/**
 * Tests indexing with {@link IndexOptions#DOCS_AND_FREQS} and
 * {@link IndexOptions#DOCS_ONLY}: positions must be omitted from the
 * postings, and mixing options for the same field must downgrade the
 * field to the weakest option seen.
 *
 * @lucene.experimental
 */
public class TestOmitPositions extends LuceneTestCase {

  // Indexes a field with DOCS_AND_FREQS and checks that freqs survive
  // while positions are unavailable.
  public void testBasic() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random, dir);
    Document doc = new Document();
    Field f = newField("foo", "this is a test test", Field.Index.ANALYZED);
    f.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
    doc.add(f);
    for (int i = 0; i < 100; i++) {
      w.addDocument(doc);
    }

    IndexReader reader = w.getReader();
    w.close();

    // Positions were omitted, so no positions enum is available:
    assertNull(MultiFields.getTermPositionsEnum(reader, null, "foo", new BytesRef("test")));

    DocsEnum de = MultiFields.getTermDocsEnum(reader, null, "foo", new BytesRef("test"));
    while (de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      // "test" occurs twice in every doc, so freqs were indexed:
      assertEquals(2, de.freq());
    }

    reader.close();
    dir.close();
  }

  // Tests whether the DocumentWriter correctly enables the
  // omitTermFreqAndPositions bit in the FieldInfo, and that mixing
  // index options for the same field across documents downgrades the
  // field to the weakest option.
  public void testPositions() throws Exception {
    Directory ram = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random);
    IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer));
    Document d = new Document();

    // f1,f2,f3: docs only
    Field f1 = newField("f1", "This field has docs only", Field.Store.NO, Field.Index.ANALYZED);
    f1.setIndexOptions(IndexOptions.DOCS_ONLY);
    d.add(f1);

    Field f2 = newField("f2", "This field has docs only", Field.Store.NO, Field.Index.ANALYZED);
    f2.setIndexOptions(IndexOptions.DOCS_ONLY);
    d.add(f2);

    Field f3 = newField("f3", "This field has docs only", Field.Store.NO, Field.Index.ANALYZED);
    f3.setIndexOptions(IndexOptions.DOCS_ONLY);
    d.add(f3);

    // f4,f5,f6 docs and freqs
    Field f4 = newField("f4", "This field has docs and freqs", Field.Store.NO, Field.Index.ANALYZED);
    f4.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
    d.add(f4);

    Field f5 = newField("f5", "This field has docs and freqs", Field.Store.NO, Field.Index.ANALYZED);
    f5.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
    d.add(f5);

    Field f6 = newField("f6", "This field has docs and freqs", Field.Store.NO, Field.Index.ANALYZED);
    f6.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
    d.add(f6);

    // f7,f8,f9 docs/freqs/positions
    Field f7 = newField("f7", "This field has docs and freqs and positions", Field.Store.NO, Field.Index.ANALYZED);
    f7.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
    d.add(f7);

    Field f8 = newField("f8", "This field has docs and freqs and positions", Field.Store.NO, Field.Index.ANALYZED);
    f8.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
    d.add(f8);

    Field f9 = newField("f9", "This field has docs and freqs and positions", Field.Store.NO, Field.Index.ANALYZED);
    f9.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
    d.add(f9);

    writer.addDocument(d);
    writer.optimize();

    // now we add another document which has docs-only for f1, f4, f7, docs/freqs for f2, f5, f8,
    // and docs/freqs/positions for f3, f6, f9
    d = new Document();

    // f1,f4,f7: docs only
    f1 = newField("f1", "This field has docs only", Field.Store.NO, Field.Index.ANALYZED);
    f1.setIndexOptions(IndexOptions.DOCS_ONLY);
    d.add(f1);

    f4 = newField("f4", "This field has docs only", Field.Store.NO, Field.Index.ANALYZED);
    f4.setIndexOptions(IndexOptions.DOCS_ONLY);
    d.add(f4);

    f7 = newField("f7", "This field has docs only", Field.Store.NO, Field.Index.ANALYZED);
    f7.setIndexOptions(IndexOptions.DOCS_ONLY);
    d.add(f7);

    // f2, f5, f8: docs and freqs
    f2 = newField("f2", "This field has docs and freqs", Field.Store.NO, Field.Index.ANALYZED);
    f2.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
    d.add(f2);

    f5 = newField("f5", "This field has docs and freqs", Field.Store.NO, Field.Index.ANALYZED);
    f5.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
    d.add(f5);

    f8 = newField("f8", "This field has docs and freqs", Field.Store.NO, Field.Index.ANALYZED);
    f8.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
    d.add(f8);

    // f3, f6, f9: docs and freqs and positions
    f3 = newField("f3", "This field has docs and freqs and positions", Field.Store.NO, Field.Index.ANALYZED);
    f3.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
    d.add(f3);

    // NOTE(review): the comment above says f6 gets docs/freqs/positions in
    // this second document, but it is set to DOCS_AND_FREQS here; the f6
    // assertion below (DOCS_AND_FREQS) holds either way — confirm intent.
    f6 = newField("f6", "This field has docs and freqs and positions", Field.Store.NO, Field.Index.ANALYZED);
    f6.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
    d.add(f6);

    f9 = newField("f9", "This field has docs and freqs and positions", Field.Store.NO, Field.Index.ANALYZED);
    f9.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
    d.add(f9);

    writer.addDocument(d);

    // force merge
    writer.optimize();
    // flush
    writer.close();

    SegmentReader reader = getOnlySegmentReader(IndexReader.open(ram, false));
    FieldInfos fi = reader.fieldInfos();
    // Merging mixed options keeps the weakest option seen for each field:
    // docs + docs = docs
    assertEquals(IndexOptions.DOCS_ONLY, fi.fieldInfo("f1").indexOptions);
    // docs + docs/freqs = docs
    assertEquals(IndexOptions.DOCS_ONLY, fi.fieldInfo("f2").indexOptions);
    // docs + docs/freqs/pos = docs
    assertEquals(IndexOptions.DOCS_ONLY, fi.fieldInfo("f3").indexOptions);
    // docs/freqs + docs = docs
    assertEquals(IndexOptions.DOCS_ONLY, fi.fieldInfo("f4").indexOptions);
    // docs/freqs + docs/freqs = docs/freqs
    assertEquals(IndexOptions.DOCS_AND_FREQS, fi.fieldInfo("f5").indexOptions);
    // docs/freqs + docs/freqs/pos = docs/freqs
    assertEquals(IndexOptions.DOCS_AND_FREQS, fi.fieldInfo("f6").indexOptions);
    // docs/freqs/pos + docs = docs
    assertEquals(IndexOptions.DOCS_ONLY, fi.fieldInfo("f7").indexOptions);
    // docs/freqs/pos + docs/freqs = docs/freqs
    assertEquals(IndexOptions.DOCS_AND_FREQS, fi.fieldInfo("f8").indexOptions);
    // docs/freqs/pos + docs/freqs/pos = docs/freqs/pos
    assertEquals(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, fi.fieldInfo("f9").indexOptions);

    reader.close();
    ram.close();
  }

  // Asserts the directory contains no proximity files (.prx for the
  // pre-4.0 format, .pos for newer codecs).
  private void assertNoPrx(Directory dir) throws Throwable {
    final String[] files = dir.listAll();
    for(int i=0;i<files.length;i++) {
      assertFalse(files[i].endsWith(".prx"));
      assertFalse(files[i].endsWith(".pos"));
    }
  }

  // Verifies no *.prx exists when all fields omit term positions:
  public void testNoPrxFile() throws Throwable {
    Directory ram = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random);
    IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig(
        TEST_VERSION_CURRENT, analyzer).setMaxBufferedDocs(3).setMergePolicy(newLogMergePolicy()));
    LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy();
    lmp.setMergeFactor(2);
    lmp.setUseCompoundFile(false);
    Document d = new Document();

    Field f1 = newField("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
    f1.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
    d.add(f1);

    for(int i=0;i<30;i++)
      writer.addDocument(d);

    writer.commit();

    assertNoPrx(ram);

    // now add some documents with positions, and check there is no prox after optimization
    // (the field was already DOCS_AND_FREQS, so positions stay omitted)
    d = new Document();
    f1 = newField("f1", "This field has positions", Field.Store.NO, Field.Index.ANALYZED);
    d.add(f1);

    for(int i=0;i<30;i++)
      writer.addDocument(d);

    // force merge
    writer.optimize();
    // flush
    writer.close();

    assertNoPrx(ram);
    ram.close();
  }
}

View File

@ -26,6 +26,7 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.*; import org.apache.lucene.search.*;
import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanClause.Occur;
@ -65,7 +66,7 @@ public class TestOmitTf extends LuceneTestCase {
// this field will NOT have Tf // this field will NOT have Tf
Field f2 = newField("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED); Field f2 = newField("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED);
f2.setOmitTermFreqAndPositions(true); f2.setIndexOptions(IndexOptions.DOCS_ONLY);
d.add(f2); d.add(f2);
writer.addDocument(d); writer.addDocument(d);
@ -75,10 +76,10 @@ public class TestOmitTf extends LuceneTestCase {
d = new Document(); d = new Document();
// Reverse // Reverse
f1.setOmitTermFreqAndPositions(true); f1.setIndexOptions(IndexOptions.DOCS_ONLY);
d.add(f1); d.add(f1);
f2.setOmitTermFreqAndPositions(false); f2.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
d.add(f2); d.add(f2);
writer.addDocument(d); writer.addDocument(d);
@ -90,8 +91,8 @@ public class TestOmitTf extends LuceneTestCase {
SegmentReader reader = getOnlySegmentReader(IndexReader.open(ram, false)); SegmentReader reader = getOnlySegmentReader(IndexReader.open(ram, false));
FieldInfos fi = reader.fieldInfos(); FieldInfos fi = reader.fieldInfos();
assertTrue("OmitTermFreqAndPositions field bit should be set.", fi.fieldInfo("f1").omitTermFreqAndPositions); assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f1").indexOptions);
assertTrue("OmitTermFreqAndPositions field bit should be set.", fi.fieldInfo("f2").omitTermFreqAndPositions); assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f2").indexOptions);
reader.close(); reader.close();
ram.close(); ram.close();
@ -117,7 +118,7 @@ public class TestOmitTf extends LuceneTestCase {
// this field will NOT have Tf // this field will NOT have Tf
Field f2 = newField("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED); Field f2 = newField("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED);
f2.setOmitTermFreqAndPositions(true); f2.setIndexOptions(IndexOptions.DOCS_ONLY);
d.add(f2); d.add(f2);
for(int i=0;i<30;i++) for(int i=0;i<30;i++)
@ -128,10 +129,10 @@ public class TestOmitTf extends LuceneTestCase {
d = new Document(); d = new Document();
// Reverese // Reverese
f1.setOmitTermFreqAndPositions(true); f1.setIndexOptions(IndexOptions.DOCS_ONLY);
d.add(f1); d.add(f1);
f2.setOmitTermFreqAndPositions(false); f2.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
d.add(f2); d.add(f2);
for(int i=0;i<30;i++) for(int i=0;i<30;i++)
@ -144,8 +145,8 @@ public class TestOmitTf extends LuceneTestCase {
SegmentReader reader = getOnlySegmentReader(IndexReader.open(ram, false)); SegmentReader reader = getOnlySegmentReader(IndexReader.open(ram, false));
FieldInfos fi = reader.fieldInfos(); FieldInfos fi = reader.fieldInfos();
assertTrue("OmitTermFreqAndPositions field bit should be set.", fi.fieldInfo("f1").omitTermFreqAndPositions); assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f1").indexOptions);
assertTrue("OmitTermFreqAndPositions field bit should be set.", fi.fieldInfo("f2").omitTermFreqAndPositions); assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f2").indexOptions);
reader.close(); reader.close();
ram.close(); ram.close();
@ -176,7 +177,7 @@ public class TestOmitTf extends LuceneTestCase {
for(int i=0;i<5;i++) for(int i=0;i<5;i++)
writer.addDocument(d); writer.addDocument(d);
f2.setOmitTermFreqAndPositions(true); f2.setIndexOptions(IndexOptions.DOCS_ONLY);
for(int i=0;i<20;i++) for(int i=0;i<20;i++)
writer.addDocument(d); writer.addDocument(d);
@ -189,8 +190,8 @@ public class TestOmitTf extends LuceneTestCase {
SegmentReader reader = getOnlySegmentReader(IndexReader.open(ram, false)); SegmentReader reader = getOnlySegmentReader(IndexReader.open(ram, false));
FieldInfos fi = reader.fieldInfos(); FieldInfos fi = reader.fieldInfos();
assertTrue("OmitTermFreqAndPositions field bit should not be set.", !fi.fieldInfo("f1").omitTermFreqAndPositions); assertEquals("OmitTermFreqAndPositions field bit should not be set.", IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, fi.fieldInfo("f1").indexOptions);
assertTrue("OmitTermFreqAndPositions field bit should be set.", fi.fieldInfo("f2").omitTermFreqAndPositions); assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f2").indexOptions);
reader.close(); reader.close();
ram.close(); ram.close();
@ -198,8 +199,10 @@ public class TestOmitTf extends LuceneTestCase {
private void assertNoPrx(Directory dir) throws Throwable { private void assertNoPrx(Directory dir) throws Throwable {
final String[] files = dir.listAll(); final String[] files = dir.listAll();
for(int i=0;i<files.length;i++) for(int i=0;i<files.length;i++) {
assertFalse(files[i].endsWith(".prx")); assertFalse(files[i].endsWith(".prx"));
assertFalse(files[i].endsWith(".pos"));
}
} }
// Verifies no *.prx exists when all fields omit term freq: // Verifies no *.prx exists when all fields omit term freq:
@ -213,8 +216,8 @@ public class TestOmitTf extends LuceneTestCase {
lmp.setUseCompoundFile(false); lmp.setUseCompoundFile(false);
Document d = new Document(); Document d = new Document();
Field f1 = newField("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED); Field f1 = newField("f1", "This field has no term freqs", Field.Store.NO, Field.Index.ANALYZED);
f1.setOmitTermFreqAndPositions(true); f1.setIndexOptions(IndexOptions.DOCS_ONLY);
d.add(f1); d.add(f1);
for(int i=0;i<30;i++) for(int i=0;i<30;i++)
@ -223,7 +226,15 @@ public class TestOmitTf extends LuceneTestCase {
writer.commit(); writer.commit();
assertNoPrx(ram); assertNoPrx(ram);
// now add some documents with positions, and check there is no prox after optimization
d = new Document();
f1 = newField("f1", "This field has positions", Field.Store.NO, Field.Index.ANALYZED);
d.add(f1);
for(int i=0;i<30;i++)
writer.addDocument(d);
// force merge // force merge
writer.optimize(); writer.optimize();
// flush // flush
@ -253,7 +264,7 @@ public class TestOmitTf extends LuceneTestCase {
sb.append(term).append(" "); sb.append(term).append(" ");
String content = sb.toString(); String content = sb.toString();
Field noTf = newField("noTf", content + (i%2==0 ? "" : " notf"), Field.Store.NO, Field.Index.ANALYZED); Field noTf = newField("noTf", content + (i%2==0 ? "" : " notf"), Field.Store.NO, Field.Index.ANALYZED);
noTf.setOmitTermFreqAndPositions(true); noTf.setIndexOptions(IndexOptions.DOCS_ONLY);
d.add(noTf); d.add(noTf);
Field tf = newField("tf", content + (i%2==0 ? " tf" : ""), Field.Store.NO, Field.Index.ANALYZED); Field tf = newField("tf", content + (i%2==0 ? " tf" : ""), Field.Store.NO, Field.Index.ANALYZED);

View File

@ -21,6 +21,7 @@ import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig;
@ -181,7 +182,7 @@ public class LuceneTaxonomyWriter implements TaxonomyWriter {
parentStreamField = new Field(Consts.FIELD_PAYLOADS, parentStream); parentStreamField = new Field(Consts.FIELD_PAYLOADS, parentStream);
parentStreamField.setOmitNorms(true); parentStreamField.setOmitNorms(true);
fullPathField = new Field(Consts.FULL, "", Store.YES, Index.NOT_ANALYZED_NO_NORMS); fullPathField = new Field(Consts.FULL, "", Store.YES, Index.NOT_ANALYZED_NO_NORMS);
fullPathField.setOmitTermFreqAndPositions(true); fullPathField.setIndexOptions(IndexOptions.DOCS_ONLY);
this.nextID = indexWriter.maxDoc(); this.nextID = indexWriter.maxDoc();

View File

@ -24,6 +24,7 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField; import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
@ -395,7 +396,7 @@ public class TestGrouping extends LuceneTestCase {
} }
// So we can pull filter marking last doc in block: // So we can pull filter marking last doc in block:
final Field groupEnd = newField("groupend", "x", Field.Index.NOT_ANALYZED); final Field groupEnd = newField("groupend", "x", Field.Index.NOT_ANALYZED);
groupEnd.setOmitTermFreqAndPositions(true); groupEnd.setIndexOptions(IndexOptions.DOCS_ONLY);
groupEnd.setOmitNorms(true); groupEnd.setOmitNorms(true);
docs.get(docs.size()-1).add(groupEnd); docs.get(docs.size()-1).add(groupEnd);
// Add as a doc block: // Add as a doc block:

View File

@ -28,6 +28,7 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig;
@ -618,7 +619,7 @@ public class SpellChecker implements java.io.Closeable {
// the word field is never queried on... its indexed so it can be quickly // the word field is never queried on... its indexed so it can be quickly
// checked for rebuild (and stored for retrieval). Doesn't need norms or TF/pos // checked for rebuild (and stored for retrieval). Doesn't need norms or TF/pos
Field f = new Field(F_WORD, text, Field.Store.YES, Field.Index.NOT_ANALYZED); Field f = new Field(F_WORD, text, Field.Store.YES, Field.Index.NOT_ANALYZED);
f.setOmitTermFreqAndPositions(true); f.setIndexOptions(IndexOptions.DOCS_ONLY);
f.setOmitNorms(true); f.setOmitNorms(true);
doc.add(f); // orig term doc.add(f); // orig term
addGram(text, doc, ng1, ng2); addGram(text, doc, ng1, ng2);
@ -636,7 +637,7 @@ public class SpellChecker implements java.io.Closeable {
if (i == 0) { if (i == 0) {
// only one term possible in the startXXField, TF/pos and norms aren't needed. // only one term possible in the startXXField, TF/pos and norms aren't needed.
Field startField = new Field("start" + ng, gram, Field.Store.NO, Field.Index.NOT_ANALYZED); Field startField = new Field("start" + ng, gram, Field.Store.NO, Field.Index.NOT_ANALYZED);
startField.setOmitTermFreqAndPositions(true); startField.setIndexOptions(IndexOptions.DOCS_ONLY);
startField.setOmitNorms(true); startField.setOmitNorms(true);
doc.add(startField); doc.add(startField);
} }
@ -645,7 +646,7 @@ public class SpellChecker implements java.io.Closeable {
if (end != null) { // may not be present if len==ng1 if (end != null) { // may not be present if len==ng1
// only one term possible in the endXXField, TF/pos and norms aren't needed. // only one term possible in the endXXField, TF/pos and norms aren't needed.
Field endField = new Field("end" + ng, end, Field.Store.NO, Field.Index.NOT_ANALYZED); Field endField = new Field("end" + ng, end, Field.Store.NO, Field.Index.NOT_ANALYZED);
endField.setOmitTermFreqAndPositions(true); endField.setIndexOptions(IndexOptions.DOCS_ONLY);
endField.setOmitNorms(true); endField.setOmitNorms(true);
doc.add(endField); doc.add(endField);
} }

View File

@ -321,6 +321,8 @@ New Features
before adding to the index. Fix a null pointer exception in logging before adding to the index. Fix a null pointer exception in logging
when there was no unique key. (David Smiley via yonik) when there was no unique key. (David Smiley via yonik)
* LUCENE-2048: Added omitPositions to the schema, so you can omit position
information while still indexing term frequencies. (rmuir)
Optimizations Optimizations
---------------------- ----------------------

View File

@ -35,6 +35,7 @@ import org.slf4j.LoggerFactory;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable; import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.index.Fields; import org.apache.lucene.index.Fields;
@ -202,7 +203,10 @@ public class LukeRequestHandler extends RequestHandlerBase
flags.append( (f != null && f.storeTermOffsets() ) ? FieldFlag.TERM_VECTOR_OFFSET.getAbbreviation() : '-' ); flags.append( (f != null && f.storeTermOffsets() ) ? FieldFlag.TERM_VECTOR_OFFSET.getAbbreviation() : '-' );
flags.append( (f != null && f.storeTermPositions() ) ? FieldFlag.TERM_VECTOR_POSITION.getAbbreviation() : '-' ); flags.append( (f != null && f.storeTermPositions() ) ? FieldFlag.TERM_VECTOR_POSITION.getAbbreviation() : '-' );
flags.append( (f != null && f.omitNorms()) ? FieldFlag.OMIT_NORMS.getAbbreviation() : '-' ); flags.append( (f != null && f.omitNorms()) ? FieldFlag.OMIT_NORMS.getAbbreviation() : '-' );
flags.append( (f != null && f.omitTf()) ? FieldFlag.OMIT_TF.getAbbreviation() : '-' ); flags.append( (f != null &&
f.indexOptions() == IndexOptions.DOCS_ONLY) ? FieldFlag.OMIT_TF.getAbbreviation() : '-' );
flags.append( (f != null &&
f.indexOptions() == IndexOptions.DOCS_AND_FREQS) ? FieldFlag.OMIT_POSITIONS.getAbbreviation() : '-' );
flags.append( (lazy) ? FieldFlag.LAZY.getAbbreviation() : '-' ); flags.append( (lazy) ? FieldFlag.LAZY.getAbbreviation() : '-' );
flags.append( (binary) ? FieldFlag.BINARY.getAbbreviation() : '-' ); flags.append( (binary) ? FieldFlag.BINARY.getAbbreviation() : '-' );
flags.append( (f != null && f.sortMissingFirst() ) ? FieldFlag.SORT_MISSING_FIRST.getAbbreviation() : '-' ); flags.append( (f != null && f.sortMissingFirst() ) ? FieldFlag.SORT_MISSING_FIRST.getAbbreviation() : '-' );

View File

@ -47,13 +47,14 @@ public abstract class FieldProperties {
protected final static int SORT_MISSING_LAST = 0x00000800; protected final static int SORT_MISSING_LAST = 0x00000800;
protected final static int REQUIRED = 0x00001000; protected final static int REQUIRED = 0x00001000;
protected final static int OMIT_POSITIONS = 0x00002000;
static final String[] propertyNames = { static final String[] propertyNames = {
"indexed", "tokenized", "stored", "indexed", "tokenized", "stored",
"binary", "omitNorms", "omitTermFreqAndPositions", "binary", "omitNorms", "omitTermFreqAndPositions",
"termVectors", "termPositions", "termOffsets", "termVectors", "termPositions", "termOffsets",
"multiValued", "multiValued",
"sortMissingFirst","sortMissingLast","required" "sortMissingFirst","sortMissingLast","required", "omitPositions"
}; };
static final Map<String,Integer> propertyMap = new HashMap<String,Integer>(); static final Map<String,Integer> propertyMap = new HashMap<String,Integer>();

View File

@ -23,6 +23,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable; import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
@ -251,7 +252,7 @@ public abstract class FieldType extends FieldProperties {
return createField(field.getName(), val, getFieldStore(field, val), return createField(field.getName(), val, getFieldStore(field, val),
getFieldIndex(field, val), getFieldTermVec(field, val), field.omitNorms(), getFieldIndex(field, val), getFieldTermVec(field, val), field.omitNorms(),
field.omitTf(), boost); field.indexOptions(), boost);
} }
@ -269,14 +270,14 @@ public abstract class FieldType extends FieldProperties {
* @return the {@link org.apache.lucene.document.Fieldable}. * @return the {@link org.apache.lucene.document.Fieldable}.
*/ */
protected Fieldable createField(String name, String val, Field.Store storage, Field.Index index, protected Fieldable createField(String name, String val, Field.Store storage, Field.Index index,
Field.TermVector vec, boolean omitNorms, boolean omitTFPos, float boost){ Field.TermVector vec, boolean omitNorms, IndexOptions options, float boost){
Field f = new Field(name, Field f = new Field(name,
val, val,
storage, storage,
index, index,
vec); vec);
f.setOmitNorms(omitNorms); f.setOmitNorms(omitNorms);
f.setOmitTermFreqAndPositions(omitTFPos); f.setIndexOptions(options);
f.setBoost(boost); f.setBoost(boost);
return f; return f;
} }

View File

@ -18,6 +18,7 @@ package org.apache.solr.schema;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable; import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.queries.function.DocValues; import org.apache.lucene.queries.function.DocValues;
@ -77,7 +78,7 @@ public class LatLonType extends AbstractSubTypeFieldType implements SpatialQuery
if (field.stored()) { if (field.stored()) {
f[f.length - 1] = createField(field.getName(), externalVal, f[f.length - 1] = createField(field.getName(), externalVal,
getFieldStore(field, externalVal), Field.Index.NO, Field.TermVector.NO, getFieldStore(field, externalVal), Field.Index.NO, Field.TermVector.NO,
false, false, boost); false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, boost);
} }
return f; return f;
} }

View File

@ -19,6 +19,7 @@ package org.apache.solr.schema;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable; import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.VectorValueSource; import org.apache.lucene.queries.function.valuesource.VectorValueSource;
import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanClause;
@ -90,7 +91,7 @@ public class PointType extends CoordinateFieldType implements SpatialQueryable {
String storedVal = externalVal; // normalize or not? String storedVal = externalVal; // normalize or not?
f[f.length - 1] = createField(field.getName(), storedVal, f[f.length - 1] = createField(field.getName(), storedVal,
getFieldStore(field, storedVal), Field.Index.NO, Field.TermVector.NO, getFieldStore(field, storedVal), Field.Index.NO, Field.TermVector.NO,
false, false, boost); false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, boost);
} }
return f; return f;

View File

@ -20,6 +20,7 @@ package org.apache.solr.schema;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.lucene.document.Fieldable; import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField;
import org.apache.solr.search.QParser; import org.apache.solr.search.QParser;
@ -81,7 +82,17 @@ public final class SchemaField extends FieldProperties {
public boolean storeTermPositions() { return (properties & STORE_TERMPOSITIONS)!=0; } public boolean storeTermPositions() { return (properties & STORE_TERMPOSITIONS)!=0; }
public boolean storeTermOffsets() { return (properties & STORE_TERMOFFSETS)!=0; } public boolean storeTermOffsets() { return (properties & STORE_TERMOFFSETS)!=0; }
public boolean omitNorms() { return (properties & OMIT_NORMS)!=0; } public boolean omitNorms() { return (properties & OMIT_NORMS)!=0; }
public boolean omitTf() { return (properties & OMIT_TF_POSITIONS)!=0; }
public IndexOptions indexOptions() {
if ((properties & OMIT_TF_POSITIONS) != 0) {
return IndexOptions.DOCS_ONLY;
} else if ((properties & OMIT_POSITIONS) != 0) {
return IndexOptions.DOCS_AND_FREQS;
} else {
return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
}
}
public boolean multiValued() { return (properties & MULTIVALUED)!=0; } public boolean multiValued() { return (properties & MULTIVALUED)!=0; }
public boolean sortMissingFirst() { return (properties & SORT_MISSING_FIRST)!=0; } public boolean sortMissingFirst() { return (properties & SORT_MISSING_FIRST)!=0; }
public boolean sortMissingLast() { return (properties & SORT_MISSING_LAST)!=0; } public boolean sortMissingLast() { return (properties & SORT_MISSING_LAST)!=0; }
@ -215,7 +226,7 @@ public final class SchemaField extends FieldProperties {
} }
if (on(falseProps,INDEXED)) { if (on(falseProps,INDEXED)) {
int pp = (INDEXED | OMIT_NORMS | OMIT_TF_POSITIONS int pp = (INDEXED | OMIT_NORMS | OMIT_TF_POSITIONS | OMIT_POSITIONS
| STORE_TERMVECTORS | STORE_TERMPOSITIONS | STORE_TERMOFFSETS | STORE_TERMVECTORS | STORE_TERMPOSITIONS | STORE_TERMOFFSETS
| SORT_MISSING_FIRST | SORT_MISSING_LAST); | SORT_MISSING_FIRST | SORT_MISSING_LAST);
if (on(pp,trueProps)) { if (on(pp,trueProps)) {
@ -225,6 +236,14 @@ public final class SchemaField extends FieldProperties {
} }
if (on(falseProps,OMIT_TF_POSITIONS)) {
int pp = (OMIT_POSITIONS | OMIT_TF_POSITIONS);
if (on(pp, trueProps)) {
throw new RuntimeException("SchemaField: " + name + " conflicting indexed field options:" + props);
}
p &= ~pp;
}
if (on(falseProps,STORE_TERMVECTORS)) { if (on(falseProps,STORE_TERMVECTORS)) {
int pp = (STORE_TERMVECTORS | STORE_TERMPOSITIONS | STORE_TERMOFFSETS); int pp = (STORE_TERMVECTORS | STORE_TERMPOSITIONS | STORE_TERMOFFSETS);
if (on(pp,trueProps)) { if (on(pp,trueProps)) {

View File

@ -19,6 +19,7 @@ package org.apache.solr.schema;
import org.apache.lucene.document.Fieldable; import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField; import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.DoubleFieldSource; import org.apache.lucene.queries.function.valuesource.DoubleFieldSource;
import org.apache.lucene.queries.function.valuesource.FloatFieldSource; import org.apache.lucene.queries.function.valuesource.FloatFieldSource;
@ -521,7 +522,7 @@ public class TrieField extends FieldType {
} }
f.setOmitNorms(field.omitNorms()); f.setOmitNorms(field.omitNorms());
f.setOmitTermFreqAndPositions(field.omitTf()); f.setIndexOptions(field.indexOptions());
f.setBoost(boost); f.setBoost(boost);
return f; return f;
} }

View File

@ -417,6 +417,13 @@
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer class="solr.WhitespaceTokenizerFactory"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- omitPositions example -->
<fieldType name="nopositions" class="solr.TextField" omitPositions="true">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
</analyzer>
</fieldType>
</types> </types>
@ -525,6 +532,8 @@
<field name="sim1text" type="sim1" indexed="true" stored="true"/> <field name="sim1text" type="sim1" indexed="true" stored="true"/>
<field name="sim2text" type="sim2" indexed="true" stored="true"/> <field name="sim2text" type="sim2" indexed="true" stored="true"/>
<field name="sim3text" type="sim3" indexed="true" stored="true"/> <field name="sim3text" type="sim3" indexed="true" stored="true"/>
<field name="nopositionstext" type="nopositions" indexed="true" stored="true"/>
<field name="tlong" type="tlong" indexed="true" stored="true" /> <field name="tlong" type="tlong" indexed="true" stored="true" />

View File

@ -0,0 +1,60 @@
package org.apache.solr.schema;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.solr.SolrTestCaseJ4;
import org.junit.BeforeClass;
public class TestOmitPositions extends SolrTestCaseJ4 {
@BeforeClass
public static void beforeClass() throws Exception {
initCore("solrconfig.xml","schema.xml");
// add some docs
assertU(adoc("id", "1", "nopositionstext", "this is a test this is only a test", "text", "just another test"));
assertU(adoc("id", "2", "nopositionstext", "test test test test test test test test test test test test test", "text", "have a nice day"));
assertU(commit());
}
public void testFrequencies() {
// doc 2 should be ranked above doc 1
assertQ("term query: ",
req("fl", "id", "q", "nopositionstext:test"),
"//*[@numFound='2']",
"//result/doc[1]/int[@name='id'][.=2]",
"//result/doc[2]/int[@name='id'][.=1]"
);
}
public void testPositions() {
// no results should be found:
// lucene 3.x: silent failure
// lucene 4.x: illegal state exception, field was indexed without positions
ignoreException("was indexed without position data");
try {
assertQ("phrase query: ",
req("fl", "id", "q", "nopositionstext:\"test test\""),
"//*[@numFound='0']"
);
} catch (Exception expected) {
assertTrue(expected.getCause() instanceof IllegalStateException);
// in lucene 4.0, queries don't silently fail
}
resetExceptionIgnores();
}
}

View File

@ -31,6 +31,7 @@ public enum FieldFlag {
TERM_VECTOR_POSITION('p', "Store Position With TermVector"), TERM_VECTOR_POSITION('p', "Store Position With TermVector"),
OMIT_NORMS('O', "Omit Norms"), OMIT_NORMS('O', "Omit Norms"),
OMIT_TF('F', "Omit Tf"), OMIT_TF('F', "Omit Tf"),
OMIT_POSITIONS('P', "Omit Positions"),
LAZY('L', "Lazy"), LAZY('L', "Lazy"),
BINARY('B', "Binary"), BINARY('B', "Binary"),
SORT_MISSING_FIRST('f', "Sort Missing First"), SORT_MISSING_FIRST('f', "Sort Missing First"),