LUCENE-5270: add Terms.hasFreqs

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1531005 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2013-10-10 14:42:54 +00:00
parent 2ab0051551
commit d09bb02f91
23 changed files with 152 additions and 9 deletions

View File

@ -103,6 +103,9 @@ New Features
the analysis chain directly, or to make it easier to implement
query parsers. (Robert Muir, Uwe Schindler)
* LUCENE-5270: Add Terms.hasFreqs, to determine whether a given field
indexed per-doc term frequencies. (Mike McCandless)
Bug Fixes
* LUCENE-4998: Fixed a few places to pass IOContext.READONCE instead

View File

@ -248,6 +248,11 @@ public class BlockTermsReader extends FieldsProducer {
return new SegmentTermsEnum();
}
/**
 * Reports whether this field was indexed with per-document term
 * frequencies, i.e. whether its index options sort at or after
 * {@code DOCS_AND_FREQS} in the enum's natural order.
 */
@Override
public boolean hasFreqs() {
  final IndexOptions indexOptions = fieldInfo.getIndexOptions();
  return indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
}
@Override
public boolean hasOffsets() {
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;

View File

@ -276,6 +276,11 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat {
return delegateTerms.getDocCount();
}
// Forwards to the wrapped delegate Terms and returns its answer unchanged.
@Override
public boolean hasFreqs() {
return delegateTerms.hasFreqs();
}
@Override
public boolean hasOffsets() {
return delegateTerms.hasOffsets();

View File

@ -659,6 +659,11 @@ public final class DirectPostingsFormat extends PostingsFormat {
return docCount;
}
// Returns the value of the hasFreq field of this terms instance.
@Override
public boolean hasFreqs() {
return hasFreq;
}
@Override
public boolean hasOffsets() {
return hasOffsets;

View File

@ -175,6 +175,11 @@ public class FSTTermsReader extends FieldsProducer {
this.dict = new FST<FSTTermOutputs.TermData>(in, new FSTTermOutputs(fieldInfo, longsSize));
}
/**
 * Reports whether this field was indexed with per-document term
 * frequencies, i.e. whether its index options sort at or after
 * {@code DOCS_AND_FREQS} in the enum's natural order.
 */
@Override
public boolean hasFreqs() {
  final IndexOptions indexOptions = fieldInfo.getIndexOptions();
  return indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
}
@Override
public boolean hasOffsets() {
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;

View File

@ -816,6 +816,11 @@ public final class MemoryPostingsFormat extends PostingsFormat {
return new FSTTermsEnum(field, fst);
}
/**
 * Reports whether this field was indexed with per-document term
 * frequencies, i.e. whether its index options sort at or after
 * {@code DOCS_AND_FREQS} in the enum's natural order.
 */
@Override
public boolean hasFreqs() {
  final IndexOptions indexOptions = field.getIndexOptions();
  return indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
}
@Override
public boolean hasOffsets() {
return field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;

View File

@ -603,6 +603,11 @@ class SimpleTextFieldsReader extends FieldsProducer {
return docCount;
}
/**
 * Reports whether this field was indexed with per-document term
 * frequencies, i.e. whether its index options sort at or after
 * {@code DOCS_AND_FREQS} in the enum's natural order.
 */
@Override
public boolean hasFreqs() {
  final IndexOptions indexOptions = fieldInfo.getIndexOptions();
  return indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
}
@Override
public boolean hasOffsets() {
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;

View File

@ -23,7 +23,6 @@ import java.io.IOException;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
@ -85,10 +84,7 @@ class SimpleTextFieldsWriter extends FieldsConsumer implements Closeable {
boolean wroteField = false;
boolean hasPositions = terms.hasPositions();
// TODO: shouldn't we add hasFreqs to Terms?
// then we don't need FieldInfos here?
boolean hasFreqs = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_ONLY) > 0;
boolean hasFreqs = terms.hasFreqs();
boolean hasPayloads = fieldInfo.hasPayloads();
boolean hasOffsets = terms.hasOffsets();

View File

@ -290,6 +290,11 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
return 1;
}
// Unconditionally reports that term frequencies are available for this
// term-vector Terms instance.
@Override
public boolean hasFreqs() {
return true;
}
@Override
public boolean hasOffsets() {
return hasOffsets;

View File

@ -496,6 +496,11 @@ public class BlockTreeTermsReader extends FieldsProducer {
return new SegmentTermsEnum().computeBlockStats();
}
/**
 * Reports whether this field was indexed with per-document term
 * frequencies, i.e. whether its index options sort at or after
 * {@code DOCS_AND_FREQS} in the enum's natural order.
 */
@Override
public boolean hasFreqs() {
  final IndexOptions indexOptions = fieldInfo.getIndexOptions();
  return indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
}
@Override
public boolean hasOffsets() {
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;

View File

@ -740,6 +740,11 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem
return 1;
}
// Unconditionally reports that term frequencies are available for this
// term-vector Terms instance.
@Override
public boolean hasFreqs() {
return true;
}
@Override
public boolean hasOffsets() {
return (flags & OFFSETS) != 0;

View File

@ -359,6 +359,11 @@ public class Lucene40TermVectorsReader extends TermVectorsReader implements Clos
return 1;
}
// Unconditionally reports that term frequencies are available for this
// term-vector Terms instance.
@Override
public boolean hasFreqs() {
return true;
}
@Override
public boolean hasOffsets() {
return storeOffsets;

View File

@ -744,10 +744,40 @@ public class CheckIndex {
continue;
}
final boolean hasFreqs = terms.hasFreqs();
final boolean hasPositions = terms.hasPositions();
final boolean hasPayloads = terms.hasPayloads();
final boolean hasOffsets = terms.hasOffsets();
// term vectors cannot omit TF
final boolean hasFreqs = isVectors || fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
// term vectors cannot omit TF:
final boolean expectedHasFreqs = (isVectors || fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0);
if (hasFreqs != expectedHasFreqs) {
throw new RuntimeException("field \"" + field + "\" should have hasFreqs=" + expectedHasFreqs + " but got " + hasFreqs);
}
if (hasFreqs == false) {
if (terms.getSumTotalTermFreq() != -1) {
throw new RuntimeException("field \"" + field + "\" hasFreqs is false, but Terms.getSumTotalTermFreq()=" + terms.getSumTotalTermFreq() + " (should be -1)");
}
}
if (!isVectors) {
final boolean expectedHasPositions = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
if (hasPositions != expectedHasPositions) {
throw new RuntimeException("field \"" + field + "\" should have hasPositions=" + expectedHasPositions + " but got " + hasPositions);
}
final boolean expectedHasPayloads = fieldInfo.hasPayloads();
if (hasPayloads != expectedHasPayloads) {
throw new RuntimeException("field \"" + field + "\" should have hasPayloads=" + expectedHasPayloads + " but got " + hasPayloads);
}
final boolean expectedHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
if (hasOffsets != expectedHasOffsets) {
throw new RuntimeException("field \"" + field + "\" should have hasOffsets=" + expectedHasOffsets + " but got " + hasOffsets);
}
}
final TermsEnum termsEnum = terms.iterator(null);
@ -787,6 +817,12 @@ public class CheckIndex {
docs = termsEnum.docs(liveDocs, docs);
postings = termsEnum.docsAndPositions(liveDocs, postings);
if (hasFreqs == false) {
if (termsEnum.totalTermFreq() != -1) {
throw new RuntimeException("field \"" + field + "\" hasFreqs is false, but TermsEnum.totalTermFreq()=" + termsEnum.totalTermFreq() + " (should be -1)");
}
}
if (hasOrd) {
long ord = -1;
@ -829,6 +865,13 @@ public class CheckIndex {
}
status.totPos += freq;
totalTermFreq += freq;
} else {
// When a field didn't index freq, it must
// consistently "lie" and pretend that freq was
// 1:
// Report the value that was actually tested (docs2.freq()) rather than
// the local "freq", which this branch never compares; otherwise the
// message can show a number unrelated to the failed check.
if (docs2.freq() != 1) {
  throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + docs2.freq() + " != 1 when Terms.hasFreqs() is false");
}
}
docCount++;

View File

@ -118,6 +118,11 @@ public class FilterAtomicReader extends AtomicReader {
return in.getDocCount();
}
// Forwards to the wrapped Terms instance ("in") and returns its answer unchanged.
@Override
public boolean hasFreqs() {
return in.hasFreqs();
}
@Override
public boolean hasOffsets() {
return in.hasOffsets();

View File

@ -103,6 +103,11 @@ class FreqProxFields extends Fields {
throw new UnsupportedOperationException();
}
/**
 * Reports whether this field is being indexed with per-document term
 * frequencies, i.e. whether its index options sort at or after
 * {@code DOCS_AND_FREQS} in the enum's natural order.
 */
@Override
public boolean hasFreqs() {
  final IndexOptions indexOptions = terms.fieldInfo.getIndexOptions();
  return indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
}
@Override
public boolean hasOffsets() {
// NOTE: the in-memory buffer may have indexed offsets

View File

@ -35,6 +35,7 @@ import org.apache.lucene.util.automaton.CompiledAutomaton;
public final class MultiTerms extends Terms {
private final Terms[] subs;
private final ReaderSlice[] subSlices;
private final boolean hasFreqs;
private final boolean hasOffsets;
private final boolean hasPositions;
private final boolean hasPayloads;
@ -50,15 +51,18 @@ public final class MultiTerms extends Terms {
this.subSlices = subSlices;
assert subs.length > 0 : "inefficient: don't use MultiTerms over one sub";
boolean _hasFreqs = true;
boolean _hasOffsets = true;
boolean _hasPositions = true;
boolean _hasPayloads = false;
for(int i=0;i<subs.length;i++) {
_hasFreqs &= subs[i].hasFreqs();
_hasOffsets &= subs[i].hasOffsets();
_hasPositions &= subs[i].hasPositions();
_hasPayloads |= subs[i].hasPayloads();
}
hasFreqs = _hasFreqs;
hasOffsets = _hasOffsets;
hasPositions = _hasPositions;
hasPayloads = hasPositions && _hasPayloads; // if all subs have pos, and at least one has payloads.
@ -143,6 +147,11 @@ public final class MultiTerms extends Terms {
return sum;
}
// Returns the flag computed once in the constructor: it is true only when
// every sub-Terms reported hasFreqs() (the per-sub results are AND-ed).
@Override
public boolean hasFreqs() {
return hasFreqs;
}
@Override
public boolean hasOffsets() {
return hasOffsets;

View File

@ -102,8 +102,10 @@ public abstract class Terms {
* into account. */
public abstract int getDocCount() throws IOException;
// TODO: shouldn't we have hasFreq() as well?
/** Returns true if documents in this field store
* per-document term frequency ({@link DocsEnum#freq}).
* <p>
* When this returns false, CheckIndex requires that
* {@code getSumTotalTermFreq()} and {@code TermsEnum.totalTermFreq()}
* return -1, and that {@code DocsEnum.freq()} consistently
* reports 1 for every document. */
public abstract boolean hasFreqs();
/** Returns true if documents in this field store offsets. */
public abstract boolean hasOffsets();

View File

@ -113,6 +113,11 @@ public final class DocTermOrdsRewriteMethod extends MultiTermQuery.RewriteMethod
return -1;
}
// This Terms implementation never reports per-document term frequencies.
@Override
public boolean hasFreqs() {
return false;
}
@Override
public boolean hasOffsets() {
return false;

View File

@ -113,6 +113,11 @@ public final class FieldCacheRewriteMethod extends MultiTermQuery.RewriteMethod
return -1;
}
// This Terms implementation never reports per-document term frequencies.
@Override
public boolean hasFreqs() {
return false;
}
@Override
public boolean hasOffsets() {
return false;

View File

@ -690,6 +690,11 @@ public class TestCodecs extends LuceneTestCase {
throw new UnsupportedOperationException();
}
/**
 * Reports whether the test field was indexed with per-document term
 * frequencies, i.e. whether its index options sort at or after
 * {@code DOCS_AND_FREQS} in the enum's natural order.
 */
@Override
public boolean hasFreqs() {
  final IndexOptions indexOptions = fieldData.fieldInfo.getIndexOptions();
  return indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
}
@Override
public boolean hasOffsets() {
return fieldData.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;

View File

@ -824,6 +824,11 @@ public class MemoryIndex {
return info.terms.size() > 0 ? 1 : 0;
}
// This Terms implementation always reports that per-document term
// frequencies are available.
@Override
public boolean hasFreqs() {
return true;
}
@Override
public boolean hasOffsets() {
return storeOffsets;

View File

@ -144,6 +144,11 @@ public final class RAMOnlyPostingsFormat extends PostingsFormat {
return new RAMTermsEnum(RAMOnlyPostingsFormat.RAMField.this);
}
/**
 * Reports whether this field was indexed with per-document term
 * frequencies, i.e. whether its index options sort at or after
 * {@code DOCS_AND_FREQS} in the enum's natural order.
 */
@Override
public boolean hasFreqs() {
  final IndexOptions indexOptions = info.getIndexOptions();
  return indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
}
@Override
public boolean hasOffsets() {
return info.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;

View File

@ -526,6 +526,11 @@ public abstract class BasePostingsFormatTestCase extends LuceneTestCase {
throw new UnsupportedOperationException();
}
/**
 * Reports whether the test field was indexed with per-document term
 * frequencies, i.e. whether its index options sort at or after
 * {@code DOCS_AND_FREQS} in the enum's natural order.
 */
@Override
public boolean hasFreqs() {
  final IndexOptions indexOptions = fieldInfo.getIndexOptions();
  return indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
}
@Override
public boolean hasOffsets() {
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;