mirror of https://github.com/apache/lucene.git
LUCENE-5270: add Terms.hasFreqs
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1531005 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
2ab0051551
commit
d09bb02f91
|
@ -103,6 +103,9 @@ New Features
|
|||
the analysis chain directly, or to make it easier to implement
|
||||
query parsers. (Robert Muir, Uwe Schindler)
|
||||
|
||||
* LUCENE-5270: Add Terms.hasFreqs, to determine whether a given field
|
||||
indexed per-doc term frequencies. (Mike McCandless)
|
||||
|
||||
Bug Fixes
|
||||
|
||||
* LUCENE-4998: Fixed a few places to pass IOContext.READONCE instead
|
||||
|
|
|
@ -248,6 +248,11 @@ public class BlockTermsReader extends FieldsProducer {
|
|||
return new SegmentTermsEnum();
|
||||
}
|
||||
|
||||
/** True when this field's index options are at least {@code DOCS_AND_FREQS}. */
@Override
|
||||
public boolean hasFreqs() {
|
||||
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasOffsets() {
|
||||
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
|
||||
|
|
|
@ -276,6 +276,11 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat {
|
|||
return delegateTerms.getDocCount();
|
||||
}
|
||||
|
||||
/** Forwards to the wrapped {@code delegateTerms}; this wrapper adds no logic of its own here. */
@Override
|
||||
public boolean hasFreqs() {
|
||||
return delegateTerms.hasFreqs();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasOffsets() {
|
||||
return delegateTerms.hasOffsets();
|
||||
|
|
|
@ -659,6 +659,11 @@ public final class DirectPostingsFormat extends PostingsFormat {
|
|||
return docCount;
|
||||
}
|
||||
|
||||
/** Returns the {@code hasFreq} flag held by this instance. */
@Override
|
||||
public boolean hasFreqs() {
|
||||
return hasFreq;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasOffsets() {
|
||||
return hasOffsets;
|
||||
|
|
|
@ -175,6 +175,11 @@ public class FSTTermsReader extends FieldsProducer {
|
|||
this.dict = new FST<FSTTermOutputs.TermData>(in, new FSTTermOutputs(fieldInfo, longsSize));
|
||||
}
|
||||
|
||||
/** True when this field's index options are at least {@code DOCS_AND_FREQS}. */
@Override
|
||||
public boolean hasFreqs() {
|
||||
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasOffsets() {
|
||||
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
|
||||
|
|
|
@ -816,6 +816,11 @@ public final class MemoryPostingsFormat extends PostingsFormat {
|
|||
return new FSTTermsEnum(field, fst);
|
||||
}
|
||||
|
||||
/** True when the index options of {@code field} are at least {@code DOCS_AND_FREQS}. */
@Override
|
||||
public boolean hasFreqs() {
|
||||
return field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasOffsets() {
|
||||
return field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
|
||||
|
|
|
@ -603,6 +603,11 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
return docCount;
|
||||
}
|
||||
|
||||
/** True when this field's index options are at least {@code DOCS_AND_FREQS}. */
@Override
|
||||
public boolean hasFreqs() {
|
||||
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasOffsets() {
|
||||
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
|
||||
|
|
|
@ -23,7 +23,6 @@ import java.io.IOException;
|
|||
import org.apache.lucene.codecs.FieldsConsumer;
|
||||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.Fields;
|
||||
|
@ -85,10 +84,7 @@ class SimpleTextFieldsWriter extends FieldsConsumer implements Closeable {
|
|||
boolean wroteField = false;
|
||||
|
||||
boolean hasPositions = terms.hasPositions();
|
||||
|
||||
// TODO: shouldn't we add hasFreqs to Terms?
|
||||
// then we don't need FieldInfos here?
|
||||
boolean hasFreqs = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_ONLY) > 0;
|
||||
boolean hasFreqs = terms.hasFreqs();
|
||||
boolean hasPayloads = fieldInfo.hasPayloads();
|
||||
boolean hasOffsets = terms.hasOffsets();
|
||||
|
||||
|
|
|
@ -290,6 +290,11 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
|
|||
return 1;
|
||||
}
|
||||
|
||||
/**
 * Always returns true.
 * NOTE(review): CheckIndex comments that term vectors cannot omit TF, which is
 * presumably why this is unconditional; confirm.
 */
@Override
|
||||
public boolean hasFreqs() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasOffsets() {
|
||||
return hasOffsets;
|
||||
|
|
|
@ -496,6 +496,11 @@ public class BlockTreeTermsReader extends FieldsProducer {
|
|||
return new SegmentTermsEnum().computeBlockStats();
|
||||
}
|
||||
|
||||
/** True when this field's index options are at least {@code DOCS_AND_FREQS}. */
@Override
|
||||
public boolean hasFreqs() {
|
||||
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasOffsets() {
|
||||
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
|
||||
|
|
|
@ -740,6 +740,11 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem
|
|||
return 1;
|
||||
}
|
||||
|
||||
/**
 * Always returns true.
 * NOTE(review): CheckIndex comments that term vectors cannot omit TF, which is
 * presumably why this is unconditional; confirm.
 */
@Override
|
||||
public boolean hasFreqs() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasOffsets() {
|
||||
return (flags & OFFSETS) != 0;
|
||||
|
|
|
@ -359,6 +359,11 @@ public class Lucene40TermVectorsReader extends TermVectorsReader implements Clos
|
|||
return 1;
|
||||
}
|
||||
|
||||
/**
 * Always returns true.
 * NOTE(review): CheckIndex comments that term vectors cannot omit TF, which is
 * presumably why this is unconditional; confirm.
 */
@Override
|
||||
public boolean hasFreqs() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasOffsets() {
|
||||
return storeOffsets;
|
||||
|
|
|
@ -744,10 +744,40 @@ public class CheckIndex {
|
|||
continue;
|
||||
}
|
||||
|
||||
final boolean hasFreqs = terms.hasFreqs();
|
||||
final boolean hasPositions = terms.hasPositions();
|
||||
final boolean hasPayloads = terms.hasPayloads();
|
||||
final boolean hasOffsets = terms.hasOffsets();
|
||||
// term vectors cannot omit TF
|
||||
final boolean hasFreqs = isVectors || fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
|
||||
|
||||
// term vectors cannot omit TF:
|
||||
final boolean expectedHasFreqs = (isVectors || fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0);
|
||||
|
||||
if (hasFreqs != expectedHasFreqs) {
|
||||
throw new RuntimeException("field \"" + field + "\" should have hasFreqs=" + expectedHasFreqs + " but got " + hasFreqs);
|
||||
}
|
||||
|
||||
if (hasFreqs == false) {
|
||||
if (terms.getSumTotalTermFreq() != -1) {
|
||||
throw new RuntimeException("field \"" + field + "\" hasFreqs is false, but Terms.getSumTotalTermFreq()=" + terms.getSumTotalTermFreq() + " (should be -1)");
|
||||
}
|
||||
}
|
||||
|
||||
if (!isVectors) {
|
||||
final boolean expectedHasPositions = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
|
||||
if (hasPositions != expectedHasPositions) {
|
||||
throw new RuntimeException("field \"" + field + "\" should have hasPositions=" + expectedHasPositions + " but got " + hasPositions);
|
||||
}
|
||||
|
||||
final boolean expectedHasPayloads = fieldInfo.hasPayloads();
|
||||
if (hasPayloads != expectedHasPayloads) {
|
||||
throw new RuntimeException("field \"" + field + "\" should have hasPayloads=" + expectedHasPayloads + " but got " + hasPayloads);
|
||||
}
|
||||
|
||||
final boolean expectedHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
|
||||
if (hasOffsets != expectedHasOffsets) {
|
||||
throw new RuntimeException("field \"" + field + "\" should have hasOffsets=" + expectedHasOffsets + " but got " + hasOffsets);
|
||||
}
|
||||
}
|
||||
|
||||
final TermsEnum termsEnum = terms.iterator(null);
|
||||
|
||||
|
@ -787,6 +817,12 @@ public class CheckIndex {
|
|||
|
||||
docs = termsEnum.docs(liveDocs, docs);
|
||||
postings = termsEnum.docsAndPositions(liveDocs, postings);
|
||||
|
||||
if (hasFreqs == false) {
|
||||
if (termsEnum.totalTermFreq() != -1) {
|
||||
throw new RuntimeException("field \"" + field + "\" hasFreqs is false, but TermsEnum.totalTermFreq()=" + termsEnum.totalTermFreq() + " (should be -1)");
|
||||
}
|
||||
}
|
||||
|
||||
if (hasOrd) {
|
||||
long ord = -1;
|
||||
|
@ -829,6 +865,13 @@ public class CheckIndex {
|
|||
}
|
||||
status.totPos += freq;
|
||||
totalTermFreq += freq;
|
||||
} else {
|
||||
// When a field didn't index freq, it must
|
||||
// consistently "lie" and pretend that freq was
|
||||
// 1:
|
||||
if (docs2.freq() != 1) {
|
||||
// Report docs2.freq(), the value tested by the enclosing guard, rather than the local freq, which the visible code only uses on the hasFreqs branch.
throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + docs2.freq() + " != 1 when Terms.hasFreqs() is false");
|
||||
}
|
||||
}
|
||||
docCount++;
|
||||
|
||||
|
|
|
@ -118,6 +118,11 @@ public class FilterAtomicReader extends AtomicReader {
|
|||
return in.getDocCount();
|
||||
}
|
||||
|
||||
/** Forwards to the wrapped instance {@code in}. */
@Override
|
||||
public boolean hasFreqs() {
|
||||
return in.hasFreqs();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasOffsets() {
|
||||
return in.hasOffsets();
|
||||
|
|
|
@ -103,6 +103,11 @@ class FreqProxFields extends Fields {
|
|||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
/** True when the index options of {@code terms.fieldInfo} are at least {@code DOCS_AND_FREQS}. */
@Override
|
||||
public boolean hasFreqs() {
|
||||
return terms.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasOffsets() {
|
||||
// NOTE: the in-memory buffer may have indexed offsets
|
||||
|
|
|
@ -35,6 +35,7 @@ import org.apache.lucene.util.automaton.CompiledAutomaton;
|
|||
public final class MultiTerms extends Terms {
|
||||
private final Terms[] subs;
|
||||
private final ReaderSlice[] subSlices;
|
||||
private final boolean hasFreqs;
|
||||
private final boolean hasOffsets;
|
||||
private final boolean hasPositions;
|
||||
private final boolean hasPayloads;
|
||||
|
@ -50,15 +51,18 @@ public final class MultiTerms extends Terms {
|
|||
this.subSlices = subSlices;
|
||||
|
||||
assert subs.length > 0 : "inefficient: don't use MultiTerms over one sub";
|
||||
boolean _hasFreqs = true;
|
||||
boolean _hasOffsets = true;
|
||||
boolean _hasPositions = true;
|
||||
boolean _hasPayloads = false;
|
||||
for(int i=0;i<subs.length;i++) {
|
||||
_hasFreqs &= subs[i].hasFreqs();
|
||||
_hasOffsets &= subs[i].hasOffsets();
|
||||
_hasPositions &= subs[i].hasPositions();
|
||||
_hasPayloads |= subs[i].hasPayloads();
|
||||
}
|
||||
|
||||
hasFreqs = _hasFreqs;
|
||||
hasOffsets = _hasOffsets;
|
||||
hasPositions = _hasPositions;
|
||||
hasPayloads = hasPositions && _hasPayloads; // if all subs have pos, and at least one has payloads.
|
||||
|
@ -143,6 +147,11 @@ public final class MultiTerms extends Terms {
|
|||
return sum;
|
||||
}
|
||||
|
||||
/** Returns the cached flag, which is the logical AND of {@code hasFreqs()} over every entry of {@code subs}. */
@Override
|
||||
public boolean hasFreqs() {
|
||||
return hasFreqs;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasOffsets() {
|
||||
return hasOffsets;
|
||||
|
|
|
@ -102,8 +102,10 @@ public abstract class Terms {
|
|||
* into account. */
|
||||
public abstract int getDocCount() throws IOException;
|
||||
|
||||
// TODO: shouldn't we have hasFreq() as well?
|
||||
|
||||
/** Returns true if documents in this field store
|
||||
* per-document term frequency ({@link DocsEnum#freq}).
* When this returns false, CheckIndex requires
* {@code getSumTotalTermFreq()} and {@code TermsEnum.totalTermFreq()}
* to return -1, and {@code DocsEnum.freq()} to return 1. */
|
||||
public abstract boolean hasFreqs();
|
||||
|
||||
/** Returns true if documents in this field store offsets. */
|
||||
public abstract boolean hasOffsets();
|
||||
|
||||
|
|
|
@ -113,6 +113,11 @@ public final class DocTermOrdsRewriteMethod extends MultiTermQuery.RewriteMethod
|
|||
return -1;
|
||||
}
|
||||
|
||||
/** Always returns false: this Terms implementation reports no per-document term frequencies. */
@Override
|
||||
public boolean hasFreqs() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasOffsets() {
|
||||
return false;
|
||||
|
|
|
@ -113,6 +113,11 @@ public final class FieldCacheRewriteMethod extends MultiTermQuery.RewriteMethod
|
|||
return -1;
|
||||
}
|
||||
|
||||
/** Always returns false: this Terms implementation reports no per-document term frequencies. */
@Override
|
||||
public boolean hasFreqs() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasOffsets() {
|
||||
return false;
|
||||
|
|
|
@ -690,6 +690,11 @@ public class TestCodecs extends LuceneTestCase {
|
|||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
/** True when the index options of {@code fieldData.fieldInfo} are at least {@code DOCS_AND_FREQS}. */
@Override
|
||||
public boolean hasFreqs() {
|
||||
return fieldData.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasOffsets() {
|
||||
return fieldData.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
|
||||
|
|
|
@ -824,6 +824,11 @@ public class MemoryIndex {
|
|||
return info.terms.size() > 0 ? 1 : 0;
|
||||
}
|
||||
|
||||
/** Always returns true, regardless of field. */
@Override
|
||||
public boolean hasFreqs() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasOffsets() {
|
||||
return storeOffsets;
|
||||
|
|
|
@ -144,6 +144,11 @@ public final class RAMOnlyPostingsFormat extends PostingsFormat {
|
|||
return new RAMTermsEnum(RAMOnlyPostingsFormat.RAMField.this);
|
||||
}
|
||||
|
||||
/** True when the index options of {@code info} are at least {@code DOCS_AND_FREQS}. */
@Override
|
||||
public boolean hasFreqs() {
|
||||
return info.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasOffsets() {
|
||||
return info.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
|
||||
|
|
|
@ -526,6 +526,11 @@ public abstract class BasePostingsFormatTestCase extends LuceneTestCase {
|
|||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
/** True when this field's index options are at least {@code DOCS_AND_FREQS}. */
@Override
|
||||
public boolean hasFreqs() {
|
||||
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasOffsets() {
|
||||
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
|
||||
|
|
Loading…
Reference in New Issue