mirror of https://github.com/apache/lucene.git
Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/lucene-solr
This commit is contained in:
commit
dae5c570b9
37
build.xml
37
build.xml
|
@ -186,20 +186,26 @@
|
||||||
}
|
}
|
||||||
|
|
||||||
def checkLicenseHeaderPrecedes = { f, description, contentPattern, commentPattern, text, ratDocument ->
|
def checkLicenseHeaderPrecedes = { f, description, contentPattern, commentPattern, text, ratDocument ->
|
||||||
def contentMatcher = contentPattern.matcher(text);
|
def contentMatcher = contentPattern.matcher(text);
|
||||||
if (contentMatcher.find()) {
|
if (contentMatcher.find()) {
|
||||||
def contentStartPos = contentMatcher.start();
|
def contentStartPos = contentMatcher.start();
|
||||||
def commentMatcher = commentPattern.matcher(text);
|
def commentMatcher = commentPattern.matcher(text);
|
||||||
while (commentMatcher.find()) {
|
while (commentMatcher.find()) {
|
||||||
if (isLicense(commentMatcher, ratDocument)) {
|
if (isLicense(commentMatcher, ratDocument)) {
|
||||||
if (commentMatcher.start() < contentStartPos) {
|
if (commentMatcher.start() < contentStartPos) {
|
||||||
break; // This file is all good, so break loop: license header precedes 'description' definition
|
break; // This file is all good, so break loop: license header precedes 'description' definition
|
||||||
} else {
|
} else {
|
||||||
reportViolation(f, description+' declaration precedes license header');
|
reportViolation(f, description+' declaration precedes license header');
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def checkMockitoAssume = { f, text ->
|
||||||
|
if (text.contains("mockito") && !text.contains("assumeWorkingMockito()")) {
|
||||||
|
reportViolation(f, 'File uses Mockito but has no assumeWorkingMockito() call');
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def checkForUnescapedSymbolSubstitutions = { f, text ->
|
def checkForUnescapedSymbolSubstitutions = { f, text ->
|
||||||
|
@ -265,18 +271,21 @@
|
||||||
ratDocument.getMetaData().value(MetaData.RAT_URL_LICENSE_FAMILY_NAME)));
|
ratDocument.getMetaData().value(MetaData.RAT_URL_LICENSE_FAMILY_NAME)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (f.toString().endsWith('.java')) {
|
if (f.name.endsWith('.java')) {
|
||||||
if (text.contains('org.slf4j.LoggerFactory')) {
|
if (text.contains('org.slf4j.LoggerFactory')) {
|
||||||
if (!validLoggerPattern.matcher(text).find()) {
|
if (!validLoggerPattern.matcher(text).find()) {
|
||||||
reportViolation(f, 'invalid logging pattern [not private static final, uses static class name]');
|
reportViolation(f, 'invalid logging pattern [not private static final, uses static class name]');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
checkLicenseHeaderPrecedes(f, 'package', packagePattern, javaCommentPattern, text, ratDocument);
|
checkLicenseHeaderPrecedes(f, 'package', packagePattern, javaCommentPattern, text, ratDocument);
|
||||||
|
if (f.name.contains("Test")) {
|
||||||
|
checkMockitoAssume(f, text);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (f.toString().endsWith('.xml') || f.toString().endsWith('.xml.template')) {
|
if (f.name.endsWith('.xml') || f.name.endsWith('.xml.template')) {
|
||||||
checkLicenseHeaderPrecedes(f, '<tag>', xmlTagPattern, xmlCommentPattern, text, ratDocument);
|
checkLicenseHeaderPrecedes(f, '<tag>', xmlTagPattern, xmlCommentPattern, text, ratDocument);
|
||||||
}
|
}
|
||||||
if (f.toString().endsWith('.adoc')) {
|
if (f.name.endsWith('.adoc')) {
|
||||||
checkForUnescapedSymbolSubstitutions(f, text);
|
checkForUnescapedSymbolSubstitutions(f, text);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
|
@ -12,6 +12,16 @@
|
||||||
</content>
|
</content>
|
||||||
<orderEntry type="inheritedJdk" />
|
<orderEntry type="inheritedJdk" />
|
||||||
<orderEntry type="sourceFolder" forTests="false" />
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
|
<orderEntry type="module-library" exported="">
|
||||||
|
<library>
|
||||||
|
<CLASSES>
|
||||||
|
<root url="file://$MODULE_DIR$/lib" />
|
||||||
|
</CLASSES>
|
||||||
|
<JAVADOC />
|
||||||
|
<SOURCES />
|
||||||
|
<jarDirectory url="file://$MODULE_DIR$/lib" recursive="false" />
|
||||||
|
</library>
|
||||||
|
</orderEntry>
|
||||||
<orderEntry type="library" scope="TEST" name="JUnit" level="project" />
|
<orderEntry type="library" scope="TEST" name="JUnit" level="project" />
|
||||||
<orderEntry type="module" scope="TEST" module-name="lucene-test-framework" />
|
<orderEntry type="module" scope="TEST" module-name="lucene-test-framework" />
|
||||||
<orderEntry type="module" module-name="analysis-common" />
|
<orderEntry type="module" module-name="analysis-common" />
|
||||||
|
|
|
@ -5,6 +5,14 @@ http://s.apache.org/luceneversions
|
||||||
|
|
||||||
======================= Lucene 8.0.0 =======================
|
======================= Lucene 8.0.0 =======================
|
||||||
|
|
||||||
|
API Changes
|
||||||
|
|
||||||
|
* LUCENE-8007: Index statistics Terms.getSumDocFreq(), Terms.getDocCount() are
|
||||||
|
now required to be stored by codecs. Additionally, TermsEnum.totalTermFreq()
|
||||||
|
and Terms.getSumTotalTermFreq() are now required: if frequencies are not
|
||||||
|
stored they are equal to TermsEnum.docFreq() and Terms.getSumDocFreq(),
|
||||||
|
respectively, because all freq() values equal 1. (Adrien Grand, Robert Muir)
|
||||||
|
|
||||||
Changes in Runtime Behavior
|
Changes in Runtime Behavior
|
||||||
|
|
||||||
* LUCENE-7837: Indices that were created before the previous major version
|
* LUCENE-7837: Indices that were created before the previous major version
|
||||||
|
@ -25,6 +33,11 @@ Improvements
|
||||||
|
|
||||||
======================= Lucene 7.2.0 =======================
|
======================= Lucene 7.2.0 =======================
|
||||||
|
|
||||||
|
API Changes
|
||||||
|
|
||||||
|
* LUCENE-8017: Weight now exposes a getCacheHelper() method to help query caches
|
||||||
|
determine whether or not a query can be cached. (Alan Woodward)
|
||||||
|
|
||||||
Bug Fixes
|
Bug Fixes
|
||||||
|
|
||||||
* LUCENE-7991: KNearestNeighborDocumentClassifier.knnSearch no longer applies
|
* LUCENE-7991: KNearestNeighborDocumentClassifier.knnSearch no longer applies
|
||||||
|
@ -49,6 +62,16 @@ Optimizations
|
||||||
* LUCENE-7994: Use int/int scatter map to gather facet counts when the
|
* LUCENE-7994: Use int/int scatter map to gather facet counts when the
|
||||||
number of hits is small relative to the number of unique facet labels
|
number of hits is small relative to the number of unique facet labels
|
||||||
(Dawid Weiss, Robert Muir, Mike McCandless)
|
(Dawid Weiss, Robert Muir, Mike McCandless)
|
||||||
|
|
||||||
|
Tests
|
||||||
|
|
||||||
|
* LUCENE-8035: Run tests with JDK-specific options: --illegal-access=deny
|
||||||
|
on Java 9+. (Uwe Schindler)
|
||||||
|
|
||||||
|
Build
|
||||||
|
|
||||||
|
* LUCENE-6144: Upgrade Ivy to 2.4.0; 'ant ivy-bootstrap' now removes old Ivy
|
||||||
|
jars in ~/.ant/lib/. (Shawn Heisey, Steve Rowe)
|
||||||
|
|
||||||
======================= Lucene 7.1.0 =======================
|
======================= Lucene 7.1.0 =======================
|
||||||
|
|
||||||
|
|
|
@ -139,8 +139,9 @@ public class BlockTermsReader extends FieldsProducer {
|
||||||
assert numTerms >= 0;
|
assert numTerms >= 0;
|
||||||
final long termsStartPointer = in.readVLong();
|
final long termsStartPointer = in.readVLong();
|
||||||
final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
|
final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
|
||||||
final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? -1 : in.readVLong();
|
final long sumTotalTermFreq = in.readVLong();
|
||||||
final long sumDocFreq = in.readVLong();
|
// when frequencies are omitted, sumDocFreq=totalTermFreq and we only write one value
|
||||||
|
final long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? sumTotalTermFreq : in.readVLong();
|
||||||
final int docCount = in.readVInt();
|
final int docCount = in.readVInt();
|
||||||
final int longsSize = in.readVInt();
|
final int longsSize = in.readVInt();
|
||||||
if (docCount < 0 || docCount > state.segmentInfo.maxDoc()) { // #docs with field must be <= #docs
|
if (docCount < 0 || docCount > state.segmentInfo.maxDoc()) { // #docs with field must be <= #docs
|
||||||
|
@ -149,7 +150,7 @@ public class BlockTermsReader extends FieldsProducer {
|
||||||
if (sumDocFreq < docCount) { // #postings must be >= #docs with field
|
if (sumDocFreq < docCount) { // #postings must be >= #docs with field
|
||||||
throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, in);
|
throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, in);
|
||||||
}
|
}
|
||||||
if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
|
if (sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
|
||||||
throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq, in);
|
throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq, in);
|
||||||
}
|
}
|
||||||
FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq, docCount, longsSize));
|
FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq, docCount, longsSize));
|
||||||
|
@ -810,7 +811,9 @@ public class BlockTermsReader extends FieldsProducer {
|
||||||
// docFreq, totalTermFreq
|
// docFreq, totalTermFreq
|
||||||
state.docFreq = freqReader.readVInt();
|
state.docFreq = freqReader.readVInt();
|
||||||
//System.out.println(" dF=" + state.docFreq);
|
//System.out.println(" dF=" + state.docFreq);
|
||||||
if (fieldInfo.getIndexOptions() != IndexOptions.DOCS) {
|
if (fieldInfo.getIndexOptions() == IndexOptions.DOCS) {
|
||||||
|
state.totalTermFreq = state.docFreq; // all postings have tf=1
|
||||||
|
} else {
|
||||||
state.totalTermFreq = state.docFreq + freqReader.readVLong();
|
state.totalTermFreq = state.docFreq + freqReader.readVLong();
|
||||||
//System.out.println(" totTF=" + state.totalTermFreq);
|
//System.out.println(" totTF=" + state.totalTermFreq);
|
||||||
}
|
}
|
||||||
|
|
|
@ -126,8 +126,9 @@ public final class OrdsBlockTreeTermsReader extends FieldsProducer {
|
||||||
final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
|
final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
|
||||||
assert fieldInfo != null: "field=" + field;
|
assert fieldInfo != null: "field=" + field;
|
||||||
assert numTerms <= Integer.MAX_VALUE;
|
assert numTerms <= Integer.MAX_VALUE;
|
||||||
final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? -1 : in.readVLong();
|
final long sumTotalTermFreq = in.readVLong();
|
||||||
final long sumDocFreq = in.readVLong();
|
// when frequencies are omitted, sumDocFreq=totalTermFreq and we only write one value
|
||||||
|
final long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? sumTotalTermFreq : in.readVLong();
|
||||||
final int docCount = in.readVInt();
|
final int docCount = in.readVInt();
|
||||||
final int longsSize = in.readVInt();
|
final int longsSize = in.readVInt();
|
||||||
// System.out.println(" longsSize=" + longsSize);
|
// System.out.println(" longsSize=" + longsSize);
|
||||||
|
@ -140,7 +141,7 @@ public final class OrdsBlockTreeTermsReader extends FieldsProducer {
|
||||||
if (sumDocFreq < docCount) { // #postings must be >= #docs with field
|
if (sumDocFreq < docCount) { // #postings must be >= #docs with field
|
||||||
throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, in);
|
throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, in);
|
||||||
}
|
}
|
||||||
if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
|
if (sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
|
||||||
throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq, in);
|
throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq, in);
|
||||||
}
|
}
|
||||||
final long indexStartFP = indexIn.readVLong();
|
final long indexStartFP = indexIn.readVLong();
|
||||||
|
|
|
@ -292,7 +292,9 @@ final class OrdsIntersectTermsEnumFrame {
|
||||||
// stats
|
// stats
|
||||||
termState.docFreq = statsReader.readVInt();
|
termState.docFreq = statsReader.readVInt();
|
||||||
//if (DEBUG) System.out.println(" dF=" + state.docFreq);
|
//if (DEBUG) System.out.println(" dF=" + state.docFreq);
|
||||||
if (ite.fr.fieldInfo.getIndexOptions() != IndexOptions.DOCS) {
|
if (ite.fr.fieldInfo.getIndexOptions() == IndexOptions.DOCS) {
|
||||||
|
termState.totalTermFreq = termState.docFreq; // all tf values are 1
|
||||||
|
} else {
|
||||||
termState.totalTermFreq = termState.docFreq + statsReader.readVLong();
|
termState.totalTermFreq = termState.docFreq + statsReader.readVLong();
|
||||||
//if (DEBUG) System.out.println(" totTF=" + state.totalTermFreq);
|
//if (DEBUG) System.out.println(" totTF=" + state.totalTermFreq);
|
||||||
}
|
}
|
||||||
|
|
|
@ -499,7 +499,9 @@ final class OrdsSegmentTermsEnumFrame {
|
||||||
// stats
|
// stats
|
||||||
state.docFreq = statsReader.readVInt();
|
state.docFreq = statsReader.readVInt();
|
||||||
//if (DEBUG) System.out.println(" dF=" + state.docFreq);
|
//if (DEBUG) System.out.println(" dF=" + state.docFreq);
|
||||||
if (ste.fr.fieldInfo.getIndexOptions() != IndexOptions.DOCS) {
|
if (ste.fr.fieldInfo.getIndexOptions() == IndexOptions.DOCS) {
|
||||||
|
state.totalTermFreq = state.docFreq; // all tf values are 1
|
||||||
|
} else {
|
||||||
state.totalTermFreq = state.docFreq + statsReader.readVLong();
|
state.totalTermFreq = state.docFreq + statsReader.readVLong();
|
||||||
//if (DEBUG) System.out.println(" totTF=" + state.totalTermFreq);
|
//if (DEBUG) System.out.println(" totTF=" + state.totalTermFreq);
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,7 +19,6 @@ package org.apache.lucene.codecs.memory;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.BitSet;
|
import java.util.BitSet;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
@ -111,8 +110,9 @@ public class FSTOrdTermsReader extends FieldsProducer {
|
||||||
FieldInfo fieldInfo = fieldInfos.fieldInfo(blockIn.readVInt());
|
FieldInfo fieldInfo = fieldInfos.fieldInfo(blockIn.readVInt());
|
||||||
boolean hasFreq = fieldInfo.getIndexOptions() != IndexOptions.DOCS;
|
boolean hasFreq = fieldInfo.getIndexOptions() != IndexOptions.DOCS;
|
||||||
long numTerms = blockIn.readVLong();
|
long numTerms = blockIn.readVLong();
|
||||||
long sumTotalTermFreq = hasFreq ? blockIn.readVLong() : -1;
|
long sumTotalTermFreq = blockIn.readVLong();
|
||||||
long sumDocFreq = blockIn.readVLong();
|
// if freqs are omitted, sumDocFreq=sumTotalTermFreq and we only write one value
|
||||||
|
long sumDocFreq = hasFreq ? blockIn.readVLong() : sumTotalTermFreq;
|
||||||
int docCount = blockIn.readVInt();
|
int docCount = blockIn.readVInt();
|
||||||
int longsSize = blockIn.readVInt();
|
int longsSize = blockIn.readVInt();
|
||||||
FST<Long> index = new FST<>(indexIn, PositiveIntOutputs.getSingleton());
|
FST<Long> index = new FST<>(indexIn, PositiveIntOutputs.getSingleton());
|
||||||
|
@ -146,7 +146,7 @@ public class FSTOrdTermsReader extends FieldsProducer {
|
||||||
throw new CorruptIndexException("invalid sumDocFreq: " + field.sumDocFreq + " docCount: " + field.docCount + " (blockIn=" + blockIn + ")", indexIn);
|
throw new CorruptIndexException("invalid sumDocFreq: " + field.sumDocFreq + " docCount: " + field.docCount + " (blockIn=" + blockIn + ")", indexIn);
|
||||||
}
|
}
|
||||||
// #positions must be >= #postings
|
// #positions must be >= #postings
|
||||||
if (field.sumTotalTermFreq != -1 && field.sumTotalTermFreq < field.sumDocFreq) {
|
if (field.sumTotalTermFreq < field.sumDocFreq) {
|
||||||
throw new CorruptIndexException("invalid sumTotalTermFreq: " + field.sumTotalTermFreq + " sumDocFreq: " + field.sumDocFreq + " (blockIn=" + blockIn + ")", indexIn);
|
throw new CorruptIndexException("invalid sumTotalTermFreq: " + field.sumTotalTermFreq + " sumDocFreq: " + field.sumDocFreq + " (blockIn=" + blockIn + ")", indexIn);
|
||||||
}
|
}
|
||||||
if (previous != null) {
|
if (previous != null) {
|
||||||
|
@ -343,9 +343,6 @@ public class FSTOrdTermsReader extends FieldsProducer {
|
||||||
this.totalTermFreq = new long[INTERVAL];
|
this.totalTermFreq = new long[INTERVAL];
|
||||||
this.statsBlockOrd = -1;
|
this.statsBlockOrd = -1;
|
||||||
this.metaBlockOrd = -1;
|
this.metaBlockOrd = -1;
|
||||||
if (!hasFreqs()) {
|
|
||||||
Arrays.fill(totalTermFreq, -1);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Decodes stats data into term state */
|
/** Decodes stats data into term state */
|
||||||
|
@ -388,6 +385,7 @@ public class FSTOrdTermsReader extends FieldsProducer {
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
docFreq[i] = code;
|
docFreq[i] = code;
|
||||||
|
totalTermFreq[i] = code;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -94,8 +94,9 @@ public class FSTTermsReader extends FieldsProducer {
|
||||||
int fieldNumber = in.readVInt();
|
int fieldNumber = in.readVInt();
|
||||||
FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
|
FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
|
||||||
long numTerms = in.readVLong();
|
long numTerms = in.readVLong();
|
||||||
long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? -1 : in.readVLong();
|
long sumTotalTermFreq = in.readVLong();
|
||||||
long sumDocFreq = in.readVLong();
|
// if frequencies are omitted, sumTotalTermFreq=sumDocFreq and we only write one value
|
||||||
|
long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? sumTotalTermFreq : in.readVLong();
|
||||||
int docCount = in.readVInt();
|
int docCount = in.readVInt();
|
||||||
int longsSize = in.readVInt();
|
int longsSize = in.readVInt();
|
||||||
TermsReader current = new TermsReader(fieldInfo, in, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize);
|
TermsReader current = new TermsReader(fieldInfo, in, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize);
|
||||||
|
@ -126,7 +127,7 @@ public class FSTTermsReader extends FieldsProducer {
|
||||||
throw new CorruptIndexException("invalid sumDocFreq: " + field.sumDocFreq + " docCount: " + field.docCount, in);
|
throw new CorruptIndexException("invalid sumDocFreq: " + field.sumDocFreq + " docCount: " + field.docCount, in);
|
||||||
}
|
}
|
||||||
// #positions must be >= #postings
|
// #positions must be >= #postings
|
||||||
if (field.sumTotalTermFreq != -1 && field.sumTotalTermFreq < field.sumDocFreq) {
|
if (field.sumTotalTermFreq < field.sumDocFreq) {
|
||||||
throw new CorruptIndexException("invalid sumTotalTermFreq: " + field.sumTotalTermFreq + " sumDocFreq: " + field.sumDocFreq, in);
|
throw new CorruptIndexException("invalid sumTotalTermFreq: " + field.sumTotalTermFreq + " sumDocFreq: " + field.sumDocFreq, in);
|
||||||
}
|
}
|
||||||
if (previous != null) {
|
if (previous != null) {
|
||||||
|
@ -288,7 +289,7 @@ public class FSTTermsReader extends FieldsProducer {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long totalTermFreq() throws IOException {
|
public long totalTermFreq() throws IOException {
|
||||||
return state.totalTermFreq;
|
return state.totalTermFreq == -1 ? state.docFreq : state.totalTermFreq;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -733,10 +733,10 @@ public final class MemoryPostingsFormat extends PostingsFormat {
|
||||||
if (!didDecode) {
|
if (!didDecode) {
|
||||||
buffer.reset(current.output.bytes, current.output.offset, current.output.length);
|
buffer.reset(current.output.bytes, current.output.offset, current.output.length);
|
||||||
docFreq = buffer.readVInt();
|
docFreq = buffer.readVInt();
|
||||||
if (field.getIndexOptions() != IndexOptions.DOCS) {
|
if (field.getIndexOptions() == IndexOptions.DOCS) {
|
||||||
totalTermFreq = docFreq + buffer.readVLong();
|
totalTermFreq = docFreq;
|
||||||
} else {
|
} else {
|
||||||
totalTermFreq = -1;
|
totalTermFreq = docFreq + buffer.readVLong();
|
||||||
}
|
}
|
||||||
postingsSpare.bytes = current.output.bytes;
|
postingsSpare.bytes = current.output.bytes;
|
||||||
postingsSpare.offset = buffer.getPosition();
|
postingsSpare.offset = buffer.getPosition();
|
||||||
|
@ -873,12 +873,15 @@ public final class MemoryPostingsFormat extends PostingsFormat {
|
||||||
field = fieldInfos.fieldInfo(fieldNumber);
|
field = fieldInfos.fieldInfo(fieldNumber);
|
||||||
if (field == null) {
|
if (field == null) {
|
||||||
throw new CorruptIndexException("invalid field number: " + fieldNumber, in);
|
throw new CorruptIndexException("invalid field number: " + fieldNumber, in);
|
||||||
} else if (field.getIndexOptions() != IndexOptions.DOCS) {
|
|
||||||
sumTotalTermFreq = in.readVLong();
|
|
||||||
} else {
|
} else {
|
||||||
sumTotalTermFreq = -1;
|
sumTotalTermFreq = in.readVLong();
|
||||||
|
}
|
||||||
|
// if frequencies are omitted, sumDocFreq = sumTotalTermFreq and we only write one value.
|
||||||
|
if (field.getIndexOptions() == IndexOptions.DOCS) {
|
||||||
|
sumDocFreq = sumTotalTermFreq;
|
||||||
|
} else {
|
||||||
|
sumDocFreq = in.readVLong();
|
||||||
}
|
}
|
||||||
sumDocFreq = in.readVLong();
|
|
||||||
docCount = in.readVInt();
|
docCount = in.readVInt();
|
||||||
|
|
||||||
fst = new FST<>(in, outputs);
|
fst = new FST<>(in, outputs);
|
||||||
|
|
|
@ -202,7 +202,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long totalTermFreq() {
|
public long totalTermFreq() {
|
||||||
return indexOptions == IndexOptions.DOCS ? -1 : totalTermFreq;
|
return indexOptions == IndexOptions.DOCS ? docFreq : totalTermFreq;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -568,12 +568,13 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
||||||
} else if (StringHelper.startsWith(scratch.get(), DOC)) {
|
} else if (StringHelper.startsWith(scratch.get(), DOC)) {
|
||||||
docFreq++;
|
docFreq++;
|
||||||
sumDocFreq++;
|
sumDocFreq++;
|
||||||
|
totalTermFreq++;
|
||||||
scratchUTF16.copyUTF8Bytes(scratch.bytes(), DOC.length, scratch.length()-DOC.length);
|
scratchUTF16.copyUTF8Bytes(scratch.bytes(), DOC.length, scratch.length()-DOC.length);
|
||||||
int docID = ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length());
|
int docID = ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length());
|
||||||
visitedDocs.set(docID);
|
visitedDocs.set(docID);
|
||||||
} else if (StringHelper.startsWith(scratch.get(), FREQ)) {
|
} else if (StringHelper.startsWith(scratch.get(), FREQ)) {
|
||||||
scratchUTF16.copyUTF8Bytes(scratch.bytes(), FREQ.length, scratch.length()-FREQ.length);
|
scratchUTF16.copyUTF8Bytes(scratch.bytes(), FREQ.length, scratch.length()-FREQ.length);
|
||||||
totalTermFreq += ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length());
|
totalTermFreq += ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length()) - 1;
|
||||||
} else if (StringHelper.startsWith(scratch.get(), TERM)) {
|
} else if (StringHelper.startsWith(scratch.get(), TERM)) {
|
||||||
if (lastDocsStart != -1) {
|
if (lastDocsStart != -1) {
|
||||||
b.add(Util.toIntsRef(lastTerm.get(), scratchIntsRef), outputs.newPair(lastDocsStart,
|
b.add(Util.toIntsRef(lastTerm.get(), scratchIntsRef), outputs.newPair(lastDocsStart,
|
||||||
|
@ -637,7 +638,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long getSumTotalTermFreq() {
|
public long getSumTotalTermFreq() {
|
||||||
return fieldInfo.getIndexOptions() == IndexOptions.DOCS ? -1 : sumTotalTermFreq;
|
return sumTotalTermFreq;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -288,7 +288,13 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long getSumTotalTermFreq() throws IOException {
|
public long getSumTotalTermFreq() throws IOException {
|
||||||
return -1;
|
// TODO: make it constant-time
|
||||||
|
long ttf = 0;
|
||||||
|
TermsEnum iterator = iterator();
|
||||||
|
for (BytesRef b = iterator.next(); b != null; b = iterator.next()) {
|
||||||
|
ttf += iterator.totalTermFreq();
|
||||||
|
}
|
||||||
|
return ttf;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -80,13 +80,15 @@
|
||||||
<!-- Needed in case a module needs the original build, also for compile-tools to be called from a module -->
|
<!-- Needed in case a module needs the original build, also for compile-tools to be called from a module -->
|
||||||
<property name="common.build.dir" location="${common.dir}/build"/>
|
<property name="common.build.dir" location="${common.dir}/build"/>
|
||||||
|
|
||||||
<property name="ivy.bootstrap.version" value="2.3.0" /> <!-- UPGRADE NOTE: update disallowed.ivy.jars regex in ivy-availability-check -->
|
<property name="ivy.bootstrap.version" value="2.4.0" /> <!-- UPGRADE NOTE: update disallowed_ivy_jars_regex below -->
|
||||||
|
<property name="disallowed_ivy_jars_regex" value="ivy-2\.[0123].*\.jar"/>
|
||||||
|
|
||||||
<property name="ivy.default.configuration" value="*"/>
|
<property name="ivy.default.configuration" value="*"/>
|
||||||
|
|
||||||
<!-- Running ant targets in parralel may require this set to false because ivy:retrieve tasks may race with resolve -->
|
<!-- Running ant targets in parralel may require this set to false because ivy:retrieve tasks may race with resolve -->
|
||||||
<property name="ivy.sync" value="true"/>
|
<property name="ivy.sync" value="true"/>
|
||||||
<property name="ivy.resolution-cache.dir" location="${common.build.dir}/ivy-resolution-cache"/>
|
<property name="ivy.resolution-cache.dir" location="${common.build.dir}/ivy-resolution-cache"/>
|
||||||
<property name="ivy.lock-strategy" value="artifact-lock"/>
|
<property name="ivy.lock-strategy" value="artifact-lock-nio"/>
|
||||||
|
|
||||||
<property name="local.caches" location="${common.dir}/../.caches" />
|
<property name="local.caches" location="${common.dir}/../.caches" />
|
||||||
<property name="tests.cachedir" location="${local.caches}/test-stats" />
|
<property name="tests.cachedir" location="${local.caches}/test-stats" />
|
||||||
|
@ -413,12 +415,12 @@
|
||||||
<property name="ivy_bootstrap_url1" value="http://repo1.maven.org/maven2"/>
|
<property name="ivy_bootstrap_url1" value="http://repo1.maven.org/maven2"/>
|
||||||
<!-- you might need to tweak this from china so it works -->
|
<!-- you might need to tweak this from china so it works -->
|
||||||
<property name="ivy_bootstrap_url2" value="http://uk.maven.org/maven2"/>
|
<property name="ivy_bootstrap_url2" value="http://uk.maven.org/maven2"/>
|
||||||
<property name="ivy_checksum_sha1" value="c5ebf1c253ad4959a29f4acfe696ee48cdd9f473"/>
|
<property name="ivy_checksum_sha1" value="5abe4c24bbe992a9ac07ca563d5bd3e8d569e9ed"/>
|
||||||
|
|
||||||
<target name="ivy-availability-check" unless="ivy.available">
|
<target name="ivy-availability-check" unless="ivy.available">
|
||||||
<path id="disallowed.ivy.jars">
|
<path id="disallowed.ivy.jars">
|
||||||
<fileset dir="${ivy_install_path}">
|
<fileset dir="${ivy_install_path}">
|
||||||
<filename regex="ivy-2\.[012].*\.jar"/> <!-- TODO: Update this regex to disallow Ivy versions -->
|
<filename regex="${disallowed_ivy_jars_regex}"/>
|
||||||
</fileset>
|
</fileset>
|
||||||
</path>
|
</path>
|
||||||
<loadresource property="disallowed.ivy.jars.list">
|
<loadresource property="disallowed.ivy.jars.list">
|
||||||
|
@ -482,19 +484,20 @@
|
||||||
<fail>Ivy is not available</fail>
|
<fail>Ivy is not available</fail>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
<target name="ivy-bootstrap" description="Download and install Ivy in the users ant lib dir" depends="ivy-bootstrap1,ivy-bootstrap2,ivy-checksum"/>
|
<target name="ivy-bootstrap" description="Download and install Ivy in the users ant lib dir"
|
||||||
|
depends="-ivy-bootstrap1,-ivy-bootstrap2,-ivy-checksum,-ivy-remove-old-versions"/>
|
||||||
|
|
||||||
<!-- try to download from repo1.maven.org -->
|
<!-- try to download from repo1.maven.org -->
|
||||||
<target name="ivy-bootstrap1">
|
<target name="-ivy-bootstrap1">
|
||||||
<ivy-download src="${ivy_bootstrap_url1}" dest="${ivy_install_path}"/>
|
<ivy-download src="${ivy_bootstrap_url1}" dest="${ivy_install_path}"/>
|
||||||
<available file="${ivy_install_path}/ivy-${ivy.bootstrap.version}.jar" property="ivy.bootstrap1.success" />
|
<available file="${ivy_install_path}/ivy-${ivy.bootstrap.version}.jar" property="ivy.bootstrap1.success" />
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
<target name="ivy-bootstrap2" unless="ivy.bootstrap1.success">
|
<target name="-ivy-bootstrap2" unless="ivy.bootstrap1.success">
|
||||||
<ivy-download src="${ivy_bootstrap_url2}" dest="${ivy_install_path}"/>
|
<ivy-download src="${ivy_bootstrap_url2}" dest="${ivy_install_path}"/>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
<target name="ivy-checksum">
|
<target name="-ivy-checksum">
|
||||||
<checksum file="${ivy_install_path}/ivy-${ivy.bootstrap.version}.jar"
|
<checksum file="${ivy_install_path}/ivy-${ivy.bootstrap.version}.jar"
|
||||||
property="${ivy_checksum_sha1}"
|
property="${ivy_checksum_sha1}"
|
||||||
algorithm="SHA"
|
algorithm="SHA"
|
||||||
|
@ -505,6 +508,14 @@
|
||||||
</condition>
|
</condition>
|
||||||
</fail>
|
</fail>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
|
<target name="-ivy-remove-old-versions">
|
||||||
|
<delete verbose="true" failonerror="true">
|
||||||
|
<fileset dir="${ivy_install_path}">
|
||||||
|
<filename regex="${disallowed_ivy_jars_regex}"/>
|
||||||
|
</fileset>
|
||||||
|
</delete>
|
||||||
|
</target>
|
||||||
|
|
||||||
<macrodef name="ivy-download">
|
<macrodef name="ivy-download">
|
||||||
<attribute name="src"/>
|
<attribute name="src"/>
|
||||||
|
@ -948,6 +959,12 @@
|
||||||
<condition property="java.security.manager" value="org.apache.lucene.util.TestSecurityManager">
|
<condition property="java.security.manager" value="org.apache.lucene.util.TestSecurityManager">
|
||||||
<istrue value="${tests.useSecurityManager}"/>
|
<istrue value="${tests.useSecurityManager}"/>
|
||||||
</condition>
|
</condition>
|
||||||
|
|
||||||
|
<!-- additional arguments for Java 9+ -->
|
||||||
|
<local name="tests.runtimespecific.args"/>
|
||||||
|
<condition property="tests.runtimespecific.args" value="" else="--illegal-access=deny">
|
||||||
|
<equals arg1="${build.java.runtime}" arg2="1.8"/>
|
||||||
|
</condition>
|
||||||
|
|
||||||
<!-- create a fileset pattern that matches ${tests.class}. -->
|
<!-- create a fileset pattern that matches ${tests.class}. -->
|
||||||
<loadresource property="tests.explicitclass" quiet="true">
|
<loadresource property="tests.explicitclass" quiet="true">
|
||||||
|
@ -1029,6 +1046,7 @@
|
||||||
<jvmarg line="${tests.clover.args}"/>
|
<jvmarg line="${tests.clover.args}"/>
|
||||||
<jvmarg line="@{additional.vm.args}"/>
|
<jvmarg line="@{additional.vm.args}"/>
|
||||||
<jvmarg line="${tests.asserts.args}"/>
|
<jvmarg line="${tests.asserts.args}"/>
|
||||||
|
<jvmarg line="${tests.runtimespecific.args}"/>
|
||||||
|
|
||||||
<!-- set the number of times tests should run -->
|
<!-- set the number of times tests should run -->
|
||||||
<sysproperty key="tests.iters" value="${tests.iters}"/>
|
<sysproperty key="tests.iters" value="${tests.iters}"/>
|
||||||
|
|
|
@ -180,8 +180,9 @@ public final class BlockTreeTermsReader extends FieldsProducer {
|
||||||
if (fieldInfo == null) {
|
if (fieldInfo == null) {
|
||||||
throw new CorruptIndexException("invalid field number: " + field, termsIn);
|
throw new CorruptIndexException("invalid field number: " + field, termsIn);
|
||||||
}
|
}
|
||||||
final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? -1 : termsIn.readVLong();
|
final long sumTotalTermFreq = termsIn.readVLong();
|
||||||
final long sumDocFreq = termsIn.readVLong();
|
// when frequencies are omitted, sumDocFreq=sumTotalTermFreq and only one value is written.
|
||||||
|
final long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? sumTotalTermFreq : termsIn.readVLong();
|
||||||
final int docCount = termsIn.readVInt();
|
final int docCount = termsIn.readVInt();
|
||||||
final int longsSize = termsIn.readVInt();
|
final int longsSize = termsIn.readVInt();
|
||||||
if (longsSize < 0) {
|
if (longsSize < 0) {
|
||||||
|
@ -195,7 +196,7 @@ public final class BlockTreeTermsReader extends FieldsProducer {
|
||||||
if (sumDocFreq < docCount) { // #postings must be >= #docs with field
|
if (sumDocFreq < docCount) { // #postings must be >= #docs with field
|
||||||
throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, termsIn);
|
throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, termsIn);
|
||||||
}
|
}
|
||||||
if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
|
if (sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
|
||||||
throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq, termsIn);
|
throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq, termsIn);
|
||||||
}
|
}
|
||||||
final long indexStartFP = indexIn.readVLong();
|
final long indexStartFP = indexIn.readVLong();
|
||||||
|
|
|
@ -288,7 +288,9 @@ final class IntersectTermsEnumFrame {
|
||||||
|
|
||||||
// stats
|
// stats
|
||||||
termState.docFreq = statsReader.readVInt();
|
termState.docFreq = statsReader.readVInt();
|
||||||
if (ite.fr.fieldInfo.getIndexOptions() != IndexOptions.DOCS) {
|
if (ite.fr.fieldInfo.getIndexOptions() == IndexOptions.DOCS) {
|
||||||
|
termState.totalTermFreq = termState.docFreq; // all postings have freq=1
|
||||||
|
} else {
|
||||||
termState.totalTermFreq = termState.docFreq + statsReader.readVLong();
|
termState.totalTermFreq = termState.docFreq + statsReader.readVLong();
|
||||||
}
|
}
|
||||||
// metadata
|
// metadata
|
||||||
|
|
|
@ -417,7 +417,9 @@ final class SegmentTermsEnumFrame {
|
||||||
// stats
|
// stats
|
||||||
state.docFreq = statsReader.readVInt();
|
state.docFreq = statsReader.readVInt();
|
||||||
//if (DEBUG) System.out.println(" dF=" + state.docFreq);
|
//if (DEBUG) System.out.println(" dF=" + state.docFreq);
|
||||||
if (ste.fr.fieldInfo.getIndexOptions() != IndexOptions.DOCS) {
|
if (ste.fr.fieldInfo.getIndexOptions() == IndexOptions.DOCS) {
|
||||||
|
state.totalTermFreq = state.docFreq; // all postings have freq=1
|
||||||
|
} else {
|
||||||
state.totalTermFreq = state.docFreq + statsReader.readVLong();
|
state.totalTermFreq = state.docFreq + statsReader.readVLong();
|
||||||
//if (DEBUG) System.out.println(" totTF=" + state.totalTermFreq);
|
//if (DEBUG) System.out.println(" totTF=" + state.totalTermFreq);
|
||||||
}
|
}
|
||||||
|
|
|
@ -745,6 +745,7 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem
|
||||||
private static class TVTerms extends Terms {
|
private static class TVTerms extends Terms {
|
||||||
|
|
||||||
private final int numTerms, flags;
|
private final int numTerms, flags;
|
||||||
|
private final long totalTermFreq;
|
||||||
private final int[] prefixLengths, suffixLengths, termFreqs, positionIndex, positions, startOffsets, lengths, payloadIndex;
|
private final int[] prefixLengths, suffixLengths, termFreqs, positionIndex, positions, startOffsets, lengths, payloadIndex;
|
||||||
private final BytesRef termBytes, payloadBytes;
|
private final BytesRef termBytes, payloadBytes;
|
||||||
|
|
||||||
|
@ -764,6 +765,11 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem
|
||||||
this.payloadIndex = payloadIndex;
|
this.payloadIndex = payloadIndex;
|
||||||
this.payloadBytes = payloadBytes;
|
this.payloadBytes = payloadBytes;
|
||||||
this.termBytes = termBytes;
|
this.termBytes = termBytes;
|
||||||
|
long ttf = 0;
|
||||||
|
for (int tf : termFreqs) {
|
||||||
|
ttf += tf;
|
||||||
|
}
|
||||||
|
this.totalTermFreq = ttf;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -782,7 +788,7 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long getSumTotalTermFreq() throws IOException {
|
public long getSumTotalTermFreq() throws IOException {
|
||||||
return -1L;
|
return totalTermFreq;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -21,6 +21,7 @@ import java.util.Arrays;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
|
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.LeafReader;
|
import org.apache.lucene.index.LeafReader;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
import org.apache.lucene.index.PointValues;
|
import org.apache.lucene.index.PointValues;
|
||||||
|
@ -356,6 +357,11 @@ abstract class RangeFieldQuery extends Query {
|
||||||
}
|
}
|
||||||
return scorerSupplier.get(Long.MAX_VALUE);
|
return scorerSupplier.get(Long.MAX_VALUE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return context.reader().getCoreCacheHelper();
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -138,6 +138,11 @@ abstract class SortedNumericDocValuesRangeQuery extends Query {
|
||||||
}
|
}
|
||||||
return new ConstantScoreScorer(this, score(), iterator);
|
return new ConstantScoreScorer(this, score(), iterator);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return getDocValuesCacheHelper(field, context);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -181,6 +181,11 @@ abstract class SortedSetDocValuesRangeQuery extends Query {
|
||||||
}
|
}
|
||||||
return new ConstantScoreScorer(this, score(), iterator);
|
return new ConstantScoreScorer(this, score(), iterator);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return getDocValuesCacheHelper(field, context);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -123,7 +123,10 @@ public abstract class BaseCompositeReader<R extends IndexReader> extends Composi
|
||||||
ensureOpen();
|
ensureOpen();
|
||||||
int total = 0; // sum freqs in subreaders
|
int total = 0; // sum freqs in subreaders
|
||||||
for (int i = 0; i < subReaders.length; i++) {
|
for (int i = 0; i < subReaders.length; i++) {
|
||||||
total += subReaders[i].docFreq(term);
|
int sub = subReaders[i].docFreq(term);
|
||||||
|
assert sub >= 0;
|
||||||
|
assert sub <= subReaders[i].getDocCount(term.field());
|
||||||
|
total += sub;
|
||||||
}
|
}
|
||||||
return total;
|
return total;
|
||||||
}
|
}
|
||||||
|
@ -134,9 +137,8 @@ public abstract class BaseCompositeReader<R extends IndexReader> extends Composi
|
||||||
long total = 0; // sum freqs in subreaders
|
long total = 0; // sum freqs in subreaders
|
||||||
for (int i = 0; i < subReaders.length; i++) {
|
for (int i = 0; i < subReaders.length; i++) {
|
||||||
long sub = subReaders[i].totalTermFreq(term);
|
long sub = subReaders[i].totalTermFreq(term);
|
||||||
if (sub == -1) {
|
assert sub >= 0;
|
||||||
return -1;
|
assert sub <= subReaders[i].getSumTotalTermFreq(term.field());
|
||||||
}
|
|
||||||
total += sub;
|
total += sub;
|
||||||
}
|
}
|
||||||
return total;
|
return total;
|
||||||
|
@ -148,9 +150,8 @@ public abstract class BaseCompositeReader<R extends IndexReader> extends Composi
|
||||||
long total = 0; // sum doc freqs in subreaders
|
long total = 0; // sum doc freqs in subreaders
|
||||||
for (R reader : subReaders) {
|
for (R reader : subReaders) {
|
||||||
long sub = reader.getSumDocFreq(field);
|
long sub = reader.getSumDocFreq(field);
|
||||||
if (sub == -1) {
|
assert sub >= 0;
|
||||||
return -1; // if any of the subs doesn't support it, return -1
|
assert sub <= reader.getSumTotalTermFreq(field);
|
||||||
}
|
|
||||||
total += sub;
|
total += sub;
|
||||||
}
|
}
|
||||||
return total;
|
return total;
|
||||||
|
@ -162,9 +163,8 @@ public abstract class BaseCompositeReader<R extends IndexReader> extends Composi
|
||||||
int total = 0; // sum doc counts in subreaders
|
int total = 0; // sum doc counts in subreaders
|
||||||
for (R reader : subReaders) {
|
for (R reader : subReaders) {
|
||||||
int sub = reader.getDocCount(field);
|
int sub = reader.getDocCount(field);
|
||||||
if (sub == -1) {
|
assert sub >= 0;
|
||||||
return -1; // if any of the subs doesn't support it, return -1
|
assert sub <= reader.maxDoc();
|
||||||
}
|
|
||||||
total += sub;
|
total += sub;
|
||||||
}
|
}
|
||||||
return total;
|
return total;
|
||||||
|
@ -176,9 +176,8 @@ public abstract class BaseCompositeReader<R extends IndexReader> extends Composi
|
||||||
long total = 0; // sum doc total term freqs in subreaders
|
long total = 0; // sum doc total term freqs in subreaders
|
||||||
for (R reader : subReaders) {
|
for (R reader : subReaders) {
|
||||||
long sub = reader.getSumTotalTermFreq(field);
|
long sub = reader.getSumTotalTermFreq(field);
|
||||||
if (sub == -1) {
|
assert sub >= 0;
|
||||||
return -1; // if any of the subs doesn't support it, return -1
|
assert sub >= reader.getSumDocFreq(field);
|
||||||
}
|
|
||||||
total += sub;
|
total += sub;
|
||||||
}
|
}
|
||||||
return total;
|
return total;
|
||||||
|
|
|
@ -1253,6 +1253,10 @@ public final class CheckIndex implements Closeable {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (terms.getDocCount() > maxDoc) {
|
||||||
|
throw new RuntimeException("docCount > maxDoc for field: " + field + ", docCount=" + terms.getDocCount() + ", maxDoc=" + maxDoc);
|
||||||
|
}
|
||||||
|
|
||||||
final boolean hasFreqs = terms.hasFreqs();
|
final boolean hasFreqs = terms.hasFreqs();
|
||||||
final boolean hasPositions = terms.hasPositions();
|
final boolean hasPositions = terms.hasPositions();
|
||||||
final boolean hasPayloads = terms.hasPayloads();
|
final boolean hasPayloads = terms.hasPayloads();
|
||||||
|
@ -1295,12 +1299,6 @@ public final class CheckIndex implements Closeable {
|
||||||
throw new RuntimeException("field \"" + field + "\" should have hasFreqs=" + expectedHasFreqs + " but got " + hasFreqs);
|
throw new RuntimeException("field \"" + field + "\" should have hasFreqs=" + expectedHasFreqs + " but got " + hasFreqs);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (hasFreqs == false) {
|
|
||||||
if (terms.getSumTotalTermFreq() != -1) {
|
|
||||||
throw new RuntimeException("field \"" + field + "\" hasFreqs is false, but Terms.getSumTotalTermFreq()=" + terms.getSumTotalTermFreq() + " (should be -1)");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!isVectors) {
|
if (!isVectors) {
|
||||||
final boolean expectedHasPositions = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
|
final boolean expectedHasPositions = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
|
||||||
if (hasPositions != expectedHasPositions) {
|
if (hasPositions != expectedHasPositions) {
|
||||||
|
@ -1375,8 +1373,8 @@ public final class CheckIndex implements Closeable {
|
||||||
postings = termsEnum.postings(postings, PostingsEnum.ALL);
|
postings = termsEnum.postings(postings, PostingsEnum.ALL);
|
||||||
|
|
||||||
if (hasFreqs == false) {
|
if (hasFreqs == false) {
|
||||||
if (termsEnum.totalTermFreq() != -1) {
|
if (termsEnum.totalTermFreq() != termsEnum.docFreq()) {
|
||||||
throw new RuntimeException("field \"" + field + "\" hasFreqs is false, but TermsEnum.totalTermFreq()=" + termsEnum.totalTermFreq() + " (should be -1)");
|
throw new RuntimeException("field \"" + field + "\" hasFreqs is false, but TermsEnum.totalTermFreq()=" + termsEnum.totalTermFreq() + " (should be " + termsEnum.docFreq() + ")");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1406,14 +1404,11 @@ public final class CheckIndex implements Closeable {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
visitedDocs.set(doc);
|
visitedDocs.set(doc);
|
||||||
int freq = -1;
|
int freq = postings.freq();
|
||||||
if (hasFreqs) {
|
if (freq <= 0) {
|
||||||
freq = postings.freq();
|
throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
|
||||||
if (freq <= 0) {
|
}
|
||||||
throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
|
if (hasFreqs == false) {
|
||||||
}
|
|
||||||
totalTermFreq += freq;
|
|
||||||
} else {
|
|
||||||
// When a field didn't index freq, it must
|
// When a field didn't index freq, it must
|
||||||
// consistently "lie" and pretend that freq was
|
// consistently "lie" and pretend that freq was
|
||||||
// 1:
|
// 1:
|
||||||
|
@ -1421,6 +1416,8 @@ public final class CheckIndex implements Closeable {
|
||||||
throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " != 1 when Terms.hasFreqs() is false");
|
throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " != 1 when Terms.hasFreqs() is false");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
totalTermFreq += freq;
|
||||||
|
|
||||||
if (liveDocs == null || liveDocs.get(doc)) {
|
if (liveDocs == null || liveDocs.get(doc)) {
|
||||||
hasNonDeletedDocs = true;
|
hasNonDeletedDocs = true;
|
||||||
status.totFreq++;
|
status.totFreq++;
|
||||||
|
@ -1490,19 +1487,25 @@ public final class CheckIndex implements Closeable {
|
||||||
}
|
}
|
||||||
|
|
||||||
final long totalTermFreq2 = termsEnum.totalTermFreq();
|
final long totalTermFreq2 = termsEnum.totalTermFreq();
|
||||||
final boolean hasTotalTermFreq = hasFreqs && totalTermFreq2 != -1;
|
|
||||||
|
|
||||||
if (docCount != docFreq) {
|
if (docCount != docFreq) {
|
||||||
throw new RuntimeException("term " + term + " docFreq=" + docFreq + " != tot docs w/o deletions " + docCount);
|
throw new RuntimeException("term " + term + " docFreq=" + docFreq + " != tot docs w/o deletions " + docCount);
|
||||||
}
|
}
|
||||||
if (hasTotalTermFreq) {
|
if (docFreq > terms.getDocCount()) {
|
||||||
if (totalTermFreq2 <= 0) {
|
throw new RuntimeException("term " + term + " docFreq=" + docFreq + " > docCount=" + terms.getDocCount());
|
||||||
throw new RuntimeException("totalTermFreq: " + totalTermFreq2 + " is out of bounds");
|
}
|
||||||
}
|
if (totalTermFreq2 <= 0) {
|
||||||
sumTotalTermFreq += totalTermFreq;
|
throw new RuntimeException("totalTermFreq: " + totalTermFreq2 + " is out of bounds");
|
||||||
if (totalTermFreq != totalTermFreq2) {
|
}
|
||||||
throw new RuntimeException("term " + term + " totalTermFreq=" + totalTermFreq2 + " != recomputed totalTermFreq=" + totalTermFreq);
|
sumTotalTermFreq += totalTermFreq;
|
||||||
}
|
if (totalTermFreq != totalTermFreq2) {
|
||||||
|
throw new RuntimeException("term " + term + " totalTermFreq=" + totalTermFreq2 + " != recomputed totalTermFreq=" + totalTermFreq);
|
||||||
|
}
|
||||||
|
if (totalTermFreq2 < docFreq) {
|
||||||
|
throw new RuntimeException("totalTermFreq: " + totalTermFreq2 + " is out of bounds, docFreq=" + docFreq);
|
||||||
|
}
|
||||||
|
if (hasFreqs == false && totalTermFreq != docFreq) {
|
||||||
|
throw new RuntimeException("term " + term + " totalTermFreq=" + totalTermFreq + " != docFreq=" + docFreq);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test skipping
|
// Test skipping
|
||||||
|
@ -1626,22 +1629,22 @@ public final class CheckIndex implements Closeable {
|
||||||
}
|
}
|
||||||
status.blockTreeStats.put(field, stats);
|
status.blockTreeStats.put(field, stats);
|
||||||
|
|
||||||
if (sumTotalTermFreq != 0) {
|
final long actualSumDocFreq = fields.terms(field).getSumDocFreq();
|
||||||
final long v = fields.terms(field).getSumTotalTermFreq();
|
if (sumDocFreq != actualSumDocFreq) {
|
||||||
if (v != -1 && sumTotalTermFreq != v) {
|
throw new RuntimeException("sumDocFreq for field " + field + "=" + actualSumDocFreq + " != recomputed sumDocFreq=" + sumDocFreq);
|
||||||
throw new RuntimeException("sumTotalTermFreq for field " + field + "=" + v + " != recomputed sumTotalTermFreq=" + sumTotalTermFreq);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
final long actualSumTotalTermFreq = fields.terms(field).getSumTotalTermFreq();
|
||||||
|
if (sumTotalTermFreq != actualSumTotalTermFreq) {
|
||||||
|
throw new RuntimeException("sumTotalTermFreq for field " + field + "=" + actualSumTotalTermFreq + " != recomputed sumTotalTermFreq=" + sumTotalTermFreq);
|
||||||
|
}
|
||||||
|
|
||||||
if (sumDocFreq != 0) {
|
if (hasFreqs == false && sumTotalTermFreq != sumDocFreq) {
|
||||||
final long v = fields.terms(field).getSumDocFreq();
|
throw new RuntimeException("sumTotalTermFreq for field " + field + " should be " + sumDocFreq + ", got sumTotalTermFreq=" + sumTotalTermFreq);
|
||||||
if (v != -1 && sumDocFreq != v) {
|
|
||||||
throw new RuntimeException("sumDocFreq for field " + field + "=" + v + " != recomputed sumDocFreq=" + sumDocFreq);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
final int v = fieldTerms.getDocCount();
|
final int v = fieldTerms.getDocCount();
|
||||||
if (v != -1 && visitedDocs.cardinality() != v) {
|
if (visitedDocs.cardinality() != v) {
|
||||||
throw new RuntimeException("docCount for field " + field + "=" + v + " != recomputed docCount=" + visitedDocs.cardinality());
|
throw new RuntimeException("docCount for field " + field + "=" + v + " != recomputed docCount=" + visitedDocs.cardinality());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -450,25 +450,25 @@ public abstract class IndexReader implements Closeable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the total number of occurrences of {@code term} across all
|
* Returns the total number of occurrences of {@code term} across all
|
||||||
* documents (the sum of the freq() for each doc that has this term). This
|
* documents (the sum of the freq() for each doc that has this term).
|
||||||
* will be -1 if the codec doesn't support this measure. Note that, like other
|
* Note that, like other term measures, this measure does not take
|
||||||
* term measures, this measure does not take deleted documents into account.
|
* deleted documents into account.
|
||||||
*/
|
*/
|
||||||
public abstract long totalTermFreq(Term term) throws IOException;
|
public abstract long totalTermFreq(Term term) throws IOException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the sum of {@link TermsEnum#docFreq()} for all terms in this field,
|
* Returns the sum of {@link TermsEnum#docFreq()} for all terms in this field.
|
||||||
* or -1 if this measure isn't stored by the codec. Note that, just like other
|
* Note that, just like other term measures, this measure does not take deleted
|
||||||
* term measures, this measure does not take deleted documents into account.
|
* documents into account.
|
||||||
*
|
*
|
||||||
* @see Terms#getSumDocFreq()
|
* @see Terms#getSumDocFreq()
|
||||||
*/
|
*/
|
||||||
public abstract long getSumDocFreq(String field) throws IOException;
|
public abstract long getSumDocFreq(String field) throws IOException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the number of documents that have at least one term for this field,
|
* Returns the number of documents that have at least one term for this field.
|
||||||
* or -1 if this measure isn't stored by the codec. Note that, just like other
|
* Note that, just like other term measures, this measure does not take deleted
|
||||||
* term measures, this measure does not take deleted documents into account.
|
* documents into account.
|
||||||
*
|
*
|
||||||
* @see Terms#getDocCount()
|
* @see Terms#getDocCount()
|
||||||
*/
|
*/
|
||||||
|
@ -476,9 +476,8 @@ public abstract class IndexReader implements Closeable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the sum of {@link TermsEnum#totalTermFreq} for all terms in this
|
* Returns the sum of {@link TermsEnum#totalTermFreq} for all terms in this
|
||||||
* field, or -1 if this measure isn't stored by the codec (or if this fields
|
* field. Note that, just like other term measures, this measure does not take
|
||||||
* omits term freq and positions). Note that, just like other term measures,
|
* deleted documents into account.
|
||||||
* this measure does not take deleted documents into account.
|
|
||||||
*
|
*
|
||||||
* @see Terms#getSumTotalTermFreq()
|
* @see Terms#getSumTotalTermFreq()
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -149,9 +149,7 @@ public final class MultiTerms extends Terms {
|
||||||
long sum = 0;
|
long sum = 0;
|
||||||
for(Terms terms : subs) {
|
for(Terms terms : subs) {
|
||||||
final long v = terms.getSumTotalTermFreq();
|
final long v = terms.getSumTotalTermFreq();
|
||||||
if (v == -1) {
|
assert v != -1;
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
sum += v;
|
sum += v;
|
||||||
}
|
}
|
||||||
return sum;
|
return sum;
|
||||||
|
@ -162,9 +160,7 @@ public final class MultiTerms extends Terms {
|
||||||
long sum = 0;
|
long sum = 0;
|
||||||
for(Terms terms : subs) {
|
for(Terms terms : subs) {
|
||||||
final long v = terms.getSumDocFreq();
|
final long v = terms.getSumDocFreq();
|
||||||
if (v == -1) {
|
assert v != -1;
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
sum += v;
|
sum += v;
|
||||||
}
|
}
|
||||||
return sum;
|
return sum;
|
||||||
|
@ -175,9 +171,7 @@ public final class MultiTerms extends Terms {
|
||||||
int sum = 0;
|
int sum = 0;
|
||||||
for(Terms terms : subs) {
|
for(Terms terms : subs) {
|
||||||
final int v = terms.getDocCount();
|
final int v = terms.getDocCount();
|
||||||
if (v == -1) {
|
assert v != -1;
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
sum += v;
|
sum += v;
|
||||||
}
|
}
|
||||||
return sum;
|
return sum;
|
||||||
|
|
|
@ -326,9 +326,7 @@ public final class MultiTermsEnum extends TermsEnum {
|
||||||
long sum = 0;
|
long sum = 0;
|
||||||
for(int i=0;i<numTop;i++) {
|
for(int i=0;i<numTop;i++) {
|
||||||
final long v = top[i].terms.totalTermFreq();
|
final long v = top[i].terms.totalTermFreq();
|
||||||
if (v == -1) {
|
assert v != -1;
|
||||||
return v;
|
|
||||||
}
|
|
||||||
sum += v;
|
sum += v;
|
||||||
}
|
}
|
||||||
return sum;
|
return sum;
|
||||||
|
|
|
@ -101,7 +101,7 @@ class SortedDocValuesTermsEnum extends TermsEnum {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long totalTermFreq() {
|
public long totalTermFreq() {
|
||||||
return -1;
|
throw new UnsupportedOperationException();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -101,7 +101,7 @@ class SortedSetDocValuesTermsEnum extends TermsEnum {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long totalTermFreq() {
|
public long totalTermFreq() {
|
||||||
return -1;
|
throw new UnsupportedOperationException();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -141,11 +141,11 @@ public final class TermContext {
|
||||||
|
|
||||||
/** Expert: Accumulate term statistics. */
|
/** Expert: Accumulate term statistics. */
|
||||||
public void accumulateStatistics(final int docFreq, final long totalTermFreq) {
|
public void accumulateStatistics(final int docFreq, final long totalTermFreq) {
|
||||||
|
assert docFreq >= 0;
|
||||||
|
assert totalTermFreq >= 0;
|
||||||
|
assert docFreq <= totalTermFreq;
|
||||||
this.docFreq += docFreq;
|
this.docFreq += docFreq;
|
||||||
if (this.totalTermFreq >= 0 && totalTermFreq >= 0)
|
this.totalTermFreq += totalTermFreq;
|
||||||
this.totalTermFreq += totalTermFreq;
|
|
||||||
else
|
|
||||||
this.totalTermFreq = -1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -99,25 +99,21 @@ public abstract class Terms {
|
||||||
* other term measures, this measure does not take deleted
|
* other term measures, this measure does not take deleted
|
||||||
* documents into account. */
|
* documents into account. */
|
||||||
public abstract long size() throws IOException;
|
public abstract long size() throws IOException;
|
||||||
|
|
||||||
/** Returns the sum of {@link TermsEnum#totalTermFreq} for
|
/** Returns the sum of {@link TermsEnum#totalTermFreq} for
|
||||||
* all terms in this field, or -1 if this measure isn't
|
* all terms in this field. Note that, just like other term
|
||||||
* stored by the codec (or if this fields omits term freq
|
|
||||||
* and positions). Note that, just like other term
|
|
||||||
* measures, this measure does not take deleted documents
|
* measures, this measure does not take deleted documents
|
||||||
* into account. */
|
* into account. */
|
||||||
public abstract long getSumTotalTermFreq() throws IOException;
|
public abstract long getSumTotalTermFreq() throws IOException;
|
||||||
|
|
||||||
/** Returns the sum of {@link TermsEnum#docFreq()} for
|
/** Returns the sum of {@link TermsEnum#docFreq()} for
|
||||||
* all terms in this field, or -1 if this measure isn't
|
* all terms in this field. Note that, just like other term
|
||||||
* stored by the codec. Note that, just like other term
|
|
||||||
* measures, this measure does not take deleted documents
|
* measures, this measure does not take deleted documents
|
||||||
* into account. */
|
* into account. */
|
||||||
public abstract long getSumDocFreq() throws IOException;
|
public abstract long getSumDocFreq() throws IOException;
|
||||||
|
|
||||||
/** Returns the number of documents that have at least one
|
/** Returns the number of documents that have at least one
|
||||||
* term for this field, or -1 if this measure isn't
|
* term for this field. Note that, just like other term
|
||||||
* stored by the codec. Note that, just like other term
|
|
||||||
* measures, this measure does not take deleted documents
|
* measures, this measure does not take deleted documents
|
||||||
* into account. */
|
* into account. */
|
||||||
public abstract int getDocCount() throws IOException;
|
public abstract int getDocCount() throws IOException;
|
||||||
|
|
|
@ -131,8 +131,7 @@ public abstract class TermsEnum implements BytesRefIterator {
|
||||||
|
|
||||||
/** Returns the total number of occurrences of this term
|
/** Returns the total number of occurrences of this term
|
||||||
* across all documents (the sum of the freq() for each
|
* across all documents (the sum of the freq() for each
|
||||||
* doc that has this term). This will be -1 if the
|
* doc that has this term). Note that, like
|
||||||
* codec doesn't support this measure. Note that, like
|
|
||||||
* other term measures, this measure does not take
|
* other term measures, this measure does not take
|
||||||
* deleted documents into account. */
|
* deleted documents into account. */
|
||||||
public abstract long totalTermFreq() throws IOException;
|
public abstract long totalTermFreq() throws IOException;
|
||||||
|
|
|
@ -148,12 +148,8 @@
|
||||||
* deleted documents, when segments are merged the statistic is updated as
|
* deleted documents, when segments are merged the statistic is updated as
|
||||||
* those deleted documents are merged away.
|
* those deleted documents are merged away.
|
||||||
* <li>{@link org.apache.lucene.index.TermsEnum#totalTermFreq}: Returns the number
|
* <li>{@link org.apache.lucene.index.TermsEnum#totalTermFreq}: Returns the number
|
||||||
* of occurrences of this term across all documents. Note that this statistic
|
* of occurrences of this term across all documents. Like docFreq(), it will
|
||||||
* is unavailable (returns <code>-1</code>) if term frequencies were omitted
|
* also count occurrences that appear in deleted documents.
|
||||||
* from the index
|
|
||||||
* ({@link org.apache.lucene.index.IndexOptions#DOCS DOCS})
|
|
||||||
* for the field. Like docFreq(), it will also count occurrences that appear in
|
|
||||||
* deleted documents.
|
|
||||||
* </ul>
|
* </ul>
|
||||||
* <a name="fieldstats"></a>
|
* <a name="fieldstats"></a>
|
||||||
* <h3>
|
* <h3>
|
||||||
|
@ -180,10 +176,7 @@
|
||||||
* of tokens for the field. This can be thought of as the sum of
|
* of tokens for the field. This can be thought of as the sum of
|
||||||
* {@link org.apache.lucene.index.TermsEnum#totalTermFreq} across all terms in the
|
* {@link org.apache.lucene.index.TermsEnum#totalTermFreq} across all terms in the
|
||||||
* field, and like totalTermFreq() it will also count occurrences that appear in
|
* field, and like totalTermFreq() it will also count occurrences that appear in
|
||||||
* deleted documents, and will be unavailable (returns <code>-1</code>) if term
|
* deleted documents.
|
||||||
* frequencies were omitted from the index
|
|
||||||
* ({@link org.apache.lucene.index.IndexOptions#DOCS DOCS})
|
|
||||||
* for the field.
|
|
||||||
* </ul>
|
* </ul>
|
||||||
* <a name="segmentstats"></a>
|
* <a name="segmentstats"></a>
|
||||||
* <h3>
|
* <h3>
|
||||||
|
|
|
@ -277,11 +277,7 @@ public final class BlendedTermQuery extends Query {
|
||||||
long ttf = 0;
|
long ttf = 0;
|
||||||
for (TermContext ctx : contexts) {
|
for (TermContext ctx : contexts) {
|
||||||
df = Math.max(df, ctx.docFreq());
|
df = Math.max(df, ctx.docFreq());
|
||||||
if (ctx.totalTermFreq() == -1L) {
|
ttf += ctx.totalTermFreq();
|
||||||
ttf = -1L;
|
|
||||||
} else if (ttf != -1L) {
|
|
||||||
ttf += ctx.totalTermFreq();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < contexts.length; ++i) {
|
for (int i = 0; i < contexts.length; ++i) {
|
||||||
|
|
|
@ -26,6 +26,7 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.search.BooleanClause.Occur;
|
import org.apache.lucene.search.BooleanClause.Occur;
|
||||||
|
@ -299,6 +300,11 @@ final class BooleanWeight extends Weight {
|
||||||
return scorerSupplier.get(Long.MAX_VALUE);
|
return scorerSupplier.get(Long.MAX_VALUE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return getCacheHelper(context, weights);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
|
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
|
||||||
int minShouldMatch = query.getMinimumNumberShouldMatch();
|
int minShouldMatch = query.getMinimumNumberShouldMatch();
|
||||||
|
|
|
@ -23,7 +23,27 @@ import org.apache.lucene.index.Terms; // javadocs
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Contains statistics for a collection (field)
|
* Contains statistics for a collection (field).
|
||||||
|
* <p>
|
||||||
|
* This class holds statistics across all documents for scoring purposes:
|
||||||
|
* <ul>
|
||||||
|
* <li> {@link #maxDoc()}: number of documents.
|
||||||
|
* <li> {@link #docCount()}: number of documents that contain this field.
|
||||||
|
* <li> {@link #sumDocFreq()}: number of postings-list entries.
|
||||||
|
* <li> {@link #sumTotalTermFreq()}: number of tokens.
|
||||||
|
* </ul>
|
||||||
|
* <p>
|
||||||
|
* The following conditions are always true:
|
||||||
|
* <ul>
|
||||||
|
* <li> All statistics are positive integers: never zero or negative.
|
||||||
|
* <li> {@code docCount} <= {@code maxDoc}
|
||||||
|
* <li> {@code docCount} <= {@code sumDocFreq} <= {@code sumTotalTermFreq}
|
||||||
|
* </ul>
|
||||||
|
* <p>
|
||||||
|
* Values may include statistics on deleted documents that have not yet been merged away.
|
||||||
|
* <p>
|
||||||
|
* Be careful when performing calculations on these values because they are represented
|
||||||
|
* as 64-bit integer values, you may need to cast to {@code double} for your use.
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
public class CollectionStatistics {
|
public class CollectionStatistics {
|
||||||
|
@ -51,33 +71,23 @@ public class CollectionStatistics {
|
||||||
if (maxDoc <= 0) {
|
if (maxDoc <= 0) {
|
||||||
throw new IllegalArgumentException("maxDoc must be positive, maxDoc: " + maxDoc);
|
throw new IllegalArgumentException("maxDoc must be positive, maxDoc: " + maxDoc);
|
||||||
}
|
}
|
||||||
if (docCount != -1) {
|
if (docCount <= 0) {
|
||||||
if (docCount <= 0) {
|
throw new IllegalArgumentException("docCount must be positive, docCount: " + docCount);
|
||||||
throw new IllegalArgumentException("docCount must be positive, docCount: " + docCount);
|
|
||||||
}
|
|
||||||
if (docCount > maxDoc) {
|
|
||||||
throw new IllegalArgumentException("docCount must not exceed maxDoc, docCount: " + docCount + ", maxDoc: " + maxDoc);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if (sumDocFreq != -1) {
|
if (docCount > maxDoc) {
|
||||||
if (sumDocFreq <= 0) {
|
throw new IllegalArgumentException("docCount must not exceed maxDoc, docCount: " + docCount + ", maxDoc: " + maxDoc);
|
||||||
throw new IllegalArgumentException("sumDocFreq must be positive, sumDocFreq: " + sumDocFreq);
|
|
||||||
}
|
|
||||||
if (docCount != -1) {
|
|
||||||
if (sumDocFreq < docCount) {
|
|
||||||
throw new IllegalArgumentException("sumDocFreq must be at least docCount, sumDocFreq: " + sumDocFreq + ", docCount: " + docCount);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if (sumTotalTermFreq != -1) {
|
if (sumDocFreq <= 0) {
|
||||||
if (sumTotalTermFreq <= 0) {
|
throw new IllegalArgumentException("sumDocFreq must be positive, sumDocFreq: " + sumDocFreq);
|
||||||
throw new IllegalArgumentException("sumTotalTermFreq must be positive, sumTotalTermFreq: " + sumTotalTermFreq);
|
}
|
||||||
}
|
if (sumDocFreq < docCount) {
|
||||||
if (sumDocFreq != -1) {
|
throw new IllegalArgumentException("sumDocFreq must be at least docCount, sumDocFreq: " + sumDocFreq + ", docCount: " + docCount);
|
||||||
if (sumTotalTermFreq < sumDocFreq) {
|
}
|
||||||
throw new IllegalArgumentException("sumTotalTermFreq must be at least sumDocFreq, sumTotalTermFreq: " + sumTotalTermFreq + ", sumDocFreq: " + sumDocFreq);
|
if (sumTotalTermFreq <= 0) {
|
||||||
}
|
throw new IllegalArgumentException("sumTotalTermFreq must be positive, sumTotalTermFreq: " + sumTotalTermFreq);
|
||||||
}
|
}
|
||||||
|
if (sumTotalTermFreq < sumDocFreq) {
|
||||||
|
throw new IllegalArgumentException("sumTotalTermFreq must be at least sumDocFreq, sumTotalTermFreq: " + sumTotalTermFreq + ", sumDocFreq: " + sumDocFreq);
|
||||||
}
|
}
|
||||||
this.field = field;
|
this.field = field;
|
||||||
this.maxDoc = maxDoc;
|
this.maxDoc = maxDoc;
|
||||||
|
@ -86,33 +96,65 @@ public class CollectionStatistics {
|
||||||
this.sumDocFreq = sumDocFreq;
|
this.sumDocFreq = sumDocFreq;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** returns the field name */
|
/**
|
||||||
|
* The field's name.
|
||||||
|
* <p>
|
||||||
|
* This value is never {@code null}.
|
||||||
|
* @return field's name, not {@code null}
|
||||||
|
*/
|
||||||
public final String field() {
|
public final String field() {
|
||||||
return field;
|
return field;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** returns the total number of documents, regardless of
|
/**
|
||||||
* whether they all contain values for this field.
|
* The total number of documents, regardless of
|
||||||
* @see IndexReader#maxDoc() */
|
* whether they all contain values for this field.
|
||||||
|
* <p>
|
||||||
|
* This value is always a positive number.
|
||||||
|
* @return total number of documents, in the range [1 .. {@link Long#MAX_VALUE}]
|
||||||
|
* @see IndexReader#maxDoc()
|
||||||
|
*/
|
||||||
public final long maxDoc() {
|
public final long maxDoc() {
|
||||||
return maxDoc;
|
return maxDoc;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** returns the total number of documents that
|
/**
|
||||||
* have at least one term for this field.
|
* The total number of documents that have at least
|
||||||
* @see Terms#getDocCount() */
|
* one term for this field.
|
||||||
|
* <p>
|
||||||
|
* This value is always a positive number, and never
|
||||||
|
* exceeds {@link #maxDoc()}.
|
||||||
|
* @return total number of documents containing this field, in the range [1 .. {@link #maxDoc()}]
|
||||||
|
* @see Terms#getDocCount()
|
||||||
|
*/
|
||||||
public final long docCount() {
|
public final long docCount() {
|
||||||
return docCount;
|
return docCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** returns the total number of tokens for this field
|
/**
|
||||||
* @see Terms#getSumTotalTermFreq() */
|
* The total number of tokens for this field.
|
||||||
|
* This is the "word count" for this field across all documents.
|
||||||
|
* It is the sum of {@link TermStatistics#totalTermFreq()} across all terms.
|
||||||
|
* It is also the sum of each document's field length across all documents.
|
||||||
|
* <p>
|
||||||
|
* This value is always a positive number, and always at least {@link #sumDocFreq()}.
|
||||||
|
* @return total number of tokens in the field, in the range [{@link #sumDocFreq()} .. {@link Long#MAX_VALUE}]
|
||||||
|
* @see Terms#getSumTotalTermFreq()
|
||||||
|
*/
|
||||||
public final long sumTotalTermFreq() {
|
public final long sumTotalTermFreq() {
|
||||||
return sumTotalTermFreq;
|
return sumTotalTermFreq;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** returns the total number of postings for this field
|
/**
|
||||||
* @see Terms#getSumDocFreq() */
|
* The total number of posting list entries for this field.
|
||||||
|
* This is the sum of term-document pairs: the sum of {@link TermStatistics#docFreq()} across all terms.
|
||||||
|
* It is also the sum of each document's unique term count for this field across all documents.
|
||||||
|
* <p>
|
||||||
|
* This value is always a positive number, always at least {@link #docCount()}, and never
|
||||||
|
* exceeds {@link #sumTotalTermFreq()}.
|
||||||
|
* @return number of posting list entries, in the range [{@link #docCount()} .. {@link #sumTotalTermFreq()}]
|
||||||
|
* @see Terms#getSumDocFreq()
|
||||||
|
*/
|
||||||
public final long sumDocFreq() {
|
public final long sumDocFreq() {
|
||||||
return sumDocFreq;
|
return sumDocFreq;
|
||||||
}
|
}
|
||||||
|
|
|
@ -167,6 +167,11 @@ public final class ConstantScoreQuery extends Query {
|
||||||
return scorerSupplier.get(Long.MAX_VALUE);
|
return scorerSupplier.get(Long.MAX_VALUE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return innerWeight.getCacheHelper(context);
|
||||||
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
} else {
|
} else {
|
||||||
return innerWeight;
|
return innerWeight;
|
||||||
|
|
|
@ -137,6 +137,11 @@ public final class DisjunctionMaxQuery extends Query implements Iterable<Query>
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return getCacheHelper(context, weights);
|
||||||
|
}
|
||||||
|
|
||||||
/** Explain the score we computed for doc */
|
/** Explain the score we computed for doc */
|
||||||
@Override
|
@Override
|
||||||
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
|
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
|
||||||
|
|
|
@ -23,6 +23,7 @@ import java.util.Objects;
|
||||||
import org.apache.lucene.index.DocValuesType;
|
import org.apache.lucene.index.DocValuesType;
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
import org.apache.lucene.index.FieldInfos;
|
import org.apache.lucene.index.FieldInfos;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.LeafReader;
|
import org.apache.lucene.index.LeafReader;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
|
|
||||||
|
@ -97,6 +98,11 @@ public final class DocValuesFieldExistsQuery extends Query {
|
||||||
|
|
||||||
return new ConstantScoreScorer(this, score(), iterator);
|
return new ConstantScoreScorer(this, score(), iterator);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return getDocValuesCacheHelper(field, context);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -86,17 +86,17 @@ public final class DocValuesRewriteMethod extends MultiTermQuery.RewriteMethod {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long getSumTotalTermFreq() {
|
public long getSumTotalTermFreq() {
|
||||||
return -1;
|
throw new UnsupportedOperationException();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long getSumDocFreq() {
|
public long getSumDocFreq() {
|
||||||
return -1;
|
throw new UnsupportedOperationException();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int getDocCount() {
|
public int getDocCount() {
|
||||||
return -1;
|
throw new UnsupportedOperationException();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -158,6 +158,11 @@ public final class DocValuesRewriteMethod extends MultiTermQuery.RewriteMethod {
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return getDocValuesCacheHelper(query.field, context);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.search;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
|
|
||||||
|
@ -55,6 +56,11 @@ public abstract class FilterWeight extends Weight {
|
||||||
this.in = weight;
|
this.in = weight;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return in.getCacheHelper(context);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void extractTerms(Set<Term> terms) {
|
public void extractTerms(Set<Term> terms) {
|
||||||
in.extractTerms(terms);
|
in.extractTerms(terms);
|
||||||
|
|
|
@ -169,6 +169,13 @@ public final class IndexOrDocValuesQuery extends Query {
|
||||||
}
|
}
|
||||||
return scorerSupplier.get(Long.MAX_VALUE);
|
return scorerSupplier.get(Long.MAX_VALUE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
// Both index and dv query should return the same values, so we can use
|
||||||
|
// the index query's cachehelper here
|
||||||
|
return indexWeight.getCacheHelper(context);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -722,8 +722,7 @@ public class LRUQueryCache implements QueryCache, Accountable {
|
||||||
policy.onUse(getQuery());
|
policy.onUse(getQuery());
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: should it be pluggable, eg. for queries that run on doc values?
|
final IndexReader.CacheHelper cacheHelper = in.getCacheHelper(context);
|
||||||
final IndexReader.CacheHelper cacheHelper = context.reader().getCoreCacheHelper();
|
|
||||||
if (cacheHelper == null) {
|
if (cacheHelper == null) {
|
||||||
// this segment is not suitable for caching
|
// this segment is not suitable for caching
|
||||||
return in.scorerSupplier(context);
|
return in.scorerSupplier(context);
|
||||||
|
@ -788,14 +787,18 @@ public class LRUQueryCache implements QueryCache, Accountable {
|
||||||
return scorerSupplier.get(Long.MAX_VALUE);
|
return scorerSupplier.get(Long.MAX_VALUE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return in.getCacheHelper(context);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
|
public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
|
||||||
if (used.compareAndSet(false, true)) {
|
if (used.compareAndSet(false, true)) {
|
||||||
policy.onUse(getQuery());
|
policy.onUse(getQuery());
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: should it be pluggable, eg. for queries that run on doc values?
|
final IndexReader.CacheHelper cacheHelper = in.getCacheHelper(context);
|
||||||
final IndexReader.CacheHelper cacheHelper = context.reader().getCoreCacheHelper();
|
|
||||||
if (cacheHelper == null) {
|
if (cacheHelper == null) {
|
||||||
// this segment is not suitable for caching
|
// this segment is not suitable for caching
|
||||||
return in.bulkScorer(context);
|
return in.bulkScorer(context);
|
||||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.search;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
|
|
||||||
|
@ -39,6 +40,12 @@ public final class MatchAllDocsQuery extends Query {
|
||||||
public Scorer scorer(LeafReaderContext context) throws IOException {
|
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||||
return new ConstantScoreScorer(this, score(), DocIdSetIterator.all(context.reader().maxDoc()));
|
return new ConstantScoreScorer(this, score(), DocIdSetIterator.all(context.reader().maxDoc()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return context.reader().getCoreCacheHelper();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
|
public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
|
||||||
final float score = score();
|
final float score = score();
|
||||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.lucene.search;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
|
|
||||||
|
@ -58,6 +59,11 @@ public class MatchNoDocsQuery extends Query {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return context.reader().getCoreCacheHelper();
|
||||||
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -291,6 +291,11 @@ public class MultiPhraseQuery extends Query {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return context.reader().getCoreCacheHelper();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
|
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
|
||||||
Scorer scorer = scorer(context);
|
Scorer scorer = scorer(context);
|
||||||
|
|
|
@ -22,6 +22,7 @@ import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
import org.apache.lucene.index.PostingsEnum;
|
import org.apache.lucene.index.PostingsEnum;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
|
@ -211,6 +212,11 @@ final class MultiTermQueryConstantScoreWrapper<Q extends MultiTermQuery> extends
|
||||||
return scorer(weightOrBitSet.set);
|
return scorer(weightOrBitSet.set);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return context.reader().getCoreCacheHelper();
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,6 +23,7 @@ import java.util.Objects;
|
||||||
import org.apache.lucene.document.StringField;
|
import org.apache.lucene.document.StringField;
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
import org.apache.lucene.index.FieldInfos;
|
import org.apache.lucene.index.FieldInfos;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.LeafReader;
|
import org.apache.lucene.index.LeafReader;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
|
|
||||||
|
@ -75,6 +76,11 @@ public final class NormsFieldExistsQuery extends Query {
|
||||||
DocIdSetIterator iterator = reader.getNormValues(field);
|
DocIdSetIterator iterator = reader.getNormValues(field);
|
||||||
return new ConstantScoreScorer(this, score(), iterator);
|
return new ConstantScoreScorer(this, score(), iterator);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return context.reader().getCoreCacheHelper();
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -441,7 +441,12 @@ public class PhraseQuery extends Query {
|
||||||
needsScores, totalMatchCost);
|
needsScores, totalMatchCost);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return context.reader().getCoreCacheHelper();
|
||||||
|
}
|
||||||
|
|
||||||
// only called from assert
|
// only called from assert
|
||||||
private boolean termNotInReader(LeafReader reader, Term term) throws IOException {
|
private boolean termNotInReader(LeafReader reader, Term term) throws IOException {
|
||||||
return reader.docFreq(term) == 0;
|
return reader.docFreq(term) == 0;
|
||||||
|
@ -492,14 +497,13 @@ public class PhraseQuery extends Query {
|
||||||
* of processing the occurrences of a term
|
* of processing the occurrences of a term
|
||||||
* in a document that contains the term.
|
* in a document that contains the term.
|
||||||
* This is for use by {@link TwoPhaseIterator#matchCost} implementations.
|
* This is for use by {@link TwoPhaseIterator#matchCost} implementations.
|
||||||
* <br>This may be inaccurate when {@link TermsEnum#totalTermFreq()} is not available.
|
|
||||||
* @param termsEnum The term is the term at which this TermsEnum is positioned.
|
* @param termsEnum The term is the term at which this TermsEnum is positioned.
|
||||||
*/
|
*/
|
||||||
static float termPositionsCost(TermsEnum termsEnum) throws IOException {
|
static float termPositionsCost(TermsEnum termsEnum) throws IOException {
|
||||||
int docFreq = termsEnum.docFreq();
|
int docFreq = termsEnum.docFreq();
|
||||||
assert docFreq > 0;
|
assert docFreq > 0;
|
||||||
long totalTermFreq = termsEnum.totalTermFreq(); // -1 when not available
|
long totalTermFreq = termsEnum.totalTermFreq();
|
||||||
float expOccurrencesInMatchingDoc = (totalTermFreq < docFreq) ? 1 : (totalTermFreq / (float) docFreq);
|
float expOccurrencesInMatchingDoc = totalTermFreq / (float) docFreq;
|
||||||
return TERM_POSNS_SEEK_OPS_PER_DOC + expOccurrencesInMatchingDoc * TERM_OPS_PER_POS;
|
return TERM_POSNS_SEEK_OPS_PER_DOC + expOccurrencesInMatchingDoc * TERM_OPS_PER_POS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -23,6 +23,7 @@ import java.util.Collection;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.NoSuchElementException;
|
import java.util.NoSuchElementException;
|
||||||
import org.apache.lucene.document.IntPoint;
|
import org.apache.lucene.document.IntPoint;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.LeafReader;
|
import org.apache.lucene.index.LeafReader;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
import org.apache.lucene.index.PointValues.IntersectVisitor;
|
import org.apache.lucene.index.PointValues.IntersectVisitor;
|
||||||
|
@ -150,6 +151,11 @@ public abstract class PointInSetQuery extends Query {
|
||||||
|
|
||||||
return new ConstantScoreScorer(this, score(), result.build().iterator());
|
return new ConstantScoreScorer(this, score(), result.build().iterator());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return context.reader().getCoreCacheHelper();
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -20,6 +20,7 @@ import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.PointValues;
|
import org.apache.lucene.index.PointValues;
|
||||||
import org.apache.lucene.index.PointValues.IntersectVisitor;
|
import org.apache.lucene.index.PointValues.IntersectVisitor;
|
||||||
import org.apache.lucene.index.PointValues.Relation;
|
import org.apache.lucene.index.PointValues.Relation;
|
||||||
|
@ -321,6 +322,11 @@ public abstract class PointRangeQuery extends Query {
|
||||||
}
|
}
|
||||||
return scorerSupplier.get(Long.MAX_VALUE);
|
return scorerSupplier.get(Long.MAX_VALUE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return context.reader().getCoreCacheHelper();
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -140,11 +140,7 @@ public final class SynonymQuery extends Query {
|
||||||
TermStatistics termStats = searcher.termStatistics(terms[i], termContexts[i]);
|
TermStatistics termStats = searcher.termStatistics(terms[i], termContexts[i]);
|
||||||
if (termStats != null) {
|
if (termStats != null) {
|
||||||
docFreq = Math.max(termStats.docFreq(), docFreq);
|
docFreq = Math.max(termStats.docFreq(), docFreq);
|
||||||
if (termStats.totalTermFreq() == -1) {
|
totalTermFreq += termStats.totalTermFreq();
|
||||||
totalTermFreq = -1;
|
|
||||||
} else if (totalTermFreq != -1) {
|
|
||||||
totalTermFreq += termStats.totalTermFreq();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
this.similarity = searcher.getSimilarity(true);
|
this.similarity = searcher.getSimilarity(true);
|
||||||
|
@ -217,6 +213,11 @@ public final class SynonymQuery extends Query {
|
||||||
return new SynonymScorer(simScorer, this, subScorers);
|
return new SynonymScorer(simScorer, this, subScorers);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return context.reader().getCoreCacheHelper();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static class SynonymScorer extends DisjunctionScorer {
|
static class SynonymScorer extends DisjunctionScorer {
|
||||||
|
|
|
@ -315,6 +315,11 @@ public class TermInSetQuery extends Query implements Accountable {
|
||||||
return scorer(weightOrBitSet.set);
|
return scorer(weightOrBitSet.set);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return context.reader().getCoreCacheHelper();
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,6 +21,7 @@ import java.io.IOException;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.IndexReaderContext;
|
import org.apache.lucene.index.IndexReaderContext;
|
||||||
import org.apache.lucene.index.LeafReader;
|
import org.apache.lucene.index.LeafReader;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
|
@ -65,9 +66,9 @@ public class TermQuery extends Query {
|
||||||
collectionStats = searcher.collectionStatistics(term.field());
|
collectionStats = searcher.collectionStatistics(term.field());
|
||||||
termStats = searcher.termStatistics(term, termStates);
|
termStats = searcher.termStatistics(term, termStates);
|
||||||
} else {
|
} else {
|
||||||
// we do not need the actual stats, use fake stats with docFreq=maxDoc=1 and ttf=-1
|
// we do not need the actual stats, use fake stats with docFreq=maxDoc=ttf=1
|
||||||
collectionStats = new CollectionStatistics(term.field(), 1, -1, -1, -1);
|
collectionStats = new CollectionStatistics(term.field(), 1, 1, 1, 1);
|
||||||
termStats = new TermStatistics(term.bytes(), 1, -1);
|
termStats = new TermStatistics(term.bytes(), 1, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (termStats == null) {
|
if (termStats == null) {
|
||||||
|
@ -99,6 +100,11 @@ public class TermQuery extends Query {
|
||||||
return new TermScorer(this, docs, similarity.simScorer(stats, context));
|
return new TermScorer(this, docs, similarity.simScorer(stats, context));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return context.reader().getCoreCacheHelper();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a {@link TermsEnum} positioned at this weights Term or null if
|
* Returns a {@link TermsEnum} positioned at this weights Term or null if
|
||||||
* the term does not exist in the given context
|
* the term does not exist in the given context
|
||||||
|
|
|
@ -24,8 +24,29 @@ import org.apache.lucene.index.TermsEnum; // javadocs
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
/**
|
/**
|
||||||
* Contains statistics for a specific term
|
* Contains statistics for a specific term
|
||||||
|
* <p>
|
||||||
|
* This class holds statistics for this term across all documents for scoring purposes:
|
||||||
|
* <ul>
|
||||||
|
* <li> {@link #docFreq}: number of documents this term occurs in.
|
||||||
|
* <li> {@link #totalTermFreq}: number of tokens for this term.
|
||||||
|
* </ul>
|
||||||
|
* <p>
|
||||||
|
* The following conditions are always true:
|
||||||
|
* <ul>
|
||||||
|
* <li> All statistics are positive integers: never zero or negative.
|
||||||
|
* <li> {@code docFreq} <= {@code totalTermFreq}
|
||||||
|
* <li> {@code docFreq} <= {@code sumDocFreq} of the collection
|
||||||
|
* <li> {@code totalTermFreq} <= {@code sumTotalTermFreq} of the collection
|
||||||
|
* </ul>
|
||||||
|
* <p>
|
||||||
|
* Values may include statistics on deleted documents that have not yet been merged away.
|
||||||
|
* <p>
|
||||||
|
* Be careful when performing calculations on these values because they are represented
|
||||||
|
* as 64-bit integer values, you may need to cast to {@code double} for your use.
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
|
// TODO: actually add missing cross-checks to guarantee TermStatistics is in bounds of CollectionStatistics,
|
||||||
|
// otherwise many similarity functions will implode.
|
||||||
public class TermStatistics {
|
public class TermStatistics {
|
||||||
private final BytesRef term;
|
private final BytesRef term;
|
||||||
private final long docFreq;
|
private final long docFreq;
|
||||||
|
@ -45,29 +66,52 @@ public class TermStatistics {
|
||||||
if (docFreq <= 0) {
|
if (docFreq <= 0) {
|
||||||
throw new IllegalArgumentException("docFreq must be positive, docFreq: " + docFreq);
|
throw new IllegalArgumentException("docFreq must be positive, docFreq: " + docFreq);
|
||||||
}
|
}
|
||||||
if (totalTermFreq != -1) {
|
if (totalTermFreq <= 0) {
|
||||||
if (totalTermFreq < docFreq) {
|
throw new IllegalArgumentException("totalTermFreq must be positive, totalTermFreq: " + totalTermFreq);
|
||||||
throw new IllegalArgumentException("totalTermFreq must be at least docFreq, totalTermFreq: " + totalTermFreq + ", docFreq: " + docFreq);
|
}
|
||||||
}
|
if (totalTermFreq < docFreq) {
|
||||||
|
throw new IllegalArgumentException("totalTermFreq must be at least docFreq, totalTermFreq: " + totalTermFreq + ", docFreq: " + docFreq);
|
||||||
}
|
}
|
||||||
this.term = term;
|
this.term = term;
|
||||||
this.docFreq = docFreq;
|
this.docFreq = docFreq;
|
||||||
this.totalTermFreq = totalTermFreq;
|
this.totalTermFreq = totalTermFreq;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** returns the term text */
|
/**
|
||||||
|
* The term text.
|
||||||
|
* <p>
|
||||||
|
* This value is never {@code null}.
|
||||||
|
* @return term's text, not {@code null}
|
||||||
|
*/
|
||||||
public final BytesRef term() {
|
public final BytesRef term() {
|
||||||
return term;
|
return term;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** returns the number of documents this term occurs in
|
/**
|
||||||
* @see TermsEnum#docFreq() */
|
* The number of documents this term occurs in.
|
||||||
|
* <p>
|
||||||
|
* This is the document-frequency for the term: the count of documents
|
||||||
|
* where the term appears at least one time.
|
||||||
|
* <p>
|
||||||
|
* This value is always a positive number, and never
|
||||||
|
* exceeds {@link #totalTermFreq}. It also cannot exceed {@link CollectionStatistics#sumDocFreq()}.
|
||||||
|
* @return document frequency, in the range [1 .. {@link #totalTermFreq()}]
|
||||||
|
* @see TermsEnum#docFreq()
|
||||||
|
*/
|
||||||
public final long docFreq() {
|
public final long docFreq() {
|
||||||
return docFreq;
|
return docFreq;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** returns the total number of occurrences of this term
|
/**
|
||||||
* @see TermsEnum#totalTermFreq() */
|
* The total number of occurrences of this term.
|
||||||
|
* <p>
|
||||||
|
* This is the token count for the term: the number of times it appears in the field across all documents.
|
||||||
|
* <p>
|
||||||
|
* This value is always a positive number, always at least {@link #docFreq()},
|
||||||
|
* and never exceeds {@link CollectionStatistics#sumTotalTermFreq()}.
|
||||||
|
* @return number of occurrences, in the range [{@link #docFreq()} .. {@link CollectionStatistics#sumTotalTermFreq()}]
|
||||||
|
* @see TermsEnum#totalTermFreq()
|
||||||
|
*/
|
||||||
public final long totalTermFreq() {
|
public final long totalTermFreq() {
|
||||||
return totalTermFreq;
|
return totalTermFreq;
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,8 +18,11 @@ package org.apache.lucene.search;
|
||||||
|
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.List;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.FieldInfo;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.IndexReaderContext;
|
import org.apache.lucene.index.IndexReaderContext;
|
||||||
import org.apache.lucene.index.LeafReader;
|
import org.apache.lucene.index.LeafReader;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
|
@ -102,6 +105,55 @@ public abstract class Weight {
|
||||||
*/
|
*/
|
||||||
public abstract Scorer scorer(LeafReaderContext context) throws IOException;
|
public abstract Scorer scorer(LeafReaderContext context) throws IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns an {@link org.apache.lucene.index.IndexReader.CacheHelper} to cache this query against
|
||||||
|
*
|
||||||
|
* Weights that rely only on Terms or Points can return {@code context.reader().getCoreCacheHelper()}.
|
||||||
|
* Weights that use DocValues should call {@link #getDocValuesCacheHelper(String, LeafReaderContext)}
|
||||||
|
* Weights that should not be cached at all should return {@code null}
|
||||||
|
*
|
||||||
|
* @param context the {@link LeafReaderContext} to cache against
|
||||||
|
* @return an {@link org.apache.lucene.index.IndexReader.CacheHelper} indicating the cache level
|
||||||
|
*/
|
||||||
|
public abstract IndexReader.CacheHelper getCacheHelper(LeafReaderContext context);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Given a collection of Weights, return an {@link org.apache.lucene.index.IndexReader.CacheHelper} that will satisfy
|
||||||
|
* the requirements of them all.
|
||||||
|
* @param context the {@link LeafReaderContext} to cache against
|
||||||
|
* @param weights an array of {@link Weight} to be cached
|
||||||
|
* @return an {@link org.apache.lucene.index.IndexReader.CacheHelper} indicating the cache level
|
||||||
|
*/
|
||||||
|
protected static IndexReader.CacheHelper getCacheHelper(LeafReaderContext context, List<? extends Weight> weights) {
|
||||||
|
if (weights.size() == 0)
|
||||||
|
return null;
|
||||||
|
IndexReader.CacheHelper helper = weights.get(0).getCacheHelper(context);
|
||||||
|
if (helper == null)
|
||||||
|
return null;
|
||||||
|
for (int i = 1; i < weights.size(); i++) {
|
||||||
|
IndexReader.CacheHelper nextHelper = weights.get(i).getCacheHelper(context);
|
||||||
|
if (nextHelper == null || nextHelper != helper)
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return helper;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns an {@link org.apache.lucene.index.IndexReader.CacheHelper} for a Weight using doc values
|
||||||
|
*
|
||||||
|
* This will return the core reader for
|
||||||
|
*
|
||||||
|
* @param field the docvalues field
|
||||||
|
* @param ctx the {@link LeafReaderContext} to cache against
|
||||||
|
* @return an {@link org.apache.lucene.index.IndexReader.CacheHelper} indicating the cache level
|
||||||
|
*/
|
||||||
|
public static IndexReader.CacheHelper getDocValuesCacheHelper(String field, LeafReaderContext ctx) {
|
||||||
|
FieldInfo fi = ctx.reader().getFieldInfos().fieldInfo(field);
|
||||||
|
if (fi == null || fi.getDocValuesGen() == -1)
|
||||||
|
return ctx.reader().getCoreCacheHelper();
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Optional method.
|
* Optional method.
|
||||||
* Get a {@link ScorerSupplier}, which allows to know the cost of the {@link Scorer}
|
* Get a {@link ScorerSupplier}, which allows to know the cost of the {@link Scorer}
|
||||||
|
|
|
@ -85,19 +85,7 @@ public class BM25Similarity extends Similarity {
|
||||||
|
|
||||||
/** The default implementation computes the average as <code>sumTotalTermFreq / docCount</code> */
|
/** The default implementation computes the average as <code>sumTotalTermFreq / docCount</code> */
|
||||||
protected float avgFieldLength(CollectionStatistics collectionStats) {
|
protected float avgFieldLength(CollectionStatistics collectionStats) {
|
||||||
final long sumTotalTermFreq;
|
return (float) (collectionStats.sumTotalTermFreq() / (double) collectionStats.docCount());
|
||||||
if (collectionStats.sumTotalTermFreq() == -1) {
|
|
||||||
// frequencies are omitted (tf=1), its # of postings
|
|
||||||
if (collectionStats.sumDocFreq() == -1) {
|
|
||||||
// theoretical case only: remove!
|
|
||||||
return 1f;
|
|
||||||
}
|
|
||||||
sumTotalTermFreq = collectionStats.sumDocFreq();
|
|
||||||
} else {
|
|
||||||
sumTotalTermFreq = collectionStats.sumTotalTermFreq();
|
|
||||||
}
|
|
||||||
final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
|
|
||||||
return (float) (sumTotalTermFreq / (double) docCount);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -161,7 +149,7 @@ public class BM25Similarity extends Similarity {
|
||||||
*/
|
*/
|
||||||
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) {
|
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) {
|
||||||
final long df = termStats.docFreq();
|
final long df = termStats.docFreq();
|
||||||
final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
|
final long docCount = collectionStats.docCount();
|
||||||
final float idf = idf(df, docCount);
|
final float idf = idf(df, docCount);
|
||||||
return Explanation.match(idf, "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:",
|
return Explanation.match(idf, "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:",
|
||||||
Explanation.match(df, "n, number of documents containing term"),
|
Explanation.match(df, "n, number of documents containing term"),
|
||||||
|
|
|
@ -62,7 +62,7 @@ public class ClassicSimilarity extends TFIDFSimilarity {
|
||||||
@Override
|
@Override
|
||||||
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) {
|
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) {
|
||||||
final long df = termStats.docFreq();
|
final long df = termStats.docFreq();
|
||||||
final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
|
final long docCount = collectionStats.docCount();
|
||||||
final float idf = idf(df, docCount);
|
final float idf = idf(df, docCount);
|
||||||
return Explanation.match(idf, "idf, computed as log((docCount+1)/(docFreq+1)) + 1 from:",
|
return Explanation.match(idf, "idf, computed as log((docCount+1)/(docFreq+1)) + 1 from:",
|
||||||
Explanation.match(df, "docFreq, number of documents containing term"),
|
Explanation.match(df, "docFreq, number of documents containing term"),
|
||||||
|
|
|
@ -100,42 +100,16 @@ public abstract class SimilarityBase extends Similarity {
|
||||||
/** Fills all member fields defined in {@code BasicStats} in {@code stats}.
|
/** Fills all member fields defined in {@code BasicStats} in {@code stats}.
|
||||||
* Subclasses can override this method to fill additional stats. */
|
* Subclasses can override this method to fill additional stats. */
|
||||||
protected void fillBasicStats(BasicStats stats, CollectionStatistics collectionStats, TermStatistics termStats) {
|
protected void fillBasicStats(BasicStats stats, CollectionStatistics collectionStats, TermStatistics termStats) {
|
||||||
// #positions(field) must be >= #positions(term)
|
// TODO: validate this for real, somewhere else
|
||||||
assert collectionStats.sumTotalTermFreq() == -1 || collectionStats.sumTotalTermFreq() >= termStats.totalTermFreq();
|
assert termStats.totalTermFreq() <= collectionStats.sumTotalTermFreq();
|
||||||
long numberOfDocuments = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
|
assert termStats.docFreq() <= collectionStats.sumDocFreq();
|
||||||
|
|
||||||
long docFreq = termStats.docFreq();
|
|
||||||
long totalTermFreq = termStats.totalTermFreq();
|
|
||||||
|
|
||||||
// frequencies are omitted, all postings have tf=1, so totalTermFreq = docFreq
|
|
||||||
if (totalTermFreq == -1) {
|
|
||||||
totalTermFreq = docFreq;
|
|
||||||
}
|
|
||||||
|
|
||||||
final long numberOfFieldTokens;
|
|
||||||
final double avgFieldLength;
|
|
||||||
|
|
||||||
if (collectionStats.sumTotalTermFreq() == -1) {
|
|
||||||
// frequencies are omitted, so sumTotalTermFreq = # postings
|
|
||||||
if (collectionStats.sumDocFreq() == -1) {
|
|
||||||
// theoretical case only: remove!
|
|
||||||
numberOfFieldTokens = docFreq;
|
|
||||||
avgFieldLength = 1f;
|
|
||||||
} else {
|
|
||||||
numberOfFieldTokens = collectionStats.sumDocFreq();
|
|
||||||
avgFieldLength = (float) (collectionStats.sumDocFreq() / (double)numberOfDocuments);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
numberOfFieldTokens = collectionStats.sumTotalTermFreq();
|
|
||||||
avgFieldLength = (float) (collectionStats.sumTotalTermFreq() / (double)numberOfDocuments);
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: add sumDocFreq for field (numberOfFieldPostings)
|
// TODO: add sumDocFreq for field (numberOfFieldPostings)
|
||||||
stats.setNumberOfDocuments(numberOfDocuments);
|
stats.setNumberOfDocuments(collectionStats.docCount());
|
||||||
stats.setNumberOfFieldTokens(numberOfFieldTokens);
|
stats.setNumberOfFieldTokens(collectionStats.sumTotalTermFreq());
|
||||||
stats.setAvgFieldLength(avgFieldLength);
|
stats.setAvgFieldLength(collectionStats.sumTotalTermFreq() / (double) collectionStats.docCount());
|
||||||
stats.setDocFreq(docFreq);
|
stats.setDocFreq(termStats.docFreq());
|
||||||
stats.setTotalTermFreq(totalTermFreq);
|
stats.setTotalTermFreq(termStats.totalTermFreq());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -448,7 +448,7 @@ public abstract class TFIDFSimilarity extends Similarity {
|
||||||
*/
|
*/
|
||||||
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) {
|
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) {
|
||||||
final long df = termStats.docFreq();
|
final long df = termStats.docFreq();
|
||||||
final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
|
final long docCount = collectionStats.docCount();
|
||||||
final float idf = idf(df, docCount);
|
final float idf = idf(df, docCount);
|
||||||
return Explanation.match(idf, "idf(docFreq, docCount)",
|
return Explanation.match(idf, "idf(docFreq, docCount)",
|
||||||
Explanation.match(df, "docFreq, number of documents containing term"),
|
Explanation.match(df, "docFreq, number of documents containing term"),
|
||||||
|
|
|
@ -32,13 +32,13 @@
|
||||||
* <a name="sims"></a>
|
* <a name="sims"></a>
|
||||||
* <h2>Summary of the Ranking Methods</h2>
|
* <h2>Summary of the Ranking Methods</h2>
|
||||||
*
|
*
|
||||||
* <p>{@link org.apache.lucene.search.similarities.ClassicSimilarity} is the original Lucene
|
|
||||||
* scoring function. It is based on a highly optimized
|
|
||||||
* <a href="http://en.wikipedia.org/wiki/Vector_Space_Model">Vector Space Model</a>. For more
|
|
||||||
* information, see {@link org.apache.lucene.search.similarities.TFIDFSimilarity}.
|
|
||||||
*
|
|
||||||
* <p>{@link org.apache.lucene.search.similarities.BM25Similarity} is an optimized
|
* <p>{@link org.apache.lucene.search.similarities.BM25Similarity} is an optimized
|
||||||
* implementation of the successful Okapi BM25 model.
|
* implementation of the successful Okapi BM25 model.
|
||||||
|
*
|
||||||
|
* <p>{@link org.apache.lucene.search.similarities.ClassicSimilarity} is the original Lucene
|
||||||
|
* scoring function. It is based on the
|
||||||
|
* <a href="http://en.wikipedia.org/wiki/Vector_Space_Model">Vector Space Model</a>. For more
|
||||||
|
* information, see {@link org.apache.lucene.search.similarities.TFIDFSimilarity}.
|
||||||
*
|
*
|
||||||
* <p>{@link org.apache.lucene.search.similarities.SimilarityBase} provides a basic
|
* <p>{@link org.apache.lucene.search.similarities.SimilarityBase} provides a basic
|
||||||
* implementation of the Similarity contract and exposes a highly simplified
|
* implementation of the Similarity contract and exposes a highly simplified
|
||||||
|
|
|
@ -19,8 +19,10 @@ package org.apache.lucene.search.spans;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.TermContext;
|
import org.apache.lucene.index.TermContext;
|
||||||
|
@ -115,5 +117,10 @@ public final class SpanContainingQuery extends SpanContainQuery {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return getCacheHelper(context, Arrays.asList(bigWeight, littleWeight));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -229,6 +229,11 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
|
||||||
w.extractTerms(terms);
|
w.extractTerms(terms);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return getCacheHelper(context, subWeights);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -319,6 +324,11 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
|
||||||
public void extractTerms(Set<Term> terms) {
|
public void extractTerms(Set<Term> terms) {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return context.reader().getCoreCacheHelper();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.search.spans;
|
||||||
|
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
@ -191,6 +192,11 @@ public final class SpanNotQuery extends SpanQuery {
|
||||||
public void extractTerms(Set<Term> terms) {
|
public void extractTerms(Set<Term> terms) {
|
||||||
includeWeight.extractTerms(terms);
|
includeWeight.extractTerms(terms);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return getCacheHelper(context, Arrays.asList(includeWeight, excludeWeight));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -138,6 +138,11 @@ public final class SpanOrQuery extends SpanQuery {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return getCacheHelper(context, subWeights);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void extractTermContexts(Map<Term, TermContext> contexts) {
|
public void extractTermContexts(Map<Term, TermContext> contexts) {
|
||||||
for (SpanWeight w : subWeights) {
|
for (SpanWeight w : subWeights) {
|
||||||
|
|
|
@ -86,6 +86,11 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea
|
||||||
matchWeight.extractTerms(terms);
|
matchWeight.extractTerms(terms);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return matchWeight.getCacheHelper(context);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void extractTermContexts(Map<Term, TermContext> contexts) {
|
public void extractTermContexts(Map<Term, TermContext> contexts) {
|
||||||
matchWeight.extractTermContexts(contexts);
|
matchWeight.extractTermContexts(contexts);
|
||||||
|
|
|
@ -23,6 +23,7 @@ import java.util.Map;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.IndexReaderContext;
|
import org.apache.lucene.index.IndexReaderContext;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
import org.apache.lucene.index.PostingsEnum;
|
import org.apache.lucene.index.PostingsEnum;
|
||||||
|
@ -91,6 +92,11 @@ public class SpanTermQuery extends SpanQuery {
|
||||||
terms.add(term);
|
terms.add(term);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return context.reader().getCoreCacheHelper();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void extractTermContexts(Map<Term, TermContext> contexts) {
|
public void extractTermContexts(Map<Term, TermContext> contexts) {
|
||||||
contexts.put(term, termContext);
|
contexts.put(term, termContext);
|
||||||
|
@ -135,7 +141,6 @@ public class SpanTermQuery extends SpanQuery {
|
||||||
/** Returns an expected cost in simple operations
|
/** Returns an expected cost in simple operations
|
||||||
* of processing the occurrences of a term
|
* of processing the occurrences of a term
|
||||||
* in a document that contains the term.
|
* in a document that contains the term.
|
||||||
* <br>This may be inaccurate when {@link TermsEnum#totalTermFreq()} is not available.
|
|
||||||
* @param termsEnum The term is the term at which this TermsEnum is positioned.
|
* @param termsEnum The term is the term at which this TermsEnum is positioned.
|
||||||
* <p>
|
* <p>
|
||||||
* This is a copy of org.apache.lucene.search.PhraseQuery.termPositionsCost().
|
* This is a copy of org.apache.lucene.search.PhraseQuery.termPositionsCost().
|
||||||
|
@ -146,8 +151,9 @@ public class SpanTermQuery extends SpanQuery {
|
||||||
static float termPositionsCost(TermsEnum termsEnum) throws IOException {
|
static float termPositionsCost(TermsEnum termsEnum) throws IOException {
|
||||||
int docFreq = termsEnum.docFreq();
|
int docFreq = termsEnum.docFreq();
|
||||||
assert docFreq > 0;
|
assert docFreq > 0;
|
||||||
long totalTermFreq = termsEnum.totalTermFreq(); // -1 when not available
|
long totalTermFreq = termsEnum.totalTermFreq();
|
||||||
float expOccurrencesInMatchingDoc = (totalTermFreq < docFreq) ? 1 : (totalTermFreq / (float) docFreq);
|
assert totalTermFreq > 0;
|
||||||
|
float expOccurrencesInMatchingDoc = totalTermFreq / (float) docFreq;
|
||||||
return TERM_POSNS_SEEK_OPS_PER_DOC + expOccurrencesInMatchingDoc * TERM_OPS_PER_POS;
|
return TERM_POSNS_SEEK_OPS_PER_DOC + expOccurrencesInMatchingDoc * TERM_OPS_PER_POS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -19,8 +19,10 @@ package org.apache.lucene.search.spans;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.TermContext;
|
import org.apache.lucene.index.TermContext;
|
||||||
|
@ -116,6 +118,11 @@ public final class SpanWithinQuery extends SpanContainQuery {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return getCacheHelper(context, Arrays.asList(littleWeight, bigWeight));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
|
@ -172,10 +172,11 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase {
|
||||||
|
|
||||||
// NOTE: we don't assert hasOffsets/hasPositions/hasPayloads because they are allowed to be different
|
// NOTE: we don't assert hasOffsets/hasPositions/hasPayloads because they are allowed to be different
|
||||||
|
|
||||||
|
boolean bothHaveFreqs = leftTerms.hasFreqs() && rightTerms.hasFreqs();
|
||||||
boolean bothHavePositions = leftTerms.hasPositions() && rightTerms.hasPositions();
|
boolean bothHavePositions = leftTerms.hasPositions() && rightTerms.hasPositions();
|
||||||
TermsEnum leftTermsEnum = leftTerms.iterator();
|
TermsEnum leftTermsEnum = leftTerms.iterator();
|
||||||
TermsEnum rightTermsEnum = rightTerms.iterator();
|
TermsEnum rightTermsEnum = rightTerms.iterator();
|
||||||
assertTermsEnum(leftTermsEnum, rightTermsEnum, true, bothHavePositions);
|
assertTermsEnum(leftTermsEnum, rightTermsEnum, true, bothHaveFreqs, bothHavePositions);
|
||||||
|
|
||||||
assertTermsSeeking(leftTerms, rightTerms);
|
assertTermsSeeking(leftTerms, rightTerms);
|
||||||
|
|
||||||
|
@ -188,7 +189,7 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase {
|
||||||
// TODO: test start term too
|
// TODO: test start term too
|
||||||
TermsEnum leftIntersection = leftTerms.intersect(automaton, null);
|
TermsEnum leftIntersection = leftTerms.intersect(automaton, null);
|
||||||
TermsEnum rightIntersection = rightTerms.intersect(automaton, null);
|
TermsEnum rightIntersection = rightTerms.intersect(automaton, null);
|
||||||
assertTermsEnum(leftIntersection, rightIntersection, rarely(), bothHavePositions);
|
assertTermsEnum(leftIntersection, rightIntersection, rarely(), bothHaveFreqs, bothHavePositions);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -263,13 +264,9 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase {
|
||||||
* checks collection-level statistics on Terms
|
* checks collection-level statistics on Terms
|
||||||
*/
|
*/
|
||||||
public void assertTermsStatistics(Terms leftTerms, Terms rightTerms) throws Exception {
|
public void assertTermsStatistics(Terms leftTerms, Terms rightTerms) throws Exception {
|
||||||
if (leftTerms.getDocCount() != -1 && rightTerms.getDocCount() != -1) {
|
assertEquals(leftTerms.getDocCount(), rightTerms.getDocCount());
|
||||||
assertEquals(leftTerms.getDocCount(), rightTerms.getDocCount());
|
assertEquals(leftTerms.getSumDocFreq(), rightTerms.getSumDocFreq());
|
||||||
}
|
if (leftTerms.hasFreqs() && rightTerms.hasFreqs()) {
|
||||||
if (leftTerms.getSumDocFreq() != -1 && rightTerms.getSumDocFreq() != -1) {
|
|
||||||
assertEquals(leftTerms.getSumDocFreq(), rightTerms.getSumDocFreq());
|
|
||||||
}
|
|
||||||
if (leftTerms.getSumTotalTermFreq() != -1 && rightTerms.getSumTotalTermFreq() != -1) {
|
|
||||||
assertEquals(leftTerms.getSumTotalTermFreq(), rightTerms.getSumTotalTermFreq());
|
assertEquals(leftTerms.getSumTotalTermFreq(), rightTerms.getSumTotalTermFreq());
|
||||||
}
|
}
|
||||||
if (leftTerms.size() != -1 && rightTerms.size() != -1) {
|
if (leftTerms.size() != -1 && rightTerms.size() != -1) {
|
||||||
|
@ -281,7 +278,7 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase {
|
||||||
* checks the terms enum sequentially
|
* checks the terms enum sequentially
|
||||||
* if deep is false, it does a 'shallow' test that doesnt go down to the docsenums
|
* if deep is false, it does a 'shallow' test that doesnt go down to the docsenums
|
||||||
*/
|
*/
|
||||||
public void assertTermsEnum(TermsEnum leftTermsEnum, TermsEnum rightTermsEnum, boolean deep, boolean hasPositions) throws Exception {
|
public void assertTermsEnum(TermsEnum leftTermsEnum, TermsEnum rightTermsEnum, boolean deep, boolean hasFreqs, boolean hasPositions) throws Exception {
|
||||||
BytesRef term;
|
BytesRef term;
|
||||||
PostingsEnum leftPositions = null;
|
PostingsEnum leftPositions = null;
|
||||||
PostingsEnum rightPositions = null;
|
PostingsEnum rightPositions = null;
|
||||||
|
@ -290,7 +287,7 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase {
|
||||||
|
|
||||||
while ((term = leftTermsEnum.next()) != null) {
|
while ((term = leftTermsEnum.next()) != null) {
|
||||||
assertEquals(term, rightTermsEnum.next());
|
assertEquals(term, rightTermsEnum.next());
|
||||||
assertTermStats(leftTermsEnum, rightTermsEnum);
|
assertTermStats(leftTermsEnum, rightTermsEnum, hasFreqs);
|
||||||
if (deep) {
|
if (deep) {
|
||||||
if (hasPositions) {
|
if (hasPositions) {
|
||||||
// with payloads + off
|
// with payloads + off
|
||||||
|
@ -350,9 +347,9 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase {
|
||||||
/**
|
/**
|
||||||
* checks term-level statistics
|
* checks term-level statistics
|
||||||
*/
|
*/
|
||||||
public void assertTermStats(TermsEnum leftTermsEnum, TermsEnum rightTermsEnum) throws Exception {
|
public void assertTermStats(TermsEnum leftTermsEnum, TermsEnum rightTermsEnum, boolean bothHaveFreqs) throws Exception {
|
||||||
assertEquals(leftTermsEnum.docFreq(), rightTermsEnum.docFreq());
|
assertEquals(leftTermsEnum.docFreq(), rightTermsEnum.docFreq());
|
||||||
if (leftTermsEnum.totalTermFreq() != -1 && rightTermsEnum.totalTermFreq() != -1) {
|
if (bothHaveFreqs) {
|
||||||
assertEquals(leftTermsEnum.totalTermFreq(), rightTermsEnum.totalTermFreq());
|
assertEquals(leftTermsEnum.totalTermFreq(), rightTermsEnum.totalTermFreq());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -148,26 +148,22 @@ public class TestMultiTermsEnum extends LuceneTestCase {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long size() throws IOException {
|
public long size() throws IOException {
|
||||||
// Docs say we can return -1 if we don't know.
|
throw new UnsupportedOperationException();
|
||||||
return -1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long getSumTotalTermFreq() throws IOException {
|
public long getSumTotalTermFreq() throws IOException {
|
||||||
// Docs say we can return -1 if we don't know.
|
throw new UnsupportedOperationException();
|
||||||
return -1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long getSumDocFreq() throws IOException {
|
public long getSumDocFreq() throws IOException {
|
||||||
// Docs say we can return -1 if we don't know.
|
throw new UnsupportedOperationException();
|
||||||
return -1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int getDocCount() throws IOException {
|
public int getDocCount() throws IOException {
|
||||||
// Docs say we can return -1 if we don't know.
|
throw new UnsupportedOperationException();
|
||||||
return -1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -445,7 +445,7 @@ public class TestOmitTf extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** test that when freqs are omitted, that totalTermFreq and sumTotalTermFreq are -1 */
|
/** test that when freqs are omitted, that totalTermFreq and sumTotalTermFreq are docFreq, and sumDocFreq */
|
||||||
public void testStats() throws Exception {
|
public void testStats() throws Exception {
|
||||||
Directory dir = newDirectory();
|
Directory dir = newDirectory();
|
||||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir,
|
RandomIndexWriter iw = new RandomIndexWriter(random(), dir,
|
||||||
|
@ -459,8 +459,8 @@ public class TestOmitTf extends LuceneTestCase {
|
||||||
iw.addDocument(doc);
|
iw.addDocument(doc);
|
||||||
IndexReader ir = iw.getReader();
|
IndexReader ir = iw.getReader();
|
||||||
iw.close();
|
iw.close();
|
||||||
assertEquals(-1, ir.totalTermFreq(new Term("foo", new BytesRef("bar"))));
|
assertEquals(ir.docFreq(new Term("foo", new BytesRef("bar"))), ir.totalTermFreq(new Term("foo", new BytesRef("bar"))));
|
||||||
assertEquals(-1, ir.getSumTotalTermFreq("foo"));
|
assertEquals(ir.getSumDocFreq("foo"), ir.getSumTotalTermFreq("foo"));
|
||||||
ir.close();
|
ir.close();
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,6 +21,7 @@ import java.io.IOException;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.index.FieldInvertState;
|
import org.apache.lucene.index.FieldInvertState;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.search.similarities.Similarity;
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
|
@ -262,6 +263,11 @@ final class JustCompileSearch {
|
||||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -94,6 +94,11 @@ public class TestBooleanScorer extends LuceneTestCase {
|
||||||
throw new UnsupportedOperationException();
|
throw new UnsupportedOperationException();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public BulkScorer bulkScorer(LeafReaderContext context) {
|
public BulkScorer bulkScorer(LeafReaderContext context) {
|
||||||
return new BulkScorer() {
|
return new BulkScorer() {
|
||||||
|
|
|
@ -29,6 +29,7 @@ import java.util.HashSet;
|
||||||
import java.util.LinkedHashMap;
|
import java.util.LinkedHashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
import java.util.concurrent.ExecutionException;
|
import java.util.concurrent.ExecutionException;
|
||||||
import java.util.concurrent.atomic.AtomicBoolean;
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
@ -36,15 +37,16 @@ import java.util.concurrent.atomic.AtomicLong;
|
||||||
import java.util.concurrent.atomic.AtomicReference;
|
import java.util.concurrent.atomic.AtomicReference;
|
||||||
|
|
||||||
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
|
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
|
||||||
|
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field.Store;
|
import org.apache.lucene.document.Field.Store;
|
||||||
|
import org.apache.lucene.document.NumericDocValuesField;
|
||||||
import org.apache.lucene.document.StringField;
|
import org.apache.lucene.document.StringField;
|
||||||
import org.apache.lucene.document.TextField;
|
import org.apache.lucene.document.TextField;
|
||||||
import org.apache.lucene.index.DirectoryReader;
|
import org.apache.lucene.index.DirectoryReader;
|
||||||
import org.apache.lucene.index.FilterDirectoryReader;
|
import org.apache.lucene.index.FilterDirectoryReader;
|
||||||
import org.apache.lucene.index.FilterLeafReader;
|
import org.apache.lucene.index.FilterLeafReader;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.IndexWriter;
|
||||||
import org.apache.lucene.index.IndexWriterConfig;
|
import org.apache.lucene.index.IndexWriterConfig;
|
||||||
import org.apache.lucene.index.LeafReader;
|
import org.apache.lucene.index.LeafReader;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
|
@ -358,6 +360,11 @@ public class TestLRUQueryCache extends LuceneTestCase {
|
||||||
public Scorer scorer(LeafReaderContext context) throws IOException {
|
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return context.reader().getCoreCacheHelper();
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -947,6 +954,11 @@ public class TestLRUQueryCache extends LuceneTestCase {
|
||||||
public Scorer scorer(LeafReaderContext context) throws IOException {
|
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return context.reader().getCoreCacheHelper();
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1276,6 +1288,78 @@ public class TestLRUQueryCache extends LuceneTestCase {
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// A query that returns null from Weight.getCacheHelper
|
||||||
|
private static class NoCacheQuery extends Query {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
|
||||||
|
return new Weight(this) {
|
||||||
|
@Override
|
||||||
|
public void extractTerms(Set<Term> terms) {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString(String field) {
|
||||||
|
return "NoCacheQuery";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object obj) {
|
||||||
|
return sameClassAs(obj);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testQueryNotSuitedForCaching() throws IOException {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
IndexWriterConfig iwc = newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE);
|
||||||
|
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
|
||||||
|
w.addDocument(new Document());
|
||||||
|
DirectoryReader reader = w.getReader();
|
||||||
|
IndexSearcher searcher = newSearcher(reader);
|
||||||
|
searcher.setQueryCachingPolicy(QueryCachingPolicy.ALWAYS_CACHE);
|
||||||
|
|
||||||
|
LRUQueryCache cache = new LRUQueryCache(2, 10000, context -> true);
|
||||||
|
searcher.setQueryCache(cache);
|
||||||
|
|
||||||
|
assertEquals(0, searcher.count(new NoCacheQuery()));
|
||||||
|
assertEquals(0, cache.getCacheCount());
|
||||||
|
|
||||||
|
// BooleanQuery wrapping an uncacheable query should also not be cached
|
||||||
|
BooleanQuery bq = new BooleanQuery.Builder()
|
||||||
|
.add(new NoCacheQuery(), Occur.MUST)
|
||||||
|
.add(new TermQuery(new Term("field", "term")), Occur.MUST).build();
|
||||||
|
assertEquals(0, searcher.count(bq));
|
||||||
|
assertEquals(0, cache.getCacheCount());
|
||||||
|
|
||||||
|
reader.close();
|
||||||
|
w.close();
|
||||||
|
dir.close();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
private static class DummyQuery2 extends Query {
|
private static class DummyQuery2 extends Query {
|
||||||
|
|
||||||
private final AtomicBoolean scorerCreated;
|
private final AtomicBoolean scorerCreated;
|
||||||
|
@ -1291,6 +1375,12 @@ public class TestLRUQueryCache extends LuceneTestCase {
|
||||||
public Scorer scorer(LeafReaderContext context) throws IOException {
|
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||||
return scorerSupplier(context).get(Long.MAX_VALUE);
|
return scorerSupplier(context).get(Long.MAX_VALUE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return context.reader().getCoreCacheHelper();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
|
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
|
||||||
final Weight weight = this;
|
final Weight weight = this;
|
||||||
|
@ -1351,4 +1441,110 @@ public class TestLRUQueryCache extends LuceneTestCase {
|
||||||
w.close();
|
w.close();
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static class DVCacheQuery extends Query {
|
||||||
|
|
||||||
|
final String field;
|
||||||
|
|
||||||
|
AtomicInteger scorerCreatedCount = new AtomicInteger(0);
|
||||||
|
|
||||||
|
DVCacheQuery(String field) {
|
||||||
|
this.field = field;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString(String field) {
|
||||||
|
return "DVCacheQuery";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object obj) {
|
||||||
|
return sameClassAs(obj);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
|
||||||
|
return new ConstantScoreWeight(this, 1) {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||||
|
scorerCreatedCount.incrementAndGet();
|
||||||
|
return new ConstantScoreScorer(this, 1, DocIdSetIterator.all(context.reader().maxDoc()));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return getDocValuesCacheHelper(field, context);
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testDocValuesUpdatesDontBreakCache() throws IOException {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
IndexWriterConfig iwc = newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE);
|
||||||
|
//RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
|
||||||
|
IndexWriter w = new IndexWriter(dir, iwc);
|
||||||
|
w.addDocument(new Document());
|
||||||
|
w.commit();
|
||||||
|
DirectoryReader reader = DirectoryReader.open(w);
|
||||||
|
IndexSearcher searcher = newSearcher(reader);
|
||||||
|
searcher.setQueryCachingPolicy(QueryCachingPolicy.ALWAYS_CACHE);
|
||||||
|
|
||||||
|
LRUQueryCache cache = new LRUQueryCache(1, 1000, context -> true);
|
||||||
|
searcher.setQueryCache(cache);
|
||||||
|
|
||||||
|
DVCacheQuery query = new DVCacheQuery("field");
|
||||||
|
assertEquals(1, searcher.count(query));
|
||||||
|
assertEquals(1, query.scorerCreatedCount.get());
|
||||||
|
assertEquals(1, searcher.count(query));
|
||||||
|
assertEquals(1, query.scorerCreatedCount.get()); // should be cached
|
||||||
|
|
||||||
|
Document doc = new Document();
|
||||||
|
doc.add(new NumericDocValuesField("field", 1));
|
||||||
|
doc.add(newTextField("text", "text", Store.NO));
|
||||||
|
w.addDocument(doc);
|
||||||
|
reader.close();
|
||||||
|
reader = DirectoryReader.open(w);
|
||||||
|
searcher = newSearcher(reader);
|
||||||
|
searcher.setQueryCachingPolicy(QueryCachingPolicy.ALWAYS_CACHE);
|
||||||
|
searcher.setQueryCache(cache);
|
||||||
|
|
||||||
|
assertEquals(2, searcher.count(query));
|
||||||
|
assertEquals(2, query.scorerCreatedCount.get()); // first segment cached
|
||||||
|
|
||||||
|
reader.close();
|
||||||
|
reader = DirectoryReader.open(w);
|
||||||
|
searcher = newSearcher(reader);
|
||||||
|
searcher.setQueryCachingPolicy(QueryCachingPolicy.ALWAYS_CACHE);
|
||||||
|
searcher.setQueryCache(cache);
|
||||||
|
|
||||||
|
assertEquals(2, searcher.count(query));
|
||||||
|
assertEquals(2, query.scorerCreatedCount.get()); // both segments cached
|
||||||
|
|
||||||
|
|
||||||
|
w.updateNumericDocValue(new Term("text", "text"), "field", 2l);
|
||||||
|
reader.close();
|
||||||
|
reader = DirectoryReader.open(w);
|
||||||
|
searcher = newSearcher(reader);
|
||||||
|
searcher.setQueryCachingPolicy(QueryCachingPolicy.ALWAYS_CACHE);
|
||||||
|
searcher.setQueryCache(cache);
|
||||||
|
|
||||||
|
assertEquals(2, searcher.count(query));
|
||||||
|
assertEquals(3, query.scorerCreatedCount.get()); // second segment no longer cached due to DV update
|
||||||
|
|
||||||
|
assertEquals(2, searcher.count(query));
|
||||||
|
assertEquals(4, query.scorerCreatedCount.get()); // still no caching
|
||||||
|
|
||||||
|
reader.close();
|
||||||
|
w.close();
|
||||||
|
dir.close();
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,7 +18,6 @@ package org.apache.lucene.search;
|
||||||
|
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Set;
|
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
|
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
|
@ -103,17 +102,7 @@ public class TestNeedsScores extends LuceneTestCase {
|
||||||
@Override
|
@Override
|
||||||
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
|
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
|
||||||
final Weight w = in.createWeight(searcher, needsScores, boost);
|
final Weight w = in.createWeight(searcher, needsScores, boost);
|
||||||
return new Weight(AssertNeedsScores.this) {
|
return new FilterWeight(w) {
|
||||||
@Override
|
|
||||||
public void extractTerms(Set<Term> terms) {
|
|
||||||
w.extractTerms(terms);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
|
|
||||||
return w.explain(context, doc);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Scorer scorer(LeafReaderContext context) throws IOException {
|
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||||
assertEquals("query=" + in, value, needsScores);
|
assertEquals("query=" + in, value, needsScores);
|
||||||
|
|
|
@ -487,6 +487,11 @@ public class TestQueryRescorer extends LuceneTestCase {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
|
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
|
||||||
return null;
|
return null;
|
||||||
|
|
|
@ -155,6 +155,11 @@ public class TestScorerPerf extends LuceneTestCase {
|
||||||
public Scorer scorer(LeafReaderContext context) throws IOException {
|
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||||
return new ConstantScoreScorer(this, score(), new BitSetIterator(docs, docs.approximateCardinality()));
|
return new ConstantScoreScorer(this, score(), new BitSetIterator(docs, docs.approximateCardinality()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -249,6 +249,11 @@ public class TestSortRandom extends LuceneTestCase {
|
||||||
|
|
||||||
return new ConstantScoreScorer(this, score(), new BitSetIterator(bits, bits.approximateCardinality()));
|
return new ConstantScoreScorer(this, score(), new BitSetIterator(bits, bits.approximateCardinality()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -124,6 +124,11 @@ public class TestUsageTrackingFilterCachingPolicy extends LuceneTestCase {
|
||||||
public Scorer scorer(LeafReaderContext context) throws IOException {
|
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||||
return new ConstantScoreScorer(this, score(), DocIdSetIterator.all(1));
|
return new ConstantScoreScorer(this, score(), DocIdSetIterator.all(1));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return context.reader().getCoreCacheHelper();
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -40,6 +40,7 @@ import org.apache.lucene.search.TopDocs;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
import org.apache.lucene.util.TestUtil;
|
||||||
import org.apache.lucene.util.Version;
|
import org.apache.lucene.util.Version;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -183,7 +184,17 @@ public class TestSimilarityBase extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
private CollectionStatistics toCollectionStats(BasicStats stats) {
|
private CollectionStatistics toCollectionStats(BasicStats stats) {
|
||||||
return new CollectionStatistics(stats.field, stats.getNumberOfDocuments(), -1, stats.getNumberOfFieldTokens(), -1);
|
long sumTtf = stats.getNumberOfFieldTokens();
|
||||||
|
long sumDf;
|
||||||
|
if (sumTtf == -1) {
|
||||||
|
sumDf = TestUtil.nextLong(random(), stats.getNumberOfDocuments(), 2L * stats.getNumberOfDocuments());
|
||||||
|
} else {
|
||||||
|
sumDf = TestUtil.nextLong(random(), Math.min(stats.getNumberOfDocuments(), sumTtf), sumTtf);
|
||||||
|
}
|
||||||
|
int docCount = Math.toIntExact(Math.min(sumDf, stats.getNumberOfDocuments()));
|
||||||
|
int maxDoc = TestUtil.nextInt(random(), docCount, docCount + 10);
|
||||||
|
|
||||||
|
return new CollectionStatistics(stats.field, maxDoc, docCount, sumTtf, sumDf);
|
||||||
}
|
}
|
||||||
|
|
||||||
private TermStatistics toTermStats(BasicStats stats) {
|
private TermStatistics toTermStats(BasicStats stats) {
|
||||||
|
|
|
@ -17,8 +17,10 @@
|
||||||
package org.apache.lucene.facet;
|
package org.apache.lucene.facet;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
|
import java.util.List;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
@ -101,6 +103,14 @@ class DrillSidewaysQuery extends Query {
|
||||||
throw new UnsupportedOperationException();
|
throw new UnsupportedOperationException();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
List<Weight> weights = new ArrayList<>();
|
||||||
|
weights.add(baseWeight);
|
||||||
|
weights.addAll(Arrays.asList(drillDowns));
|
||||||
|
return getCacheHelper(context, weights);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
|
public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
|
||||||
Scorer baseScorer = baseWeight.scorer(context);
|
Scorer baseScorer = baseWeight.scorer(context);
|
||||||
|
|
|
@ -171,6 +171,11 @@ public final class DoubleRange extends Range {
|
||||||
};
|
};
|
||||||
return new ConstantScoreScorer(this, score(), twoPhase);
|
return new ConstantScoreScorer(this, score(), twoPhase);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return null; // TODO delegate to LongValuesSource?
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -163,6 +163,11 @@ public final class LongRange extends Range {
|
||||||
};
|
};
|
||||||
return new ConstantScoreScorer(this, score(), twoPhase);
|
return new ConstantScoreScorer(this, score(), twoPhase);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return null; // TODO delegate to LongValuesSource?
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -16,6 +16,16 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.lucene.facet;
|
package org.apache.lucene.facet;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.MockAnalyzer;
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
|
@ -58,16 +68,6 @@ import org.apache.lucene.util.InPlaceMergeSorter;
|
||||||
import org.apache.lucene.util.InfoStream;
|
import org.apache.lucene.util.InfoStream;
|
||||||
import org.apache.lucene.util.TestUtil;
|
import org.apache.lucene.util.TestUtil;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
public class TestDrillSideways extends FacetTestCase {
|
public class TestDrillSideways extends FacetTestCase {
|
||||||
|
|
||||||
protected DrillSideways getNewDrillSideways(IndexSearcher searcher, FacetsConfig config,
|
protected DrillSideways getNewDrillSideways(IndexSearcher searcher, FacetsConfig config,
|
||||||
|
@ -740,6 +740,11 @@ public class TestDrillSideways extends FacetTestCase {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -20,7 +20,6 @@ import java.io.IOException;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
|
||||||
import java.util.concurrent.atomic.AtomicBoolean;
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
|
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
|
@ -29,8 +28,8 @@ import org.apache.lucene.document.DoublePoint;
|
||||||
import org.apache.lucene.document.LongPoint;
|
import org.apache.lucene.document.LongPoint;
|
||||||
import org.apache.lucene.document.NumericDocValuesField;
|
import org.apache.lucene.document.NumericDocValuesField;
|
||||||
import org.apache.lucene.facet.DrillDownQuery;
|
import org.apache.lucene.facet.DrillDownQuery;
|
||||||
import org.apache.lucene.facet.DrillSideways.DrillSidewaysResult;
|
|
||||||
import org.apache.lucene.facet.DrillSideways;
|
import org.apache.lucene.facet.DrillSideways;
|
||||||
|
import org.apache.lucene.facet.DrillSideways.DrillSidewaysResult;
|
||||||
import org.apache.lucene.facet.FacetField;
|
import org.apache.lucene.facet.FacetField;
|
||||||
import org.apache.lucene.facet.FacetResult;
|
import org.apache.lucene.facet.FacetResult;
|
||||||
import org.apache.lucene.facet.FacetTestCase;
|
import org.apache.lucene.facet.FacetTestCase;
|
||||||
|
@ -46,10 +45,10 @@ import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.IndexWriterConfig;
|
import org.apache.lucene.index.IndexWriterConfig;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
import org.apache.lucene.index.RandomIndexWriter;
|
import org.apache.lucene.index.RandomIndexWriter;
|
||||||
import org.apache.lucene.index.Term;
|
|
||||||
import org.apache.lucene.search.DoubleValues;
|
import org.apache.lucene.search.DoubleValues;
|
||||||
import org.apache.lucene.search.DoubleValuesSource;
|
import org.apache.lucene.search.DoubleValuesSource;
|
||||||
import org.apache.lucene.search.Explanation;
|
import org.apache.lucene.search.Explanation;
|
||||||
|
import org.apache.lucene.search.FilterWeight;
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.apache.lucene.search.LongValuesSource;
|
import org.apache.lucene.search.LongValuesSource;
|
||||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||||
|
@ -717,24 +716,12 @@ public class TestRangeFacetCounts extends FacetTestCase {
|
||||||
@Override
|
@Override
|
||||||
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
|
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
|
||||||
final Weight in = this.in.createWeight(searcher, needsScores, boost);
|
final Weight in = this.in.createWeight(searcher, needsScores, boost);
|
||||||
return new Weight(in.getQuery()) {
|
return new FilterWeight(in) {
|
||||||
|
|
||||||
@Override
|
|
||||||
public void extractTerms(Set<Term> terms) {
|
|
||||||
in.extractTerms(terms);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
|
|
||||||
return in.explain(context, doc);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Scorer scorer(LeafReaderContext context) throws IOException {
|
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||||
used.set(true);
|
used.set(true);
|
||||||
return in.scorer(context);
|
return in.scorer(context);
|
||||||
}
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -238,13 +238,8 @@ public final class TokenStreamFromTermVector extends TokenStream {
|
||||||
// Estimate the number of position slots we need from term stats. We use some estimation factors taken from
|
// Estimate the number of position slots we need from term stats. We use some estimation factors taken from
|
||||||
// Wikipedia that reduce the likelihood of needing to expand the array.
|
// Wikipedia that reduce the likelihood of needing to expand the array.
|
||||||
int sumTotalTermFreq = (int) vector.getSumTotalTermFreq();
|
int sumTotalTermFreq = (int) vector.getSumTotalTermFreq();
|
||||||
if (sumTotalTermFreq == -1) {//unfortunately term vectors seem to not have this stat
|
assert sumTotalTermFreq != -1;
|
||||||
int size = (int) vector.size();
|
|
||||||
if (size == -1) {//doesn't happen with term vectors, it seems, but pick a default any way
|
|
||||||
size = 128;
|
|
||||||
}
|
|
||||||
sumTotalTermFreq = (int)(size * 2.4);
|
|
||||||
}
|
|
||||||
final int originalPositionEstimate = (int) (sumTotalTermFreq * 1.5);//less than 1 in 10 docs exceed this
|
final int originalPositionEstimate = (int) (sumTotalTermFreq * 1.5);//less than 1 in 10 docs exceed this
|
||||||
|
|
||||||
// This estimate is based on maxStartOffset. Err on the side of this being larger than needed.
|
// This estimate is based on maxStartOffset. Err on the side of this being larger than needed.
|
||||||
|
|
|
@ -153,7 +153,7 @@ org.apache.hadoop.version = 2.7.4
|
||||||
/org.apache.httpcomponents/httpcore = 4.4.6
|
/org.apache.httpcomponents/httpcore = 4.4.6
|
||||||
/org.apache.httpcomponents/httpmime = 4.5.3
|
/org.apache.httpcomponents/httpmime = 4.5.3
|
||||||
|
|
||||||
/org.apache.ivy/ivy = 2.3.0
|
/org.apache.ivy/ivy = 2.4.0
|
||||||
|
|
||||||
org.apache.james.apache.mime4j.version = 0.7.2
|
org.apache.james.apache.mime4j.version = 0.7.2
|
||||||
/org.apache.james/apache-mime4j-core = ${org.apache.james.apache.mime4j.version}
|
/org.apache.james/apache-mime4j-core = ${org.apache.james.apache.mime4j.version}
|
||||||
|
|
|
@ -20,6 +20,7 @@ import java.io.IOException;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.index.DocValues;
|
import org.apache.lucene.index.DocValues;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
import org.apache.lucene.index.OrdinalMap;
|
import org.apache.lucene.index.OrdinalMap;
|
||||||
import org.apache.lucene.index.SortedDocValues;
|
import org.apache.lucene.index.SortedDocValues;
|
||||||
|
@ -154,6 +155,11 @@ final class GlobalOrdinalsQuery extends Query {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return getDocValuesCacheHelper(joinField, context);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
final static class OrdinalMapScorer extends BaseGlobalOrdinalScorer {
|
final static class OrdinalMapScorer extends BaseGlobalOrdinalScorer {
|
||||||
|
|
|
@ -194,6 +194,11 @@ public class ParentChildrenBlockJoinQuery extends Query {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return null; // TODO delegate to BitSetProducer?
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -29,6 +29,7 @@ import org.apache.lucene.document.FloatPoint;
|
||||||
import org.apache.lucene.document.IntPoint;
|
import org.apache.lucene.document.IntPoint;
|
||||||
import org.apache.lucene.document.LongPoint;
|
import org.apache.lucene.document.LongPoint;
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.LeafReader;
|
import org.apache.lucene.index.LeafReader;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
import org.apache.lucene.index.PointValues;
|
import org.apache.lucene.index.PointValues;
|
||||||
|
@ -186,6 +187,11 @@ abstract class PointInSetIncludingScoreQuery extends Query {
|
||||||
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return context.reader().getCoreCacheHelper();
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -21,6 +21,7 @@ import java.util.Locale;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
import org.apache.lucene.index.PostingsEnum;
|
import org.apache.lucene.index.PostingsEnum;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
|
@ -140,6 +141,11 @@ class TermsIncludingScoreQuery extends Query {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return context.reader().getCoreCacheHelper();
|
||||||
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -562,6 +562,11 @@ public class TestJoinUtil extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1 +0,0 @@
|
||||||
c5ebf1c253ad4959a29f4acfe696ee48cdd9f473
|
|
|
@ -0,0 +1 @@
|
||||||
|
5abe4c24bbe992a9ac07ca563d5bd3e8d569e9ed
|
|
@ -17,6 +17,7 @@
|
||||||
package org.apache.lucene.queries;
|
package org.apache.lucene.queries;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
@ -121,6 +122,11 @@ public class BoostingQuery extends Query {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return getCacheHelper(context, Arrays.asList(matchWeight, contextWeight));
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -17,9 +17,11 @@
|
||||||
package org.apache.lucene.queries;
|
package org.apache.lucene.queries;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
@ -207,6 +209,14 @@ public class CustomScoreQuery extends Query implements Cloneable {
|
||||||
return new CustomScorer(CustomScoreQuery.this.getCustomScoreProvider(context), this, queryWeight, subQueryScorer, valSrcScorers);
|
return new CustomScorer(CustomScoreQuery.this.getCustomScoreProvider(context), this, queryWeight, subQueryScorer, valSrcScorers);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
List<Weight> weights = new ArrayList<>();
|
||||||
|
weights.add(subQueryWeight);
|
||||||
|
weights.addAll(Arrays.asList(valSrcWeights));
|
||||||
|
return getCacheHelper(context, weights);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
|
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
|
||||||
Explanation explain = doExplain(context, doc);
|
Explanation explain = doExplain(context, doc);
|
||||||
|
|
|
@ -88,6 +88,11 @@ public final class BoostedQuery extends Query {
|
||||||
return new BoostedQuery.CustomScorer(context, this, subQueryScorer, boostVal);
|
return new BoostedQuery.CustomScorer(context, this, subQueryScorer, boostVal);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Explanation explain(LeafReaderContext readerContext, int doc) throws IOException {
|
public Explanation explain(LeafReaderContext readerContext, int doc) throws IOException {
|
||||||
Explanation subQueryExpl = qWeight.explain(readerContext,doc);
|
Explanation subQueryExpl = qWeight.explain(readerContext,doc);
|
||||||
|
|
|
@ -21,6 +21,7 @@ import java.io.IOException;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import java.util.function.DoublePredicate;
|
import java.util.function.DoublePredicate;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
import org.apache.lucene.search.ConstantScoreScorer;
|
import org.apache.lucene.search.ConstantScoreScorer;
|
||||||
import org.apache.lucene.search.ConstantScoreWeight;
|
import org.apache.lucene.search.ConstantScoreWeight;
|
||||||
|
@ -80,6 +81,11 @@ public final class FunctionMatchQuery extends Query {
|
||||||
};
|
};
|
||||||
return new ConstantScoreScorer(this, score(), twoPhase);
|
return new ConstantScoreScorer(this, score(), twoPhase);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return null; // TODO delegate to DoubleValuesSource?
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -74,6 +74,11 @@ public class FunctionQuery extends Query {
|
||||||
return new AllScorer(context, this, boost);
|
return new AllScorer(context, this, boost);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
|
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
|
||||||
return ((AllScorer)scorer(context)).explain(doc);
|
return ((AllScorer)scorer(context)).explain(doc);
|
||||||
|
|
|
@ -21,6 +21,7 @@ import java.util.Map;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.search.Explanation;
|
import org.apache.lucene.search.Explanation;
|
||||||
|
@ -153,5 +154,10 @@ public class FunctionRangeQuery extends Query {
|
||||||
// getRangeScorer takes String args and parses them. Weird.
|
// getRangeScorer takes String args and parses them. Weird.
|
||||||
return functionValues.getRangeScorer(context, lowerVal, upperVal, includeLower, includeUpper);
|
return functionValues.getRangeScorer(context, lowerVal, upperVal, includeLower, includeUpper);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -137,5 +137,10 @@ public final class FunctionScoreQuery extends Query {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||||
|
return null; // TODO delegate to DoubleValuesSource
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -29,8 +29,6 @@ import java.util.Map;
|
||||||
/**
|
/**
|
||||||
* <code>SumTotalTermFreqValueSource</code> returns the number of tokens.
|
* <code>SumTotalTermFreqValueSource</code> returns the number of tokens.
|
||||||
* (sum of term freqs across all documents, across all terms).
|
* (sum of term freqs across all documents, across all terms).
|
||||||
* Returns -1 if frequencies were omitted for the field, or if
|
|
||||||
* the codec doesn't support this statistic.
|
|
||||||
* @lucene.internal
|
* @lucene.internal
|
||||||
*/
|
*/
|
||||||
public class SumTotalTermFreqValueSource extends ValueSource {
|
public class SumTotalTermFreqValueSource extends ValueSource {
|
||||||
|
@ -61,12 +59,8 @@ public class SumTotalTermFreqValueSource extends ValueSource {
|
||||||
Terms terms = readerContext.reader().terms(indexedField);
|
Terms terms = readerContext.reader().terms(indexedField);
|
||||||
if (terms == null) continue;
|
if (terms == null) continue;
|
||||||
long v = terms.getSumTotalTermFreq();
|
long v = terms.getSumTotalTermFreq();
|
||||||
if (v == -1) {
|
assert v != -1;
|
||||||
sumTotalTermFreq = -1;
|
sumTotalTermFreq += v;
|
||||||
break;
|
|
||||||
} else {
|
|
||||||
sumTotalTermFreq += v;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
final long ttf = sumTotalTermFreq;
|
final long ttf = sumTotalTermFreq;
|
||||||
context.put(this, new LongDocValues(this) {
|
context.put(this, new LongDocValues(this) {
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue