Karl Wright 2017-11-08 07:29:28 -05:00
commit dae5c570b9
238 changed files with 5402 additions and 895 deletions

View File

@@ -186,20 +186,26 @@
 }

 def checkLicenseHeaderPrecedes = { f, description, contentPattern, commentPattern, text, ratDocument ->
   def contentMatcher = contentPattern.matcher(text);
   if (contentMatcher.find()) {
     def contentStartPos = contentMatcher.start();
     def commentMatcher = commentPattern.matcher(text);
     while (commentMatcher.find()) {
       if (isLicense(commentMatcher, ratDocument)) {
         if (commentMatcher.start() < contentStartPos) {
           break; // This file is all good, so break loop: license header precedes 'description' definition
         } else {
           reportViolation(f, description+' declaration precedes license header');
+        }
       }
     }
   }
+}
+
+def checkMockitoAssume = { f, text ->
+  if (text.contains("mockito") && !text.contains("assumeWorkingMockito()")) {
+    reportViolation(f, 'File uses Mockito but has no assumeWorkingMockito() call');
+  }
 }

 def checkForUnescapedSymbolSubstitutions = { f, text ->
@@ -265,18 +271,21 @@
       ratDocument.getMetaData().value(MetaData.RAT_URL_LICENSE_FAMILY_NAME)));
     }
   }
-  if (f.toString().endsWith('.java')) {
+  if (f.name.endsWith('.java')) {
     if (text.contains('org.slf4j.LoggerFactory')) {
       if (!validLoggerPattern.matcher(text).find()) {
         reportViolation(f, 'invalid logging pattern [not private static final, uses static class name]');
       }
     }
     checkLicenseHeaderPrecedes(f, 'package', packagePattern, javaCommentPattern, text, ratDocument);
+    if (f.name.contains("Test")) {
+      checkMockitoAssume(f, text);
+    }
   }
-  if (f.toString().endsWith('.xml') || f.toString().endsWith('.xml.template')) {
+  if (f.name.endsWith('.xml') || f.name.endsWith('.xml.template')) {
     checkLicenseHeaderPrecedes(f, '<tag>', xmlTagPattern, xmlCommentPattern, text, ratDocument);
   }
-  if (f.toString().endsWith('.adoc')) {
+  if (f.name.endsWith('.adoc')) {
     checkForUnescapedSymbolSubstitutions(f, text);
   }
 };
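
For reference, a minimal sketch of the test pattern the new checkMockitoAssume rule enforces. The test class and mocked type below are invented for illustration; assumeWorkingMockito() is assumed here to be the LuceneTestCase helper this rule pairs with, which skips the suite on JVMs where Mockito cannot instrument classes.

    import static org.mockito.Mockito.mock;
    import static org.mockito.Mockito.verify;

    import org.apache.lucene.util.LuceneTestCase;

    // Hypothetical test class, shown only to illustrate the enforced pattern.
    public class TestSomethingWithMocks extends LuceneTestCase {
      public void testWithMock() {
        // Any test source mentioning "mockito" must make this call first:
        assumeWorkingMockito();
        Runnable delegate = mock(Runnable.class);
        delegate.run();
        verify(delegate).run();
      }
    }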

View File

@@ -12,6 +12,16 @@
     </content>
     <orderEntry type="inheritedJdk" />
     <orderEntry type="sourceFolder" forTests="false" />
+    <orderEntry type="module-library" exported="">
+      <library>
+        <CLASSES>
+          <root url="file://$MODULE_DIR$/lib" />
+        </CLASSES>
+        <JAVADOC />
+        <SOURCES />
+        <jarDirectory url="file://$MODULE_DIR$/lib" recursive="false" />
+      </library>
+    </orderEntry>
     <orderEntry type="library" scope="TEST" name="JUnit" level="project" />
     <orderEntry type="module" scope="TEST" module-name="lucene-test-framework" />
     <orderEntry type="module" module-name="analysis-common" />

View File

@@ -5,6 +5,14 @@ http://s.apache.org/luceneversions
 ======================= Lucene 8.0.0 =======================

+API Changes
+
+* LUCENE-8007: Index statistics Terms.getSumDocFreq(), Terms.getDocCount() are
+  now required to be stored by codecs. Additionally, TermsEnum.totalTermFreq()
+  and Terms.getSumTotalTermFreq() are now required: if frequencies are not
+  stored they are equal to TermsEnum.docFreq() and Terms.getSumDocFreq(),
+  respectively, because all freq() values equal 1. (Adrien Grand, Robert Muir)
+
 Changes in Runtime Behavior

 * LUCENE-7837: Indices that were created before the previous major version
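
To make the new LUCENE-8007 contract concrete, here is a hedged usage sketch (reader construction omitted; the field name "body" is a placeholder): none of these statistics return -1 any longer, and for a field indexed with IndexOptions.DOCS every freq() is implicitly 1, so totalTermFreq() equals docFreq() for each term.

    import java.io.IOException;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.MultiFields;
    import org.apache.lucene.index.Terms;
    import org.apache.lucene.index.TermsEnum;

    class FieldStatsSketch {
      static void printFieldStats(IndexReader reader) throws IOException {
        Terms terms = MultiFields.getTerms(reader, "body"); // placeholder field
        if (terms != null) {
          System.out.println("docCount=" + terms.getDocCount());     // never -1 now
          System.out.println("sumDocFreq=" + terms.getSumDocFreq()); // never -1 now
          System.out.println("sumTotalTermFreq=" + terms.getSumTotalTermFreq());
          TermsEnum te = terms.iterator();
          while (te.next() != null) {
            assert te.totalTermFreq() >= te.docFreq(); // now holds for every term
          }
        }
      }
    }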
@@ -25,6 +33,11 @@ Improvements
 ======================= Lucene 7.2.0 =======================

+API Changes
+
+* LUCENE-8017: Weight now exposes a getCacheHelper() method to help query caches
+  determine whether or not a query can be cached. (Alan Woodward)
+
 Bug Fixes

 * LUCENE-7991: KNearestNeighborDocumentClassifier.knnSearch no longer applies
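
For context, a minimal sketch of the new LUCENE-8017 hook on a custom Weight, assuming the ConstantScoreWeight base class in this snapshot (the class below is hypothetical, not part of the commit): a weight whose matches depend only on core segment data can hand back the core cache helper, while returning null marks the query as uncacheable.

    import java.io.IOException;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.LeafReaderContext;
    import org.apache.lucene.search.ConstantScoreScorer;
    import org.apache.lucene.search.ConstantScoreWeight;
    import org.apache.lucene.search.DocIdSetIterator;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.Scorer;

    // Hypothetical match-all weight, only to show where getCacheHelper() fits.
    final class MatchAllWeightSketch extends ConstantScoreWeight {
      MatchAllWeightSketch(Query query, float boost) {
        super(query, boost);
      }

      @Override
      public Scorer scorer(LeafReaderContext context) throws IOException {
        return new ConstantScoreScorer(this, score(),
            DocIdSetIterator.all(context.reader().maxDoc()));
      }

      @Override
      public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
        // Matches depend only on core (non-updatable) segment data, so the
        // core helper is safe; queries whose results can change should return
        // a narrower helper, or null to opt out of caching entirely.
        return context.reader().getCoreCacheHelper();
      }
    }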
@@ -49,6 +62,16 @@ Optimizations
 * LUCENE-7994: Use int/int scatter map to gather facet counts when the
   number of hits is small relative to the number of unique facet labels
   (Dawid Weiss, Robert Muir, Mike McCandless)

+Tests
+
+* LUCENE-8035: Run tests with JDK-specific options: --illegal-access=deny
+  on Java 9+. (Uwe Schindler)
+
+Build
+
+* LUCENE-6144: Upgrade Ivy to 2.4.0; 'ant ivy-bootstrap' now removes old Ivy
+  jars in ~/.ant/lib/. (Shawn Heisey, Steve Rowe)
+
 ======================= Lucene 7.1.0 =======================

View File

@@ -139,8 +139,9 @@ public class BlockTermsReader extends FieldsProducer {
         assert numTerms >= 0;
         final long termsStartPointer = in.readVLong();
         final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
-        final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? -1 : in.readVLong();
-        final long sumDocFreq = in.readVLong();
+        final long sumTotalTermFreq = in.readVLong();
+        // when frequencies are omitted, sumDocFreq=totalTermFreq and we only write one value
+        final long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? sumTotalTermFreq : in.readVLong();
         final int docCount = in.readVInt();
         final int longsSize = in.readVInt();
         if (docCount < 0 || docCount > state.segmentInfo.maxDoc()) { // #docs with field must be <= #docs
@@ -149,7 +150,7 @@ public class BlockTermsReader extends FieldsProducer {
         if (sumDocFreq < docCount) {  // #postings must be >= #docs with field
           throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, in);
         }
-        if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
+        if (sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
           throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq, in);
         }
         FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq, docCount, longsSize));
@@ -810,7 +811,9 @@ public class BlockTermsReader extends FieldsProducer {
           // docFreq, totalTermFreq
           state.docFreq = freqReader.readVInt();
           //System.out.println("  dF=" + state.docFreq);
-          if (fieldInfo.getIndexOptions() != IndexOptions.DOCS) {
+          if (fieldInfo.getIndexOptions() == IndexOptions.DOCS) {
+            state.totalTermFreq = state.docFreq; // all postings have tf=1
+          } else {
             state.totalTermFreq = state.docFreq + freqReader.readVLong();
             //System.out.println("    totTF=" + state.totalTermFreq);
           }

View File

@@ -126,8 +126,9 @@ public final class OrdsBlockTreeTermsReader extends FieldsProducer {
         final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
         assert fieldInfo != null: "field=" + field;
         assert numTerms <= Integer.MAX_VALUE;
-        final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? -1 : in.readVLong();
-        final long sumDocFreq = in.readVLong();
+        final long sumTotalTermFreq = in.readVLong();
+        // when frequencies are omitted, sumDocFreq=totalTermFreq and we only write one value
+        final long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? sumTotalTermFreq : in.readVLong();
         final int docCount = in.readVInt();
         final int longsSize = in.readVInt();
         // System.out.println("  longsSize=" + longsSize);
@@ -140,7 +141,7 @@ public final class OrdsBlockTreeTermsReader extends FieldsProducer {
         if (sumDocFreq < docCount) {  // #postings must be >= #docs with field
           throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, in);
         }
-        if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
+        if (sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
           throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq, in);
         }
         final long indexStartFP = indexIn.readVLong();

View File

@@ -292,7 +292,9 @@ final class OrdsIntersectTermsEnumFrame {
       // stats
       termState.docFreq = statsReader.readVInt();
       //if (DEBUG) System.out.println("    dF=" + state.docFreq);
-      if (ite.fr.fieldInfo.getIndexOptions() != IndexOptions.DOCS) {
+      if (ite.fr.fieldInfo.getIndexOptions() == IndexOptions.DOCS) {
+        termState.totalTermFreq = termState.docFreq; // all tf values are 1
+      } else {
         termState.totalTermFreq = termState.docFreq + statsReader.readVLong();
         //if (DEBUG) System.out.println("    totTF=" + state.totalTermFreq);
       }

View File

@@ -499,7 +499,9 @@ final class OrdsSegmentTermsEnumFrame {
       // stats
       state.docFreq = statsReader.readVInt();
       //if (DEBUG) System.out.println("    dF=" + state.docFreq);
-      if (ste.fr.fieldInfo.getIndexOptions() != IndexOptions.DOCS) {
+      if (ste.fr.fieldInfo.getIndexOptions() == IndexOptions.DOCS) {
+        state.totalTermFreq = state.docFreq; // all tf values are 1
+      } else {
         state.totalTermFreq = state.docFreq + statsReader.readVLong();
         //if (DEBUG) System.out.println("    totTF=" + state.totalTermFreq);
       }

View File

@@ -19,7 +19,6 @@ package org.apache.lucene.codecs.memory;

 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.BitSet;
 import java.util.Collection;
 import java.util.Collections;
@@ -111,8 +110,9 @@ public class FSTOrdTermsReader extends FieldsProducer {
         FieldInfo fieldInfo = fieldInfos.fieldInfo(blockIn.readVInt());
         boolean hasFreq = fieldInfo.getIndexOptions() != IndexOptions.DOCS;
         long numTerms = blockIn.readVLong();
-        long sumTotalTermFreq = hasFreq ? blockIn.readVLong() : -1;
-        long sumDocFreq = blockIn.readVLong();
+        long sumTotalTermFreq = blockIn.readVLong();
+        // if freqs are omitted, sumDocFreq=sumTotalTermFreq and we only write one value
+        long sumDocFreq = hasFreq ? blockIn.readVLong() : sumTotalTermFreq;
         int docCount = blockIn.readVInt();
         int longsSize = blockIn.readVInt();
         FST<Long> index = new FST<>(indexIn, PositiveIntOutputs.getSingleton());
@@ -146,7 +146,7 @@ public class FSTOrdTermsReader extends FieldsProducer {
       throw new CorruptIndexException("invalid sumDocFreq: " + field.sumDocFreq + " docCount: " + field.docCount + " (blockIn=" + blockIn + ")", indexIn);
     }
     // #positions must be >= #postings
-    if (field.sumTotalTermFreq != -1 && field.sumTotalTermFreq < field.sumDocFreq) {
+    if (field.sumTotalTermFreq < field.sumDocFreq) {
       throw new CorruptIndexException("invalid sumTotalTermFreq: " + field.sumTotalTermFreq + " sumDocFreq: " + field.sumDocFreq + " (blockIn=" + blockIn + ")", indexIn);
     }
     if (previous != null) {
@@ -343,9 +343,6 @@ public class FSTOrdTermsReader extends FieldsProducer {
       this.totalTermFreq = new long[INTERVAL];
       this.statsBlockOrd = -1;
       this.metaBlockOrd = -1;
-      if (!hasFreqs()) {
-        Arrays.fill(totalTermFreq, -1);
-      }
     }

     /** Decodes stats data into term state */
@@ -388,6 +385,7 @@ public class FSTOrdTermsReader extends FieldsProducer {
           }
         } else {
           docFreq[i] = code;
+          totalTermFreq[i] = code;
         }
       }
     }

View File

@@ -94,8 +94,9 @@ public class FSTTermsReader extends FieldsProducer {
       int fieldNumber = in.readVInt();
       FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
       long numTerms = in.readVLong();
-      long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? -1 : in.readVLong();
-      long sumDocFreq = in.readVLong();
+      long sumTotalTermFreq = in.readVLong();
+      // if frequencies are omitted, sumTotalTermFreq=sumDocFreq and we only write one value
+      long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? sumTotalTermFreq : in.readVLong();
       int docCount = in.readVInt();
       int longsSize = in.readVInt();
       TermsReader current = new TermsReader(fieldInfo, in, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize);
@@ -126,7 +127,7 @@ public class FSTTermsReader extends FieldsProducer {
       throw new CorruptIndexException("invalid sumDocFreq: " + field.sumDocFreq + " docCount: " + field.docCount, in);
     }
     // #positions must be >= #postings
-    if (field.sumTotalTermFreq != -1 && field.sumTotalTermFreq < field.sumDocFreq) {
+    if (field.sumTotalTermFreq < field.sumDocFreq) {
       throw new CorruptIndexException("invalid sumTotalTermFreq: " + field.sumTotalTermFreq + " sumDocFreq: " + field.sumDocFreq, in);
     }
     if (previous != null) {
@@ -288,7 +289,7 @@ public class FSTTermsReader extends FieldsProducer {
     @Override
     public long totalTermFreq() throws IOException {
-      return state.totalTermFreq;
+      return state.totalTermFreq == -1 ? state.docFreq : state.totalTermFreq;
     }

     @Override

View File

@@ -733,10 +733,10 @@ public final class MemoryPostingsFormat extends PostingsFormat {
       if (!didDecode) {
         buffer.reset(current.output.bytes, current.output.offset, current.output.length);
         docFreq = buffer.readVInt();
-        if (field.getIndexOptions() != IndexOptions.DOCS) {
-          totalTermFreq = docFreq + buffer.readVLong();
+        if (field.getIndexOptions() == IndexOptions.DOCS) {
+          totalTermFreq = docFreq;
         } else {
-          totalTermFreq = -1;
+          totalTermFreq = docFreq + buffer.readVLong();
         }
         postingsSpare.bytes = current.output.bytes;
         postingsSpare.offset = buffer.getPosition();
@@ -873,12 +873,15 @@ public final class MemoryPostingsFormat extends PostingsFormat {
       field = fieldInfos.fieldInfo(fieldNumber);
       if (field == null) {
         throw new CorruptIndexException("invalid field number: " + fieldNumber, in);
-      } else if (field.getIndexOptions() != IndexOptions.DOCS) {
-        sumTotalTermFreq = in.readVLong();
       } else {
-        sumTotalTermFreq = -1;
+        sumTotalTermFreq = in.readVLong();
+      }
+      // if frequencies are omitted, sumDocFreq = sumTotalTermFreq and we only write one value.
+      if (field.getIndexOptions() == IndexOptions.DOCS) {
+        sumDocFreq = sumTotalTermFreq;
+      } else {
+        sumDocFreq = in.readVLong();
       }
-      sumDocFreq = in.readVLong();
       docCount = in.readVInt();
       fst = new FST<>(in, outputs);

View File

@@ -202,7 +202,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
     @Override
     public long totalTermFreq() {
-      return indexOptions == IndexOptions.DOCS ? -1 : totalTermFreq;
+      return indexOptions == IndexOptions.DOCS ? docFreq : totalTermFreq;
     }

     @Override
@@ -568,12 +568,13 @@ class SimpleTextFieldsReader extends FieldsProducer {
       } else if (StringHelper.startsWith(scratch.get(), DOC)) {
         docFreq++;
         sumDocFreq++;
+        totalTermFreq++;
         scratchUTF16.copyUTF8Bytes(scratch.bytes(), DOC.length, scratch.length()-DOC.length);
         int docID = ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length());
         visitedDocs.set(docID);
       } else if (StringHelper.startsWith(scratch.get(), FREQ)) {
         scratchUTF16.copyUTF8Bytes(scratch.bytes(), FREQ.length, scratch.length()-FREQ.length);
-        totalTermFreq += ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length());
+        totalTermFreq += ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length()) - 1;
       } else if (StringHelper.startsWith(scratch.get(), TERM)) {
         if (lastDocsStart != -1) {
           b.add(Util.toIntsRef(lastTerm.get(), scratchIntsRef), outputs.newPair(lastDocsStart,
@@ -637,7 +638,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
     @Override
     public long getSumTotalTermFreq() {
-      return fieldInfo.getIndexOptions() == IndexOptions.DOCS ? -1 : sumTotalTermFreq;
+      return sumTotalTermFreq;
     }

     @Override

View File

@@ -288,7 +288,13 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
     @Override
     public long getSumTotalTermFreq() throws IOException {
-      return -1;
+      // TODO: make it constant-time
+      long ttf = 0;
+      TermsEnum iterator = iterator();
+      for (BytesRef b = iterator.next(); b != null; b = iterator.next()) {
+        ttf += iterator.totalTermFreq();
+      }
+      return ttf;
     }

     @Override

View File

@@ -80,13 +80,15 @@
   <!-- Needed in case a module needs the original build, also for compile-tools to be called from a module -->
   <property name="common.build.dir" location="${common.dir}/build"/>

-  <property name="ivy.bootstrap.version" value="2.3.0" /> <!-- UPGRADE NOTE: update disallowed.ivy.jars regex in ivy-availability-check -->
+  <property name="ivy.bootstrap.version" value="2.4.0" /> <!-- UPGRADE NOTE: update disallowed_ivy_jars_regex below -->
+  <property name="disallowed_ivy_jars_regex" value="ivy-2\.[0123].*\.jar"/>
   <property name="ivy.default.configuration" value="*"/>

   <!-- Running ant targets in parralel may require this set to false because ivy:retrieve tasks may race with resolve -->
   <property name="ivy.sync" value="true"/>
   <property name="ivy.resolution-cache.dir" location="${common.build.dir}/ivy-resolution-cache"/>
-  <property name="ivy.lock-strategy" value="artifact-lock"/>
+  <property name="ivy.lock-strategy" value="artifact-lock-nio"/>

   <property name="local.caches" location="${common.dir}/../.caches" />
   <property name="tests.cachedir" location="${local.caches}/test-stats" />
@@ -413,12 +415,12 @@
   <property name="ivy_bootstrap_url1" value="http://repo1.maven.org/maven2"/>
   <!-- you might need to tweak this from china so it works -->
   <property name="ivy_bootstrap_url2" value="http://uk.maven.org/maven2"/>
-  <property name="ivy_checksum_sha1" value="c5ebf1c253ad4959a29f4acfe696ee48cdd9f473"/>
+  <property name="ivy_checksum_sha1" value="5abe4c24bbe992a9ac07ca563d5bd3e8d569e9ed"/>

   <target name="ivy-availability-check" unless="ivy.available">
     <path id="disallowed.ivy.jars">
       <fileset dir="${ivy_install_path}">
-        <filename regex="ivy-2\.[012].*\.jar"/> <!-- TODO: Update this regex to disallow Ivy versions -->
+        <filename regex="${disallowed_ivy_jars_regex}"/>
       </fileset>
     </path>
     <loadresource property="disallowed.ivy.jars.list">
@@ -482,19 +484,20 @@
     <fail>Ivy is not available</fail>
   </target>

-  <target name="ivy-bootstrap" description="Download and install Ivy in the users ant lib dir" depends="ivy-bootstrap1,ivy-bootstrap2,ivy-checksum"/>
+  <target name="ivy-bootstrap" description="Download and install Ivy in the users ant lib dir"
+          depends="-ivy-bootstrap1,-ivy-bootstrap2,-ivy-checksum,-ivy-remove-old-versions"/>

   <!-- try to download from repo1.maven.org -->
-  <target name="ivy-bootstrap1">
+  <target name="-ivy-bootstrap1">
     <ivy-download src="${ivy_bootstrap_url1}" dest="${ivy_install_path}"/>
     <available file="${ivy_install_path}/ivy-${ivy.bootstrap.version}.jar" property="ivy.bootstrap1.success" />
   </target>

-  <target name="ivy-bootstrap2" unless="ivy.bootstrap1.success">
+  <target name="-ivy-bootstrap2" unless="ivy.bootstrap1.success">
     <ivy-download src="${ivy_bootstrap_url2}" dest="${ivy_install_path}"/>
   </target>

-  <target name="ivy-checksum">
+  <target name="-ivy-checksum">
     <checksum file="${ivy_install_path}/ivy-${ivy.bootstrap.version}.jar"
               property="${ivy_checksum_sha1}"
               algorithm="SHA"
@@ -505,6 +508,14 @@
       </condition>
     </fail>
   </target>

+  <target name="-ivy-remove-old-versions">
+    <delete verbose="true" failonerror="true">
+      <fileset dir="${ivy_install_path}">
+        <filename regex="${disallowed_ivy_jars_regex}"/>
+      </fileset>
+    </delete>
+  </target>

   <macrodef name="ivy-download">
     <attribute name="src"/>
@@ -948,6 +959,12 @@
     <condition property="java.security.manager" value="org.apache.lucene.util.TestSecurityManager">
       <istrue value="${tests.useSecurityManager}"/>
     </condition>

+    <!-- additional arguments for Java 9+ -->
+    <local name="tests.runtimespecific.args"/>
+    <condition property="tests.runtimespecific.args" value="" else="--illegal-access=deny">
+      <equals arg1="${build.java.runtime}" arg2="1.8"/>
+    </condition>

     <!-- create a fileset pattern that matches ${tests.class}. -->
     <loadresource property="tests.explicitclass" quiet="true">
@@ -1029,6 +1046,7 @@
       <jvmarg line="${tests.clover.args}"/>
       <jvmarg line="@{additional.vm.args}"/>
       <jvmarg line="${tests.asserts.args}"/>
+      <jvmarg line="${tests.runtimespecific.args}"/>

       <!-- set the number of times tests should run -->
       <sysproperty key="tests.iters" value="${tests.iters}"/>

View File

@@ -180,8 +180,9 @@ public final class BlockTreeTermsReader extends FieldsProducer {
         if (fieldInfo == null) {
           throw new CorruptIndexException("invalid field number: " + field, termsIn);
         }
-        final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? -1 : termsIn.readVLong();
-        final long sumDocFreq = termsIn.readVLong();
+        final long sumTotalTermFreq = termsIn.readVLong();
+        // when frequencies are omitted, sumDocFreq=sumTotalTermFreq and only one value is written.
+        final long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? sumTotalTermFreq : termsIn.readVLong();
         final int docCount = termsIn.readVInt();
         final int longsSize = termsIn.readVInt();
         if (longsSize < 0) {
@@ -195,7 +196,7 @@ public final class BlockTreeTermsReader extends FieldsProducer {
         if (sumDocFreq < docCount) {  // #postings must be >= #docs with field
           throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, termsIn);
         }
-        if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
+        if (sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
           throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq, termsIn);
         }
         final long indexStartFP = indexIn.readVLong();

View File

@@ -288,7 +288,9 @@ final class IntersectTermsEnumFrame {
     // stats
     termState.docFreq = statsReader.readVInt();
-    if (ite.fr.fieldInfo.getIndexOptions() != IndexOptions.DOCS) {
+    if (ite.fr.fieldInfo.getIndexOptions() == IndexOptions.DOCS) {
+      termState.totalTermFreq = termState.docFreq; // all postings have freq=1
+    } else {
       termState.totalTermFreq = termState.docFreq + statsReader.readVLong();
     }
     // metadata

View File

@@ -417,7 +417,9 @@ final class SegmentTermsEnumFrame {
     // stats
     state.docFreq = statsReader.readVInt();
     //if (DEBUG) System.out.println("    dF=" + state.docFreq);
-    if (ste.fr.fieldInfo.getIndexOptions() != IndexOptions.DOCS) {
+    if (ste.fr.fieldInfo.getIndexOptions() == IndexOptions.DOCS) {
+      state.totalTermFreq = state.docFreq; // all postings have freq=1
+    } else {
       state.totalTermFreq = state.docFreq + statsReader.readVLong();
       //if (DEBUG) System.out.println("    totTF=" + state.totalTermFreq);
     }

View File

@@ -745,6 +745,7 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem
   private static class TVTerms extends Terms {

     private final int numTerms, flags;
+    private final long totalTermFreq;
     private final int[] prefixLengths, suffixLengths, termFreqs, positionIndex, positions, startOffsets, lengths, payloadIndex;
     private final BytesRef termBytes, payloadBytes;
@@ -764,6 +765,11 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem
       this.payloadIndex = payloadIndex;
       this.payloadBytes = payloadBytes;
       this.termBytes = termBytes;
+      long ttf = 0;
+      for (int tf : termFreqs) {
+        ttf += tf;
+      }
+      this.totalTermFreq = ttf;
     }

     @Override
@@ -782,7 +788,7 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem
     @Override
     public long getSumTotalTermFreq() throws IOException {
-      return -1L;
+      return totalTermFreq;
     }

     @Override

View File

@@ -21,6 +21,7 @@ import java.util.Arrays;
 import java.util.Objects;

 import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.PointValues;
@@ -356,6 +357,11 @@ abstract class RangeFieldQuery extends Query {
         }
         return scorerSupplier.get(Long.MAX_VALUE);
       }

+      @Override
+      public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
+        return context.reader().getCoreCacheHelper();
+      }
     };
   }

View File

@@ -138,6 +138,11 @@ abstract class SortedNumericDocValuesRangeQuery extends Query {
         }
         return new ConstantScoreScorer(this, score(), iterator);
       }

+      @Override
+      public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
+        return getDocValuesCacheHelper(field, context);
+      }
     };
   }

View File

@@ -181,6 +181,11 @@ abstract class SortedSetDocValuesRangeQuery extends Query {
         }
         return new ConstantScoreScorer(this, score(), iterator);
       }

+      @Override
+      public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
+        return getDocValuesCacheHelper(field, context);
+      }
     };
   }

View File

@@ -123,7 +123,10 @@ public abstract class BaseCompositeReader<R extends IndexReader> extends Composi
     ensureOpen();
     int total = 0;          // sum freqs in subreaders
     for (int i = 0; i < subReaders.length; i++) {
-      total += subReaders[i].docFreq(term);
+      int sub = subReaders[i].docFreq(term);
+      assert sub >= 0;
+      assert sub <= subReaders[i].getDocCount(term.field());
+      total += sub;
     }
     return total;
   }
@@ -134,9 +137,8 @@ public abstract class BaseCompositeReader<R extends IndexReader> extends Composi
     long total = 0;        // sum freqs in subreaders
     for (int i = 0; i < subReaders.length; i++) {
       long sub = subReaders[i].totalTermFreq(term);
-      if (sub == -1) {
-        return -1;
-      }
+      assert sub >= 0;
+      assert sub <= subReaders[i].getSumTotalTermFreq(term.field());
       total += sub;
     }
     return total;
@@ -148,9 +150,8 @@ public abstract class BaseCompositeReader<R extends IndexReader> extends Composi
     long total = 0; // sum doc freqs in subreaders
     for (R reader : subReaders) {
       long sub = reader.getSumDocFreq(field);
-      if (sub == -1) {
-        return -1; // if any of the subs doesn't support it, return -1
-      }
+      assert sub >= 0;
+      assert sub <= reader.getSumTotalTermFreq(field);
       total += sub;
     }
     return total;
@@ -162,9 +163,8 @@ public abstract class BaseCompositeReader<R extends IndexReader> extends Composi
     int total = 0; // sum doc counts in subreaders
     for (R reader : subReaders) {
       int sub = reader.getDocCount(field);
-      if (sub == -1) {
-        return -1; // if any of the subs doesn't support it, return -1
-      }
+      assert sub >= 0;
+      assert sub <= reader.maxDoc();
       total += sub;
     }
     return total;
@@ -176,9 +176,8 @@ public abstract class BaseCompositeReader<R extends IndexReader> extends Composi
     long total = 0; // sum doc total term freqs in subreaders
     for (R reader : subReaders) {
       long sub = reader.getSumTotalTermFreq(field);
-      if (sub == -1) {
-        return -1; // if any of the subs doesn't support it, return -1
-      }
+      assert sub >= 0;
+      assert sub >= reader.getSumDocFreq(field);
       total += sub;
     }
     return total;

View File

@@ -1253,6 +1253,10 @@ public final class CheckIndex implements Closeable {
         continue;
       }

+      if (terms.getDocCount() > maxDoc) {
+        throw new RuntimeException("docCount > maxDoc for field: " + field + ", docCount=" + terms.getDocCount() + ", maxDoc=" + maxDoc);
+      }
+
       final boolean hasFreqs = terms.hasFreqs();
       final boolean hasPositions = terms.hasPositions();
       final boolean hasPayloads = terms.hasPayloads();
@@ -1295,12 +1299,6 @@ public final class CheckIndex implements Closeable {
         throw new RuntimeException("field \"" + field + "\" should have hasFreqs=" + expectedHasFreqs + " but got " + hasFreqs);
       }

-      if (hasFreqs == false) {
-        if (terms.getSumTotalTermFreq() != -1) {
-          throw new RuntimeException("field \"" + field + "\" hasFreqs is false, but Terms.getSumTotalTermFreq()=" + terms.getSumTotalTermFreq() + " (should be -1)");
-        }
-      }
-
      if (!isVectors) {
         final boolean expectedHasPositions = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
         if (hasPositions != expectedHasPositions) {
@@ -1375,8 +1373,8 @@ public final class CheckIndex implements Closeable {
         postings = termsEnum.postings(postings, PostingsEnum.ALL);

         if (hasFreqs == false) {
-          if (termsEnum.totalTermFreq() != -1) {
-            throw new RuntimeException("field \"" + field + "\" hasFreqs is false, but TermsEnum.totalTermFreq()=" + termsEnum.totalTermFreq() + " (should be -1)");
+          if (termsEnum.totalTermFreq() != termsEnum.docFreq()) {
+            throw new RuntimeException("field \"" + field + "\" hasFreqs is false, but TermsEnum.totalTermFreq()=" + termsEnum.totalTermFreq() + " (should be " + termsEnum.docFreq() + ")");
           }
         }
@@ -1406,14 +1404,11 @@ public final class CheckIndex implements Closeable {
             break;
           }
           visitedDocs.set(doc);
-          int freq = -1;
-          if (hasFreqs) {
-            freq = postings.freq();
-            if (freq <= 0) {
-              throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
-            }
-            totalTermFreq += freq;
-          } else {
+          int freq = postings.freq();
+          if (freq <= 0) {
+            throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
+          }
+          if (hasFreqs == false) {
             // When a field didn't index freq, it must
             // consistently "lie" and pretend that freq was
             // 1:
@@ -1421,6 +1416,8 @@ public final class CheckIndex implements Closeable {
               throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " != 1 when Terms.hasFreqs() is false");
             }
           }
+          totalTermFreq += freq;
+
           if (liveDocs == null || liveDocs.get(doc)) {
             hasNonDeletedDocs = true;
             status.totFreq++;
@@ -1490,19 +1487,25 @@ public final class CheckIndex implements Closeable {
         }

         final long totalTermFreq2 = termsEnum.totalTermFreq();
-        final boolean hasTotalTermFreq = hasFreqs && totalTermFreq2 != -1;

         if (docCount != docFreq) {
           throw new RuntimeException("term " + term + " docFreq=" + docFreq + " != tot docs w/o deletions " + docCount);
         }
-        if (hasTotalTermFreq) {
-          if (totalTermFreq2 <= 0) {
-            throw new RuntimeException("totalTermFreq: " + totalTermFreq2 + " is out of bounds");
-          }
-          sumTotalTermFreq += totalTermFreq;
-          if (totalTermFreq != totalTermFreq2) {
-            throw new RuntimeException("term " + term + " totalTermFreq=" + totalTermFreq2 + " != recomputed totalTermFreq=" + totalTermFreq);
-          }
+        if (docFreq > terms.getDocCount()) {
+          throw new RuntimeException("term " + term + " docFreq=" + docFreq + " > docCount=" + terms.getDocCount());
+        }
+        if (totalTermFreq2 <= 0) {
+          throw new RuntimeException("totalTermFreq: " + totalTermFreq2 + " is out of bounds");
+        }
+        sumTotalTermFreq += totalTermFreq;
+        if (totalTermFreq != totalTermFreq2) {
+          throw new RuntimeException("term " + term + " totalTermFreq=" + totalTermFreq2 + " != recomputed totalTermFreq=" + totalTermFreq);
+        }
+        if (totalTermFreq2 < docFreq) {
+          throw new RuntimeException("totalTermFreq: " + totalTermFreq2 + " is out of bounds, docFreq=" + docFreq);
+        }
+        if (hasFreqs == false && totalTermFreq != docFreq) {
+          throw new RuntimeException("term " + term + " totalTermFreq=" + totalTermFreq + " != docFreq=" + docFreq);
         }

         // Test skipping
@@ -1626,22 +1629,22 @@ public final class CheckIndex implements Closeable {
         }
         status.blockTreeStats.put(field, stats);

-        if (sumTotalTermFreq != 0) {
-          final long v = fields.terms(field).getSumTotalTermFreq();
-          if (v != -1 && sumTotalTermFreq != v) {
-            throw new RuntimeException("sumTotalTermFreq for field " + field + "=" + v + " != recomputed sumTotalTermFreq=" + sumTotalTermFreq);
-          }
+        final long actualSumDocFreq = fields.terms(field).getSumDocFreq();
+        if (sumDocFreq != actualSumDocFreq) {
+          throw new RuntimeException("sumDocFreq for field " + field + "=" + actualSumDocFreq + " != recomputed sumDocFreq=" + sumDocFreq);
         }

-        if (sumDocFreq != 0) {
-          final long v = fields.terms(field).getSumDocFreq();
-          if (v != -1 && sumDocFreq != v) {
-            throw new RuntimeException("sumDocFreq for field " + field + "=" + v + " != recomputed sumDocFreq=" + sumDocFreq);
-          }
+        final long actualSumTotalTermFreq = fields.terms(field).getSumTotalTermFreq();
+        if (sumTotalTermFreq != actualSumTotalTermFreq) {
+          throw new RuntimeException("sumTotalTermFreq for field " + field + "=" + actualSumTotalTermFreq + " != recomputed sumTotalTermFreq=" + sumTotalTermFreq);
+        }
+
+        if (hasFreqs == false && sumTotalTermFreq != sumDocFreq) {
+          throw new RuntimeException("sumTotalTermFreq for field " + field + " should be " + sumDocFreq + ", got sumTotalTermFreq=" + sumTotalTermFreq);
         }

         final int v = fieldTerms.getDocCount();
-        if (v != -1 && visitedDocs.cardinality() != v) {
+        if (visitedDocs.cardinality() != v) {
           throw new RuntimeException("docCount for field " + field + "=" + v + " != recomputed docCount=" + visitedDocs.cardinality());
         }

View File

@@ -450,25 +450,25 @@ public abstract class IndexReader implements Closeable {
   /**
    * Returns the total number of occurrences of {@code term} across all
-   * documents (the sum of the freq() for each doc that has this term). This
-   * will be -1 if the codec doesn't support this measure. Note that, like other
-   * term measures, this measure does not take deleted documents into account.
+   * documents (the sum of the freq() for each doc that has this term).
+   * Note that, like other term measures, this measure does not take
+   * deleted documents into account.
    */
   public abstract long totalTermFreq(Term term) throws IOException;

   /**
-   * Returns the sum of {@link TermsEnum#docFreq()} for all terms in this field,
-   * or -1 if this measure isn't stored by the codec. Note that, just like other
-   * term measures, this measure does not take deleted documents into account.
+   * Returns the sum of {@link TermsEnum#docFreq()} for all terms in this field.
+   * Note that, just like other term measures, this measure does not take deleted
+   * documents into account.
    *
    * @see Terms#getSumDocFreq()
    */
   public abstract long getSumDocFreq(String field) throws IOException;

   /**
-   * Returns the number of documents that have at least one term for this field,
-   * or -1 if this measure isn't stored by the codec. Note that, just like other
-   * term measures, this measure does not take deleted documents into account.
+   * Returns the number of documents that have at least one term for this field.
+   * Note that, just like other term measures, this measure does not take deleted
+   * documents into account.
    *
    * @see Terms#getDocCount()
    */
@@ -476,9 +476,8 @@ public abstract class IndexReader implements Closeable {
   /**
    * Returns the sum of {@link TermsEnum#totalTermFreq} for all terms in this
-   * field, or -1 if this measure isn't stored by the codec (or if this fields
-   * omits term freq and positions). Note that, just like other term measures,
-   * this measure does not take deleted documents into account.
+   * field. Note that, just like other term measures, this measure does not take
+   * deleted documents into account.
    *
    * @see Terms#getSumTotalTermFreq()
    */
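
As a usage sketch of the tightened contracts (the field "body" and term "lucene" are placeholders, and the class below is invented for illustration): callers no longer need -1 guards around any of these measures.

    import java.io.IOException;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.Term;

    class ReaderStatsSketch {
      static void demo(IndexReader reader) throws IOException {
        Term t = new Term("body", "lucene");  // placeholder field/term
        long df = reader.docFreq(t);          // docs containing the term
        long ttf = reader.totalTermFreq(t);   // total occurrences, never -1 now
        long sumDf = reader.getSumDocFreq("body");
        long sumTtf = reader.getSumTotalTermFreq("body");
        int docCount = reader.getDocCount("body");
        // The invariants this commit makes CheckIndex enforce:
        assert ttf >= df;
        assert docCount <= reader.maxDoc();
        assert docCount <= sumDf && sumDf <= sumTtf;
      }
    }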

View File

@@ -149,9 +149,7 @@ public final class MultiTerms extends Terms {
     long sum = 0;
     for(Terms terms : subs) {
       final long v = terms.getSumTotalTermFreq();
-      if (v == -1) {
-        return -1;
-      }
+      assert v != -1;
       sum += v;
     }
     return sum;
@@ -162,9 +160,7 @@ public final class MultiTerms extends Terms {
     long sum = 0;
     for(Terms terms : subs) {
       final long v = terms.getSumDocFreq();
-      if (v == -1) {
-        return -1;
-      }
+      assert v != -1;
       sum += v;
     }
     return sum;
@@ -175,9 +171,7 @@ public final class MultiTerms extends Terms {
     int sum = 0;
     for(Terms terms : subs) {
       final int v = terms.getDocCount();
-      if (v == -1) {
-        return -1;
-      }
+      assert v != -1;
       sum += v;
     }
     return sum;

View File

@@ -326,9 +326,7 @@ public final class MultiTermsEnum extends TermsEnum {
     long sum = 0;
     for(int i=0;i<numTop;i++) {
       final long v = top[i].terms.totalTermFreq();
-      if (v == -1) {
-        return v;
-      }
+      assert v != -1;
       sum += v;
     }
     return sum;

View File

@@ -101,7 +101,7 @@ class SortedDocValuesTermsEnum extends TermsEnum {
   @Override
   public long totalTermFreq() {
-    return -1;
+    throw new UnsupportedOperationException();
   }

   @Override

View File

@@ -101,7 +101,7 @@ class SortedSetDocValuesTermsEnum extends TermsEnum {
   @Override
   public long totalTermFreq() {
-    return -1;
+    throw new UnsupportedOperationException();
   }

   @Override

View File

@@ -141,11 +141,11 @@ public final class TermContext {
   /** Expert: Accumulate term statistics. */
   public void accumulateStatistics(final int docFreq, final long totalTermFreq) {
+    assert docFreq >= 0;
+    assert totalTermFreq >= 0;
+    assert docFreq <= totalTermFreq;
     this.docFreq += docFreq;
-    if (this.totalTermFreq >= 0 && totalTermFreq >= 0)
-      this.totalTermFreq += totalTermFreq;
-    else
-      this.totalTermFreq = -1;
+    this.totalTermFreq += totalTermFreq;
   }

   /**

View File

@@ -99,25 +99,21 @@ public abstract class Terms {
    *  other term measures, this measure does not take deleted
    *  documents into account. */
   public abstract long size() throws IOException;

   /** Returns the sum of {@link TermsEnum#totalTermFreq} for
-   *  all terms in this field, or -1 if this measure isn't
-   *  stored by the codec (or if this fields omits term freq
-   *  and positions). Note that, just like other term
+   *  all terms in this field. Note that, just like other term
    *  measures, this measure does not take deleted documents
    *  into account. */
   public abstract long getSumTotalTermFreq() throws IOException;

   /** Returns the sum of {@link TermsEnum#docFreq()} for
-   *  all terms in this field, or -1 if this measure isn't
-   *  stored by the codec. Note that, just like other term
+   *  all terms in this field. Note that, just like other term
    *  measures, this measure does not take deleted documents
    *  into account. */
   public abstract long getSumDocFreq() throws IOException;

   /** Returns the number of documents that have at least one
-   *  term for this field, or -1 if this measure isn't
-   *  stored by the codec. Note that, just like other term
+   *  term for this field. Note that, just like other term
    *  measures, this measure does not take deleted documents
    *  into account. */
   public abstract int getDocCount() throws IOException;

View File

@@ -131,8 +131,7 @@ public abstract class TermsEnum implements BytesRefIterator {
   /** Returns the total number of occurrences of this term
    *  across all documents (the sum of the freq() for each
-   *  doc that has this term). This will be -1 if the
-   *  codec doesn't support this measure. Note that, like
+   *  doc that has this term). Note that, like
    *  other term measures, this measure does not take
    *  deleted documents into account. */
   public abstract long totalTermFreq() throws IOException;

View File

@@ -148,12 +148,8 @@
  * deleted documents, when segments are merged the statistic is updated as
  * those deleted documents are merged away.
  * <li>{@link org.apache.lucene.index.TermsEnum#totalTermFreq}: Returns the number
- * of occurrences of this term across all documents. Note that this statistic
- * is unavailable (returns <code>-1</code>) if term frequencies were omitted
- * from the index
- * ({@link org.apache.lucene.index.IndexOptions#DOCS DOCS})
- * for the field. Like docFreq(), it will also count occurrences that appear in
- * deleted documents.
+ * of occurrences of this term across all documents. Like docFreq(), it will
+ * also count occurrences that appear in deleted documents.
  * </ul>
  * <a name="fieldstats"></a>
  * <h3>
@@ -180,10 +176,7 @@
  * of tokens for the field. This can be thought of as the sum of
  * {@link org.apache.lucene.index.TermsEnum#totalTermFreq} across all terms in the
  * field, and like totalTermFreq() it will also count occurrences that appear in
- * deleted documents, and will be unavailable (returns <code>-1</code>) if term
- * frequencies were omitted from the index
- * ({@link org.apache.lucene.index.IndexOptions#DOCS DOCS})
- * for the field.
+ * deleted documents.
  * </ul>
  * <a name="segmentstats"></a>
  * <h3>

View File

@@ -277,11 +277,7 @@ public final class BlendedTermQuery extends Query {
     long ttf = 0;
     for (TermContext ctx : contexts) {
       df = Math.max(df, ctx.docFreq());
-      if (ctx.totalTermFreq() == -1L) {
-        ttf = -1L;
-      } else if (ttf != -1L) {
-        ttf += ctx.totalTermFreq();
-      }
+      ttf += ctx.totalTermFreq();
     }

     for (int i = 0; i < contexts.length; ++i) {

View File

@@ -26,6 +26,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;

+import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause.Occur;
@@ -299,6 +300,11 @@ final class BooleanWeight extends Weight {
     return scorerSupplier.get(Long.MAX_VALUE);
   }

+  @Override
+  public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
+    return getCacheHelper(context, weights);
+  }
+
   @Override
   public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
     int minShouldMatch = query.getMinimumNumberShouldMatch();

View File

@@ -23,7 +23,27 @@ import org.apache.lucene.index.Terms; // javadocs

 /**
- * Contains statistics for a collection (field)
+ * Contains statistics for a collection (field).
+ * <p>
+ * This class holds statistics across all documents for scoring purposes:
+ * <ul>
+ *   <li> {@link #maxDoc()}: number of documents.
+ *   <li> {@link #docCount()}: number of documents that contain this field.
+ *   <li> {@link #sumDocFreq()}: number of postings-list entries.
+ *   <li> {@link #sumTotalTermFreq()}: number of tokens.
+ * </ul>
+ * <p>
+ * The following conditions are always true:
+ * <ul>
+ *   <li> All statistics are positive integers: never zero or negative.
+ *   <li> {@code docCount} &lt;= {@code maxDoc}
+ *   <li> {@code docCount} &lt;= {@code sumDocFreq} &lt;= {@code sumTotalTermFreq}
+ * </ul>
+ * <p>
+ * Values may include statistics on deleted documents that have not yet been merged away.
+ * <p>
+ * Be careful when performing calculations on these values because they are represented
+ * as 64-bit integer values; you may need to cast to {@code double} for your use.
  * @lucene.experimental
  */
 public class CollectionStatistics {
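
A tiny sketch of these invariants at construction time. The numbers are invented, and the constructor argument order (field, maxDoc, docCount, sumTotalTermFreq, sumDocFreq) is assumed from the validation code below.

    import org.apache.lucene.search.CollectionStatistics;

    class CollectionStatisticsSketch {
      static CollectionStatistics example() {
        // 100 docs in the index, 80 of them contain "body", 500 term-document
        // pairs, 2000 tokens in total: docCount <= maxDoc and
        // docCount <= sumDocFreq <= sumTotalTermFreq all hold, so the
        // constructor's checks pass.
        return new CollectionStatistics("body", 100, 80, 2000, 500);
      }
    }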
@ -51,33 +71,23 @@ public class CollectionStatistics {
if (maxDoc <= 0) { if (maxDoc <= 0) {
throw new IllegalArgumentException("maxDoc must be positive, maxDoc: " + maxDoc); throw new IllegalArgumentException("maxDoc must be positive, maxDoc: " + maxDoc);
} }
if (docCount != -1) { // before this change: statistics could be marked unavailable with -1
  if (docCount <= 0) {
    throw new IllegalArgumentException("docCount must be positive, docCount: " + docCount);
  }
  if (docCount > maxDoc) {
    throw new IllegalArgumentException("docCount must not exceed maxDoc, docCount: " + docCount + ", maxDoc: " + maxDoc);
  }
}
if (sumDocFreq != -1) {
  if (sumDocFreq <= 0) {
    throw new IllegalArgumentException("sumDocFreq must be positive, sumDocFreq: " + sumDocFreq);
  }
  if (docCount != -1) {
    if (sumDocFreq < docCount) {
      throw new IllegalArgumentException("sumDocFreq must be at least docCount, sumDocFreq: " + sumDocFreq + ", docCount: " + docCount);
    }
  }
}
if (sumTotalTermFreq != -1) {
  if (sumTotalTermFreq <= 0) {
    throw new IllegalArgumentException("sumTotalTermFreq must be positive, sumTotalTermFreq: " + sumTotalTermFreq);
  }
  if (sumDocFreq != -1) {
    if (sumTotalTermFreq < sumDocFreq) {
      throw new IllegalArgumentException("sumTotalTermFreq must be at least sumDocFreq, sumTotalTermFreq: " + sumTotalTermFreq + ", sumDocFreq: " + sumDocFreq);
    }
  }
}
if (docCount <= 0) { // after this change: every statistic must be a valid positive value
  throw new IllegalArgumentException("docCount must be positive, docCount: " + docCount);
}
if (docCount > maxDoc) {
  throw new IllegalArgumentException("docCount must not exceed maxDoc, docCount: " + docCount + ", maxDoc: " + maxDoc);
}
if (sumDocFreq <= 0) {
  throw new IllegalArgumentException("sumDocFreq must be positive, sumDocFreq: " + sumDocFreq);
}
if (sumDocFreq < docCount) {
  throw new IllegalArgumentException("sumDocFreq must be at least docCount, sumDocFreq: " + sumDocFreq + ", docCount: " + docCount);
}
if (sumTotalTermFreq <= 0) {
  throw new IllegalArgumentException("sumTotalTermFreq must be positive, sumTotalTermFreq: " + sumTotalTermFreq);
}
if (sumTotalTermFreq < sumDocFreq) {
  throw new IllegalArgumentException("sumTotalTermFreq must be at least sumDocFreq, sumTotalTermFreq: " + sumTotalTermFreq + ", sumDocFreq: " + sumDocFreq);
}
this.field = field; this.field = field;
this.maxDoc = maxDoc; this.maxDoc = maxDoc;
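With the -1 escape hatches gone, invalid statistics now fail fast in the constructor instead of leaking into scoring code. A sketch of the new behavior (the argument values are illustrative):

import org.apache.lucene.search.CollectionStatistics;

class CollectionStatisticsCheck {
  public static void main(String[] args) {
    // field, maxDoc=100, docCount=80, sumTotalTermFreq=5000, sumDocFreq=1000: accepted
    new CollectionStatistics("body", 100, 80, 5000, 1000);
    try {
      new CollectionStatistics("body", 100, -1, -1, -1); // the old "unknown" sentinel
    } catch (IllegalArgumentException expected) {
      // "docCount must be positive, docCount: -1"
    }
  }
}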
@@ -86,33 +96,65 @@ public class CollectionStatistics {
this.sumDocFreq = sumDocFreq; this.sumDocFreq = sumDocFreq;
} }
/** returns the field name */ /**
* The field's name.
* <p>
* This value is never {@code null}.
* @return field's name, not {@code null}
*/
public final String field() { public final String field() {
return field; return field;
} }
/** returns the total number of documents, regardless of /**
* whether they all contain values for this field. * The total number of documents, regardless of
* @see IndexReader#maxDoc() */ * whether they all contain values for this field.
* <p>
* This value is always a positive number.
* @return total number of documents, in the range [1 .. {@link Long#MAX_VALUE}]
* @see IndexReader#maxDoc()
*/
public final long maxDoc() { public final long maxDoc() {
return maxDoc; return maxDoc;
} }
/** returns the total number of documents that /**
* have at least one term for this field. * The total number of documents that have at least
* @see Terms#getDocCount() */ * one term for this field.
* <p>
* This value is always a positive number, and never
* exceeds {@link #maxDoc()}.
* @return total number of documents containing this field, in the range [1 .. {@link #maxDoc()}]
* @see Terms#getDocCount()
*/
public final long docCount() { public final long docCount() {
return docCount; return docCount;
} }
/** returns the total number of tokens for this field /**
* @see Terms#getSumTotalTermFreq() */ * The total number of tokens for this field.
* This is the "word count" for this field across all documents.
* It is the sum of {@link TermStatistics#totalTermFreq()} across all terms.
* It is also the sum of each document's field length across all documents.
* <p>
* This value is always a positive number, and always at least {@link #sumDocFreq()}.
* @return total number of tokens in the field, in the range [{@link #sumDocFreq()} .. {@link Long#MAX_VALUE}]
* @see Terms#getSumTotalTermFreq()
*/
public final long sumTotalTermFreq() { public final long sumTotalTermFreq() {
return sumTotalTermFreq; return sumTotalTermFreq;
} }
/** returns the total number of postings for this field /**
* @see Terms#getSumDocFreq() */ * The total number of posting list entries for this field.
* This is the sum of term-document pairs: the sum of {@link TermStatistics#docFreq()} across all terms.
* It is also the sum of each document's unique term count for this field across all documents.
* <p>
* This value is always a positive number, always at least {@link #docCount()}, and never
* exceeds {@link #sumTotalTermFreq()}.
* @return number of posting list entries, in the range [{@link #docCount()} .. {@link #sumTotalTermFreq()}]
* @see Terms#getSumDocFreq()
*/
public final long sumDocFreq() { public final long sumDocFreq() {
return sumDocFreq; return sumDocFreq;
} }
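Taken together, the four getters support simple derived statistics. A sketch under the invariants documented above (the helper names are hypothetical):

import org.apache.lucene.search.CollectionStatistics;

class DerivedCollectionStats {
  // average number of unique terms in a document that contains the field
  static double avgUniqueTermsPerDoc(CollectionStatistics s) {
    return s.sumDocFreq() / (double) s.docCount();
  }

  // average number of times a term repeats within a document that contains it
  static double avgWithinDocFreq(CollectionStatistics s) {
    return s.sumTotalTermFreq() / (double) s.sumDocFreq();
  }
}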

View File

@@ -167,6 +167,11 @@ public final class ConstantScoreQuery extends Query {
return scorerSupplier.get(Long.MAX_VALUE); return scorerSupplier.get(Long.MAX_VALUE);
} }
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return innerWeight.getCacheHelper(context);
}
}; };
} else { } else {
return innerWeight; return innerWeight;

View File

@@ -137,6 +137,11 @@ public final class DisjunctionMaxQuery extends Query implements Iterable<Query>
} }
} }
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return getCacheHelper(context, weights);
}
/** Explain the score we computed for doc */ /** Explain the score we computed for doc */
@Override @Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException { public Explanation explain(LeafReaderContext context, int doc) throws IOException {

View File

@@ -23,6 +23,7 @@ import java.util.Objects;
import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
@@ -97,6 +98,11 @@ public final class DocValuesFieldExistsQuery extends Query {
return new ConstantScoreScorer(this, score(), iterator); return new ConstantScoreScorer(this, score(), iterator);
} }
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return getDocValuesCacheHelper(field, context);
}
}; };
} }
} }

View File

@@ -86,17 +86,17 @@ public final class DocValuesRewriteMethod extends MultiTermQuery.RewriteMethod {
@Override @Override
public long getSumTotalTermFreq() { public long getSumTotalTermFreq() {
return -1; throw new UnsupportedOperationException();
} }
@Override @Override
public long getSumDocFreq() { public long getSumDocFreq() {
return -1; throw new UnsupportedOperationException();
} }
@Override @Override
public int getDocCount() { public int getDocCount() {
return -1; throw new UnsupportedOperationException();
} }
@Override @Override
@@ -158,6 +158,11 @@ public final class DocValuesRewriteMethod extends MultiTermQuery.RewriteMethod {
} }
}); });
} }
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return getDocValuesCacheHelper(query.field, context);
}
}; };
} }
} }

View File

@@ -19,6 +19,7 @@ package org.apache.lucene.search;
import java.io.IOException; import java.io.IOException;
import java.util.Set; import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
@@ -55,6 +56,11 @@ public abstract class FilterWeight extends Weight {
this.in = weight; this.in = weight;
} }
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return in.getCacheHelper(context);
}
@Override @Override
public void extractTerms(Set<Term> terms) { public void extractTerms(Set<Term> terms) {
in.extractTerms(terms); in.extractTerms(terms);

View File

@@ -169,6 +169,13 @@ public final class IndexOrDocValuesQuery extends Query {
} }
return scorerSupplier.get(Long.MAX_VALUE); return scorerSupplier.get(Long.MAX_VALUE);
} }
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
// Both index and dv query should return the same values, so we can use
// the index query's cache helper here
return indexWeight.getCacheHelper(context);
}
}; };
} }

View File

@@ -722,8 +722,7 @@ public class LRUQueryCache implements QueryCache, Accountable {
policy.onUse(getQuery()); policy.onUse(getQuery());
} }
// TODO: should it be pluggable, eg. for queries that run on doc values? final IndexReader.CacheHelper cacheHelper = in.getCacheHelper(context);
final IndexReader.CacheHelper cacheHelper = context.reader().getCoreCacheHelper();
if (cacheHelper == null) { if (cacheHelper == null) {
// this segment is not suitable for caching // this segment is not suitable for caching
return in.scorerSupplier(context); return in.scorerSupplier(context);
@@ -788,14 +787,18 @@
return scorerSupplier.get(Long.MAX_VALUE); return scorerSupplier.get(Long.MAX_VALUE);
} }
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return in.getCacheHelper(context);
}
@Override @Override
public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
if (used.compareAndSet(false, true)) { if (used.compareAndSet(false, true)) {
policy.onUse(getQuery()); policy.onUse(getQuery());
} }
// TODO: should it be pluggable, eg. for queries that run on doc values? final IndexReader.CacheHelper cacheHelper = in.getCacheHelper(context);
final IndexReader.CacheHelper cacheHelper = context.reader().getCoreCacheHelper();
if (cacheHelper == null) { if (cacheHelper == null) {
// this segment is not suitable for caching // this segment is not suitable for caching
return in.bulkScorer(context); return in.bulkScorer(context);
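Both call sites now share one gate: the wrapped weight, not the cache, decides whether a segment is cacheable. A condensed sketch of the pattern, with the cache-hit path elided:

import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.BulkScorer;
import org.apache.lucene.search.Weight;

class CachingGateSketch {
  static BulkScorer bulkScorer(Weight in, LeafReaderContext context) throws IOException {
    final IndexReader.CacheHelper cacheHelper = in.getCacheHelper(context);
    if (cacheHelper == null) {
      return in.bulkScorer(context); // not suitable for caching: delegate directly
    }
    // cacheable: consult the cache keyed on cacheHelper.getKey() (elided in this sketch)
    throw new UnsupportedOperationException("cache lookup elided");
  }
}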

View File

@@ -19,6 +19,7 @@ package org.apache.lucene.search;
import java.io.IOException; import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.Bits; import org.apache.lucene.util.Bits;
@@ -39,6 +40,12 @@ public final class MatchAllDocsQuery extends Query {
public Scorer scorer(LeafReaderContext context) throws IOException { public Scorer scorer(LeafReaderContext context) throws IOException {
return new ConstantScoreScorer(this, score(), DocIdSetIterator.all(context.reader().maxDoc())); return new ConstantScoreScorer(this, score(), DocIdSetIterator.all(context.reader().maxDoc()));
} }
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return context.reader().getCoreCacheHelper();
}
@Override @Override
public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
final float score = score(); final float score = score();

View File

@@ -20,6 +20,7 @@ package org.apache.lucene.search;
import java.io.IOException; import java.io.IOException;
import java.util.Set; import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
@@ -58,6 +59,11 @@ public class MatchNoDocsQuery extends Query {
return null; return null;
} }
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return context.reader().getCoreCacheHelper();
}
}; };
} }

View File

@@ -291,6 +291,11 @@ public class MultiPhraseQuery extends Query {
} }
} }
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return context.reader().getCoreCacheHelper();
}
@Override @Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException { public Explanation explain(LeafReaderContext context, int doc) throws IOException {
Scorer scorer = scorer(context); Scorer scorer = scorer(context);

View File

@@ -22,6 +22,7 @@ import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Objects; import java.util.Objects;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
@@ -211,6 +212,11 @@ final class MultiTermQueryConstantScoreWrapper<Q extends MultiTermQuery> extends
return scorer(weightOrBitSet.set); return scorer(weightOrBitSet.set);
} }
} }
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return context.reader().getCoreCacheHelper();
}
}; };
} }
} }

View File

@@ -23,6 +23,7 @@ import java.util.Objects;
import org.apache.lucene.document.StringField; import org.apache.lucene.document.StringField;
import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
@@ -75,6 +76,11 @@ public final class NormsFieldExistsQuery extends Query {
DocIdSetIterator iterator = reader.getNormValues(field); DocIdSetIterator iterator = reader.getNormValues(field);
return new ConstantScoreScorer(this, score(), iterator); return new ConstantScoreScorer(this, score(), iterator);
} }
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return context.reader().getCoreCacheHelper();
}
}; };
} }
} }

View File

@@ -441,7 +441,12 @@ public class PhraseQuery extends Query {
needsScores, totalMatchCost); needsScores, totalMatchCost);
} }
} }
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return context.reader().getCoreCacheHelper();
}
// only called from assert // only called from assert
private boolean termNotInReader(LeafReader reader, Term term) throws IOException { private boolean termNotInReader(LeafReader reader, Term term) throws IOException {
return reader.docFreq(term) == 0; return reader.docFreq(term) == 0;
@@ -492,14 +497,13 @@
* of processing the occurrences of a term * of processing the occurrences of a term
* in a document that contains the term. * in a document that contains the term.
* This is for use by {@link TwoPhaseIterator#matchCost} implementations. * This is for use by {@link TwoPhaseIterator#matchCost} implementations.
* <br>This may be inaccurate when {@link TermsEnum#totalTermFreq()} is not available.
* @param termsEnum The term is the term at which this TermsEnum is positioned. * @param termsEnum The term is the term at which this TermsEnum is positioned.
*/ */
static float termPositionsCost(TermsEnum termsEnum) throws IOException { static float termPositionsCost(TermsEnum termsEnum) throws IOException {
int docFreq = termsEnum.docFreq(); int docFreq = termsEnum.docFreq();
assert docFreq > 0; assert docFreq > 0;
long totalTermFreq = termsEnum.totalTermFreq(); // -1 when not available long totalTermFreq = termsEnum.totalTermFreq();
float expOccurrencesInMatchingDoc = (totalTermFreq < docFreq) ? 1 : (totalTermFreq / (float) docFreq); float expOccurrencesInMatchingDoc = totalTermFreq / (float) docFreq;
return TERM_POSNS_SEEK_OPS_PER_DOC + expOccurrencesInMatchingDoc * TERM_OPS_PER_POS; return TERM_POSNS_SEEK_OPS_PER_DOC + expOccurrencesInMatchingDoc * TERM_OPS_PER_POS;
} }
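A worked instance of the simplified formula; the two cost constants are assumed here to mirror the ones defined in PhraseQuery, and the term statistics are illustrative:

class TermPositionsCostExample {
  static final int TERM_POSNS_SEEK_OPS_PER_DOC = 128; // assumed value, for illustration
  static final int TERM_OPS_PER_POS = 7;              // assumed value, for illustration

  public static void main(String[] args) {
    long docFreq = 250, totalTermFreq = 1000;
    float expOccurrencesInMatchingDoc = totalTermFreq / (float) docFreq; // 4.0
    float cost = TERM_POSNS_SEEK_OPS_PER_DOC + expOccurrencesInMatchingDoc * TERM_OPS_PER_POS;
    System.out.println(cost); // 156.0
  }
}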

View File

@@ -23,6 +23,7 @@ import java.util.Collection;
import java.util.Iterator; import java.util.Iterator;
import java.util.NoSuchElementException; import java.util.NoSuchElementException;
import org.apache.lucene.document.IntPoint; import org.apache.lucene.document.IntPoint;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PointValues.IntersectVisitor; import org.apache.lucene.index.PointValues.IntersectVisitor;
@@ -150,6 +151,11 @@ public abstract class PointInSetQuery extends Query {
return new ConstantScoreScorer(this, score(), result.build().iterator()); return new ConstantScoreScorer(this, score(), result.build().iterator());
} }
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return context.reader().getCoreCacheHelper();
}
}; };
} }

View File

@@ -20,6 +20,7 @@ import java.io.IOException;
import java.util.Arrays; import java.util.Arrays;
import java.util.Objects; import java.util.Objects;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.PointValues; import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.PointValues.IntersectVisitor; import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation; import org.apache.lucene.index.PointValues.Relation;
@@ -321,6 +322,11 @@ public abstract class PointRangeQuery extends Query {
} }
return scorerSupplier.get(Long.MAX_VALUE); return scorerSupplier.get(Long.MAX_VALUE);
} }
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return context.reader().getCoreCacheHelper();
}
}; };
} }

View File

@@ -140,11 +140,7 @@ public final class SynonymQuery extends Query {
TermStatistics termStats = searcher.termStatistics(terms[i], termContexts[i]); TermStatistics termStats = searcher.termStatistics(terms[i], termContexts[i]);
if (termStats != null) { if (termStats != null) {
docFreq = Math.max(termStats.docFreq(), docFreq); docFreq = Math.max(termStats.docFreq(), docFreq);
if (termStats.totalTermFreq() == -1) { totalTermFreq += termStats.totalTermFreq();
totalTermFreq = -1;
} else if (totalTermFreq != -1) {
totalTermFreq += termStats.totalTermFreq();
}
} }
} }
this.similarity = searcher.getSimilarity(true); this.similarity = searcher.getSimilarity(true);
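The blending above now runs unconditionally: document frequency takes the maximum across the synonym terms, while total term frequency sums, since the group is scored as a single pseudo-term. Illustrative numbers:

class SynonymStatsExample {
  public static void main(String[] args) {
    // two synonym terms with (docFreq, totalTermFreq) = (10, 30) and (4, 5)
    long docFreq = Math.max(10, 4); // 10: a document matching either term matches the group
    long totalTermFreq = 30 + 5;    // 35: occurrences of the group simply add up
    System.out.println(docFreq + " " + totalTermFreq);
  }
}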
@@ -217,6 +213,11 @@
return new SynonymScorer(simScorer, this, subScorers); return new SynonymScorer(simScorer, this, subScorers);
} }
} }
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return context.reader().getCoreCacheHelper();
}
} }
static class SynonymScorer extends DisjunctionScorer { static class SynonymScorer extends DisjunctionScorer {

View File

@@ -315,6 +315,11 @@ public class TermInSetQuery extends Query implements Accountable {
return scorer(weightOrBitSet.set); return scorer(weightOrBitSet.set);
} }
} }
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return context.reader().getCoreCacheHelper();
}
}; };
} }
} }

View File

@@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.Objects; import java.util.Objects;
import java.util.Set; import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
@@ -65,9 +66,9 @@ public class TermQuery extends Query {
collectionStats = searcher.collectionStatistics(term.field()); collectionStats = searcher.collectionStatistics(term.field());
termStats = searcher.termStatistics(term, termStates); termStats = searcher.termStatistics(term, termStates);
} else { } else {
// we do not need the actual stats, use fake stats with docFreq=maxDoc=1 and ttf=-1 // we do not need the actual stats, use fake stats with docFreq=maxDoc=ttf=1
collectionStats = new CollectionStatistics(term.field(), 1, -1, -1, -1); collectionStats = new CollectionStatistics(term.field(), 1, 1, 1, 1);
termStats = new TermStatistics(term.bytes(), 1, -1); termStats = new TermStatistics(term.bytes(), 1, 1);
} }
if (termStats == null) { if (termStats == null) {
@@ -99,6 +100,11 @@
return new TermScorer(this, docs, similarity.simScorer(stats, context)); return new TermScorer(this, docs, similarity.simScorer(stats, context));
} }
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return context.reader().getCoreCacheHelper();
}
/** /**
* Returns a {@link TermsEnum} positioned at this weight's Term or null if * Returns a {@link TermsEnum} positioned at this weight's Term or null if
* the term does not exist in the given context * the term does not exist in the given context

View File

@@ -24,8 +24,29 @@ import org.apache.lucene.index.TermsEnum; // javadocs
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
/** /**
* Contains statistics for a specific term * Contains statistics for a specific term
* <p>
* This class holds statistics for this term across all documents for scoring purposes:
* <ul>
* <li> {@link #docFreq}: number of documents this term occurs in.
* <li> {@link #totalTermFreq}: number of tokens for this term.
* </ul>
* <p>
* The following conditions are always true:
* <ul>
* <li> All statistics are positive integers: never zero or negative.
* <li> {@code docFreq} &lt;= {@code totalTermFreq}
* <li> {@code docFreq} &lt;= {@code sumDocFreq} of the collection
* <li> {@code totalTermFreq} &lt;= {@code sumTotalTermFreq} of the collection
* </ul>
* <p>
* Values may include statistics on deleted documents that have not yet been merged away.
* <p>
* Be careful when performing calculations on these values because they are represented
* as 64-bit integer values; you may need to cast to {@code double} for your use.
* @lucene.experimental * @lucene.experimental
*/ */
// TODO: actually add missing cross-checks to guarantee TermStatistics is in bounds of CollectionStatistics,
// otherwise many similarity functions will implode.
public class TermStatistics { public class TermStatistics {
private final BytesRef term; private final BytesRef term;
private final long docFreq; private final long docFreq;
@@ -45,29 +66,52 @@ public class TermStatistics {
if (docFreq <= 0) { if (docFreq <= 0) {
throw new IllegalArgumentException("docFreq must be positive, docFreq: " + docFreq); throw new IllegalArgumentException("docFreq must be positive, docFreq: " + docFreq);
} }
if (totalTermFreq != -1) { // before this change: -1 meant "not available"
  if (totalTermFreq < docFreq) {
    throw new IllegalArgumentException("totalTermFreq must be at least docFreq, totalTermFreq: " + totalTermFreq + ", docFreq: " + docFreq);
  }
}
if (totalTermFreq <= 0) { // after this change: always a valid positive value
  throw new IllegalArgumentException("totalTermFreq must be positive, totalTermFreq: " + totalTermFreq);
}
if (totalTermFreq < docFreq) {
  throw new IllegalArgumentException("totalTermFreq must be at least docFreq, totalTermFreq: " + totalTermFreq + ", docFreq: " + docFreq);
}
this.term = term; this.term = term;
this.docFreq = docFreq; this.docFreq = docFreq;
this.totalTermFreq = totalTermFreq; this.totalTermFreq = totalTermFreq;
} }
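As with CollectionStatistics, the -1 sentinel is now rejected up front. A short sketch (values illustrative):

import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.util.BytesRef;

class TermStatisticsCheck {
  public static void main(String[] args) {
    new TermStatistics(new BytesRef("lucene"), 10, 42); // docFreq=10 <= totalTermFreq=42: accepted
    try {
      new TermStatistics(new BytesRef("lucene"), 10, -1); // the old "unknown" sentinel
    } catch (IllegalArgumentException expected) {
      // "totalTermFreq must be positive, totalTermFreq: -1"
    }
  }
}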
/** returns the term text */ /**
* The term text.
* <p>
* This value is never {@code null}.
* @return term's text, not {@code null}
*/
public final BytesRef term() { public final BytesRef term() {
return term; return term;
} }
/** returns the number of documents this term occurs in /**
* @see TermsEnum#docFreq() */ * The number of documents this term occurs in.
* <p>
* This is the document-frequency for the term: the count of documents
* where the term appears at least one time.
* <p>
* This value is always a positive number, and never
* exceeds {@link #totalTermFreq}. It also cannot exceed {@link CollectionStatistics#sumDocFreq()}.
* @return document frequency, in the range [1 .. {@link #totalTermFreq()}]
* @see TermsEnum#docFreq()
*/
public final long docFreq() { public final long docFreq() {
return docFreq; return docFreq;
} }
/** returns the total number of occurrences of this term /**
* @see TermsEnum#totalTermFreq() */ * The total number of occurrences of this term.
* <p>
* This is the token count for the term: the number of times it appears in the field across all documents.
* <p>
* This value is always a positive number, always at least {@link #docFreq()},
* and never exceeds {@link CollectionStatistics#sumTotalTermFreq()}.
* @return number of occurrences, in the range [{@link #docFreq()} .. {@link CollectionStatistics#sumTotalTermFreq()}]
* @see TermsEnum#totalTermFreq()
*/
public final long totalTermFreq() { public final long totalTermFreq() {
return totalTermFreq; return totalTermFreq;
} }
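One place both getters feed directly into scoring is a language-model style collection probability; a hedged sketch (the class and method names are hypothetical):

import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.TermStatistics;

class CollectionModelSketch {
  // maximum-likelihood probability of the term in the collection; the documented
  // bounds guarantee a result in (0, 1]
  static double collectionProbability(TermStatistics term, CollectionStatistics field) {
    return term.totalTermFreq() / (double) field.sumTotalTermFreq();
  }
}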

View File

@@ -18,8 +18,11 @@ package org.apache.lucene.search;
import java.io.IOException; import java.io.IOException;
import java.util.List;
import java.util.Set; import java.util.Set;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
@@ -102,6 +105,55 @@ public abstract class Weight {
*/ */
public abstract Scorer scorer(LeafReaderContext context) throws IOException; public abstract Scorer scorer(LeafReaderContext context) throws IOException;
/**
* Returns an {@link org.apache.lucene.index.IndexReader.CacheHelper} to cache this query against
*
* Weights that rely only on Terms or Points can return {@code context.reader().getCoreCacheHelper()}.
* Weights that use DocValues should call {@link #getDocValuesCacheHelper(String, LeafReaderContext)}.
* Weights that should not be cached at all should return {@code null}.
*
* @param context the {@link LeafReaderContext} to cache against
* @return an {@link org.apache.lucene.index.IndexReader.CacheHelper} indicating the cache level
*/
public abstract IndexReader.CacheHelper getCacheHelper(LeafReaderContext context);
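A minimal sketch of the first option in the contract above, for a hypothetical weight that depends only on per-segment core structures:

import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;

class AllDocsWeight extends ConstantScoreWeight {
  AllDocsWeight(Query query, float score) {
    super(query, score);
  }

  @Override
  public Scorer scorer(LeafReaderContext context) throws IOException {
    return new ConstantScoreScorer(this, score(), DocIdSetIterator.all(context.reader().maxDoc()));
  }

  @Override
  public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
    // relies only on postings structures, so the segment's core key is stable
    return context.reader().getCoreCacheHelper();
  }
}

A weight that reads doc values would delegate to getDocValuesCacheHelper instead, as several of the diffs above do.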
/**
* Given a collection of Weights, return an {@link org.apache.lucene.index.IndexReader.CacheHelper} that will satisfy
* the requirements of them all.
* @param context the {@link LeafReaderContext} to cache against
* @param weights an array of {@link Weight} to be cached
* @return an {@link org.apache.lucene.index.IndexReader.CacheHelper} indicating the cache level
*/
protected static IndexReader.CacheHelper getCacheHelper(LeafReaderContext context, List<? extends Weight> weights) {
if (weights.size() == 0)
return null;
IndexReader.CacheHelper helper = weights.get(0).getCacheHelper(context);
if (helper == null)
return null;
for (int i = 1; i < weights.size(); i++) {
IndexReader.CacheHelper nextHelper = weights.get(i).getCacheHelper(context);
if (nextHelper == null || nextHelper != helper)
return null;
}
return helper;
}
/**
* Returns an {@link org.apache.lucene.index.IndexReader.CacheHelper} for a Weight using doc values
*
* This will return the core reader's cache helper if the given field has no doc values
* updates, and {@code null} if it does.
*
* @param field the docvalues field
* @param ctx the {@link LeafReaderContext} to cache against
* @return an {@link org.apache.lucene.index.IndexReader.CacheHelper} indicating the cache level
*/
public static IndexReader.CacheHelper getDocValuesCacheHelper(String field, LeafReaderContext ctx) {
FieldInfo fi = ctx.reader().getFieldInfos().fieldInfo(field);
if (fi == null || fi.getDocValuesGen() == -1)
return ctx.reader().getCoreCacheHelper();
return null;
}
/** /**
* Optional method. * Optional method.
* Get a {@link ScorerSupplier}, which allows to know the cost of the {@link Scorer} * Get a {@link ScorerSupplier}, which allows to know the cost of the {@link Scorer}

View File

@@ -85,19 +85,7 @@ public class BM25Similarity extends Similarity {
/** The default implementation computes the average as <code>sumTotalTermFreq / docCount</code> */ /** The default implementation computes the average as <code>sumTotalTermFreq / docCount</code> */
protected float avgFieldLength(CollectionStatistics collectionStats) { protected float avgFieldLength(CollectionStatistics collectionStats) {
final long sumTotalTermFreq; return (float) (collectionStats.sumTotalTermFreq() / (double) collectionStats.docCount());
if (collectionStats.sumTotalTermFreq() == -1) {
// frequencies are omitted (tf=1), its # of postings
if (collectionStats.sumDocFreq() == -1) {
// theoretical case only: remove!
return 1f;
}
sumTotalTermFreq = collectionStats.sumDocFreq();
} else {
sumTotalTermFreq = collectionStats.sumTotalTermFreq();
}
final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
return (float) (sumTotalTermFreq / (double) docCount);
} }
/** /**
@@ -161,7 +149,7 @@
*/ */
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) { public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) {
final long df = termStats.docFreq(); final long df = termStats.docFreq();
final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount(); final long docCount = collectionStats.docCount();
final float idf = idf(df, docCount); final float idf = idf(df, docCount);
return Explanation.match(idf, "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:", return Explanation.match(idf, "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:",
Explanation.match(df, "n, number of documents containing term"), Explanation.match(df, "n, number of documents containing term"),
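Plugging illustrative numbers into that formula:

class Bm25IdfExample {
  public static void main(String[] args) {
    double n = 10;   // docFreq: documents containing the term
    double N = 100;  // docCount: documents containing the field
    double idf = Math.log(1 + (N - n + 0.5) / (n + 0.5));
    System.out.println(idf); // ~2.264: rarer terms score higher
  }
}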

View File

@@ -62,7 +62,7 @@ public class ClassicSimilarity extends TFIDFSimilarity {
@Override @Override
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) { public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) {
final long df = termStats.docFreq(); final long df = termStats.docFreq();
final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount(); final long docCount = collectionStats.docCount();
final float idf = idf(df, docCount); final float idf = idf(df, docCount);
return Explanation.match(idf, "idf, computed as log((docCount+1)/(docFreq+1)) + 1 from:", return Explanation.match(idf, "idf, computed as log((docCount+1)/(docFreq+1)) + 1 from:",
Explanation.match(df, "docFreq, number of documents containing term"), Explanation.match(df, "docFreq, number of documents containing term"),

View File

@@ -100,42 +100,16 @@ public abstract class SimilarityBase extends Similarity {
/** Fills all member fields defined in {@code BasicStats} in {@code stats}. /** Fills all member fields defined in {@code BasicStats} in {@code stats}.
* Subclasses can override this method to fill additional stats. */ * Subclasses can override this method to fill additional stats. */
protected void fillBasicStats(BasicStats stats, CollectionStatistics collectionStats, TermStatistics termStats) { protected void fillBasicStats(BasicStats stats, CollectionStatistics collectionStats, TermStatistics termStats) {
// #positions(field) must be >= #positions(term) // TODO: validate this for real, somewhere else
assert collectionStats.sumTotalTermFreq() == -1 || collectionStats.sumTotalTermFreq() >= termStats.totalTermFreq(); assert termStats.totalTermFreq() <= collectionStats.sumTotalTermFreq();
long numberOfDocuments = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount(); assert termStats.docFreq() <= collectionStats.sumDocFreq();
long docFreq = termStats.docFreq();
long totalTermFreq = termStats.totalTermFreq();
// frequencies are omitted, all postings have tf=1, so totalTermFreq = docFreq
if (totalTermFreq == -1) {
totalTermFreq = docFreq;
}
final long numberOfFieldTokens;
final double avgFieldLength;
if (collectionStats.sumTotalTermFreq() == -1) {
// frequencies are omitted, so sumTotalTermFreq = # postings
if (collectionStats.sumDocFreq() == -1) {
// theoretical case only: remove!
numberOfFieldTokens = docFreq;
avgFieldLength = 1f;
} else {
numberOfFieldTokens = collectionStats.sumDocFreq();
avgFieldLength = (float) (collectionStats.sumDocFreq() / (double)numberOfDocuments);
}
} else {
numberOfFieldTokens = collectionStats.sumTotalTermFreq();
avgFieldLength = (float) (collectionStats.sumTotalTermFreq() / (double)numberOfDocuments);
}
// TODO: add sumDocFreq for field (numberOfFieldPostings) // TODO: add sumDocFreq for field (numberOfFieldPostings)
stats.setNumberOfDocuments(numberOfDocuments); stats.setNumberOfDocuments(collectionStats.docCount());
stats.setNumberOfFieldTokens(numberOfFieldTokens); stats.setNumberOfFieldTokens(collectionStats.sumTotalTermFreq());
stats.setAvgFieldLength(avgFieldLength); stats.setAvgFieldLength(collectionStats.sumTotalTermFreq() / (double) collectionStats.docCount());
stats.setDocFreq(docFreq); stats.setDocFreq(termStats.docFreq());
stats.setTotalTermFreq(totalTermFreq); stats.setTotalTermFreq(termStats.totalTermFreq());
} }
/** /**

View File

@@ -448,7 +448,7 @@ public abstract class TFIDFSimilarity extends Similarity {
*/ */
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) { public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) {
final long df = termStats.docFreq(); final long df = termStats.docFreq();
final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount(); final long docCount = collectionStats.docCount();
final float idf = idf(df, docCount); final float idf = idf(df, docCount);
return Explanation.match(idf, "idf(docFreq, docCount)", return Explanation.match(idf, "idf(docFreq, docCount)",
Explanation.match(df, "docFreq, number of documents containing term"), Explanation.match(df, "docFreq, number of documents containing term"),

View File

@@ -32,13 +32,13 @@
* <a name="sims"></a> * <a name="sims"></a>
* <h2>Summary of the Ranking Methods</h2> * <h2>Summary of the Ranking Methods</h2>
* *
* <p>{@link org.apache.lucene.search.similarities.ClassicSimilarity} is the original Lucene
* scoring function. It is based on a highly optimized
* <a href="http://en.wikipedia.org/wiki/Vector_Space_Model">Vector Space Model</a>. For more
* information, see {@link org.apache.lucene.search.similarities.TFIDFSimilarity}.
*
* <p>{@link org.apache.lucene.search.similarities.BM25Similarity} is an optimized * <p>{@link org.apache.lucene.search.similarities.BM25Similarity} is an optimized
* implementation of the successful Okapi BM25 model. * implementation of the successful Okapi BM25 model.
*
* <p>{@link org.apache.lucene.search.similarities.ClassicSimilarity} is the original Lucene
* scoring function. It is based on the
* <a href="http://en.wikipedia.org/wiki/Vector_Space_Model">Vector Space Model</a>. For more
* information, see {@link org.apache.lucene.search.similarities.TFIDFSimilarity}.
* *
* <p>{@link org.apache.lucene.search.similarities.SimilarityBase} provides a basic * <p>{@link org.apache.lucene.search.similarities.SimilarityBase} provides a basic
* implementation of the Similarity contract and exposes a highly simplified * implementation of the Similarity contract and exposes a highly simplified

View File

@@ -19,8 +19,10 @@ package org.apache.lucene.search.spans;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.Map; import java.util.Map;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext; import org.apache.lucene.index.TermContext;
@@ -115,5 +117,10 @@ public final class SpanContainingQuery extends SpanContainQuery {
} }
}; };
} }
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return getCacheHelper(context, Arrays.asList(bigWeight, littleWeight));
}
} }
} }

View File

@@ -229,6 +229,11 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
w.extractTerms(terms); w.extractTerms(terms);
} }
} }
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return getCacheHelper(context, subWeights);
}
} }
@Override @Override
@@ -319,6 +324,11 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
public void extractTerms(Set<Term> terms) { public void extractTerms(Set<Term> terms) {
} }
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return context.reader().getCoreCacheHelper();
}
} }
@Override @Override

View File

@@ -18,6 +18,7 @@ package org.apache.lucene.search.spans;
import java.io.IOException; import java.io.IOException;
import java.util.Arrays;
import java.util.Map; import java.util.Map;
import java.util.Objects; import java.util.Objects;
import java.util.Set; import java.util.Set;
@@ -191,6 +192,11 @@ public final class SpanNotQuery extends SpanQuery {
public void extractTerms(Set<Term> terms) { public void extractTerms(Set<Term> terms) {
includeWeight.extractTerms(terms); includeWeight.extractTerms(terms);
} }
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return getCacheHelper(context, Arrays.asList(includeWeight, excludeWeight));
}
} }
@Override @Override

View File

@@ -138,6 +138,11 @@ public final class SpanOrQuery extends SpanQuery {
} }
} }
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return getCacheHelper(context, subWeights);
}
@Override @Override
public void extractTermContexts(Map<Term, TermContext> contexts) { public void extractTermContexts(Map<Term, TermContext> contexts) {
for (SpanWeight w : subWeights) { for (SpanWeight w : subWeights) {

View File

@@ -86,6 +86,11 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea
matchWeight.extractTerms(terms); matchWeight.extractTerms(terms);
} }
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return matchWeight.getCacheHelper(context);
}
@Override @Override
public void extractTermContexts(Map<Term, TermContext> contexts) { public void extractTermContexts(Map<Term, TermContext> contexts) {
matchWeight.extractTermContexts(contexts); matchWeight.extractTermContexts(contexts);

View File

@@ -23,6 +23,7 @@ import java.util.Map;
import java.util.Objects; import java.util.Objects;
import java.util.Set; import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.PostingsEnum;
@@ -91,6 +92,11 @@ public class SpanTermQuery extends SpanQuery {
terms.add(term); terms.add(term);
} }
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return context.reader().getCoreCacheHelper();
}
@Override @Override
public void extractTermContexts(Map<Term, TermContext> contexts) { public void extractTermContexts(Map<Term, TermContext> contexts) {
contexts.put(term, termContext); contexts.put(term, termContext);
@@ -135,7 +141,6 @@ public class SpanTermQuery extends SpanQuery {
/** Returns an expected cost in simple operations /** Returns an expected cost in simple operations
* of processing the occurrences of a term * of processing the occurrences of a term
* in a document that contains the term. * in a document that contains the term.
* <br>This may be inaccurate when {@link TermsEnum#totalTermFreq()} is not available.
* @param termsEnum The term is the term at which this TermsEnum is positioned. * @param termsEnum The term is the term at which this TermsEnum is positioned.
* <p> * <p>
* This is a copy of org.apache.lucene.search.PhraseQuery.termPositionsCost(). * This is a copy of org.apache.lucene.search.PhraseQuery.termPositionsCost().
@@ -146,8 +151,9 @@
static float termPositionsCost(TermsEnum termsEnum) throws IOException { static float termPositionsCost(TermsEnum termsEnum) throws IOException {
int docFreq = termsEnum.docFreq(); int docFreq = termsEnum.docFreq();
assert docFreq > 0; assert docFreq > 0;
long totalTermFreq = termsEnum.totalTermFreq(); // -1 when not available long totalTermFreq = termsEnum.totalTermFreq();
float expOccurrencesInMatchingDoc = (totalTermFreq < docFreq) ? 1 : (totalTermFreq / (float) docFreq); assert totalTermFreq > 0;
float expOccurrencesInMatchingDoc = totalTermFreq / (float) docFreq;
return TERM_POSNS_SEEK_OPS_PER_DOC + expOccurrencesInMatchingDoc * TERM_OPS_PER_POS; return TERM_POSNS_SEEK_OPS_PER_DOC + expOccurrencesInMatchingDoc * TERM_OPS_PER_POS;
} }

View File

@@ -19,8 +19,10 @@ package org.apache.lucene.search.spans;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.Map; import java.util.Map;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext; import org.apache.lucene.index.TermContext;
@@ -116,6 +118,11 @@ public final class SpanWithinQuery extends SpanContainQuery {
} }
}; };
} }
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return getCacheHelper(context, Arrays.asList(littleWeight, bigWeight));
}
} }
} }

View File

@@ -172,10 +172,11 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase {
// NOTE: we don't assert hasOffsets/hasPositions/hasPayloads because they are allowed to be different // NOTE: we don't assert hasOffsets/hasPositions/hasPayloads because they are allowed to be different
boolean bothHaveFreqs = leftTerms.hasFreqs() && rightTerms.hasFreqs();
boolean bothHavePositions = leftTerms.hasPositions() && rightTerms.hasPositions(); boolean bothHavePositions = leftTerms.hasPositions() && rightTerms.hasPositions();
TermsEnum leftTermsEnum = leftTerms.iterator(); TermsEnum leftTermsEnum = leftTerms.iterator();
TermsEnum rightTermsEnum = rightTerms.iterator(); TermsEnum rightTermsEnum = rightTerms.iterator();
assertTermsEnum(leftTermsEnum, rightTermsEnum, true, bothHavePositions); assertTermsEnum(leftTermsEnum, rightTermsEnum, true, bothHaveFreqs, bothHavePositions);
assertTermsSeeking(leftTerms, rightTerms); assertTermsSeeking(leftTerms, rightTerms);
@@ -188,7 +189,7 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase {
// TODO: test start term too // TODO: test start term too
TermsEnum leftIntersection = leftTerms.intersect(automaton, null); TermsEnum leftIntersection = leftTerms.intersect(automaton, null);
TermsEnum rightIntersection = rightTerms.intersect(automaton, null); TermsEnum rightIntersection = rightTerms.intersect(automaton, null);
assertTermsEnum(leftIntersection, rightIntersection, rarely(), bothHavePositions); assertTermsEnum(leftIntersection, rightIntersection, rarely(), bothHaveFreqs, bothHavePositions);
} }
} }
} }
@@ -263,13 +264,9 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase {
* checks collection-level statistics on Terms * checks collection-level statistics on Terms
*/ */
public void assertTermsStatistics(Terms leftTerms, Terms rightTerms) throws Exception { public void assertTermsStatistics(Terms leftTerms, Terms rightTerms) throws Exception {
if (leftTerms.getDocCount() != -1 && rightTerms.getDocCount() != -1) { // before this change
  assertEquals(leftTerms.getDocCount(), rightTerms.getDocCount());
}
if (leftTerms.getSumDocFreq() != -1 && rightTerms.getSumDocFreq() != -1) {
  assertEquals(leftTerms.getSumDocFreq(), rightTerms.getSumDocFreq());
}
if (leftTerms.getSumTotalTermFreq() != -1 && rightTerms.getSumTotalTermFreq() != -1) {
  assertEquals(leftTerms.getSumTotalTermFreq(), rightTerms.getSumTotalTermFreq());
}
assertEquals(leftTerms.getDocCount(), rightTerms.getDocCount()); // after this change
assertEquals(leftTerms.getSumDocFreq(), rightTerms.getSumDocFreq());
if (leftTerms.hasFreqs() && rightTerms.hasFreqs()) {
  assertEquals(leftTerms.getSumTotalTermFreq(), rightTerms.getSumTotalTermFreq());
}
if (leftTerms.size() != -1 && rightTerms.size() != -1) { if (leftTerms.size() != -1 && rightTerms.size() != -1) {
@@ -281,7 +278,7 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase {
* checks the terms enum sequentially * checks the terms enum sequentially
* if deep is false, it does a 'shallow' test that doesn't go down to the docsenums * if deep is false, it does a 'shallow' test that doesn't go down to the docsenums
*/ */
public void assertTermsEnum(TermsEnum leftTermsEnum, TermsEnum rightTermsEnum, boolean deep, boolean hasPositions) throws Exception { public void assertTermsEnum(TermsEnum leftTermsEnum, TermsEnum rightTermsEnum, boolean deep, boolean hasFreqs, boolean hasPositions) throws Exception {
BytesRef term; BytesRef term;
PostingsEnum leftPositions = null; PostingsEnum leftPositions = null;
PostingsEnum rightPositions = null; PostingsEnum rightPositions = null;
@@ -290,7 +287,7 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase {
while ((term = leftTermsEnum.next()) != null) { while ((term = leftTermsEnum.next()) != null) {
assertEquals(term, rightTermsEnum.next()); assertEquals(term, rightTermsEnum.next());
assertTermStats(leftTermsEnum, rightTermsEnum); assertTermStats(leftTermsEnum, rightTermsEnum, hasFreqs);
if (deep) { if (deep) {
if (hasPositions) { if (hasPositions) {
// with payloads + off // with payloads + off
@@ -350,9 +347,9 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase {
/** /**
* checks term-level statistics * checks term-level statistics
*/ */
public void assertTermStats(TermsEnum leftTermsEnum, TermsEnum rightTermsEnum) throws Exception { public void assertTermStats(TermsEnum leftTermsEnum, TermsEnum rightTermsEnum, boolean bothHaveFreqs) throws Exception {
assertEquals(leftTermsEnum.docFreq(), rightTermsEnum.docFreq()); assertEquals(leftTermsEnum.docFreq(), rightTermsEnum.docFreq());
if (leftTermsEnum.totalTermFreq() != -1 && rightTermsEnum.totalTermFreq() != -1) { if (bothHaveFreqs) {
assertEquals(leftTermsEnum.totalTermFreq(), rightTermsEnum.totalTermFreq()); assertEquals(leftTermsEnum.totalTermFreq(), rightTermsEnum.totalTermFreq());
} }
} }

View File

@@ -148,26 +148,22 @@ public class TestMultiTermsEnum extends LuceneTestCase {
@Override @Override
public long size() throws IOException { public long size() throws IOException {
// Docs say we can return -1 if we don't know. throw new UnsupportedOperationException();
return -1;
} }
@Override @Override
public long getSumTotalTermFreq() throws IOException { public long getSumTotalTermFreq() throws IOException {
// Docs say we can return -1 if we don't know. throw new UnsupportedOperationException();
return -1;
} }
@Override @Override
public long getSumDocFreq() throws IOException { public long getSumDocFreq() throws IOException {
// Docs say we can return -1 if we don't know. throw new UnsupportedOperationException();
return -1;
} }
@Override @Override
public int getDocCount() throws IOException { public int getDocCount() throws IOException {
// Docs say we can return -1 if we don't know. throw new UnsupportedOperationException();
return -1;
} }
@Override @Override

View File

@@ -445,7 +445,7 @@ public class TestOmitTf extends LuceneTestCase {
} }
} }
/** test that when freqs are omitted, that totalTermFreq and sumTotalTermFreq are -1 */ /** test that when freqs are omitted, totalTermFreq and sumTotalTermFreq match docFreq and sumDocFreq */
public void testStats() throws Exception { public void testStats() throws Exception {
Directory dir = newDirectory(); Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, RandomIndexWriter iw = new RandomIndexWriter(random(), dir,
@@ -459,8 +459,8 @@
iw.addDocument(doc); iw.addDocument(doc);
IndexReader ir = iw.getReader(); IndexReader ir = iw.getReader();
iw.close(); iw.close();
assertEquals(-1, ir.totalTermFreq(new Term("foo", new BytesRef("bar")))); assertEquals(ir.docFreq(new Term("foo", new BytesRef("bar"))), ir.totalTermFreq(new Term("foo", new BytesRef("bar"))));
assertEquals(-1, ir.getSumTotalTermFreq("foo")); assertEquals(ir.getSumDocFreq("foo"), ir.getSumTotalTermFreq("foo"));
ir.close(); ir.close();
dir.close(); dir.close();
} }
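The behavior under test comes from indexing with frequencies omitted; a hedged sketch of a field configured that way (the test's own field setup lies outside this hunk):

import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexOptions;

class OmitFreqsFieldType {
  static FieldType omitFreqs() {
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.setIndexOptions(IndexOptions.DOCS); // docs only: every posting counts as tf=1
    ft.freeze();
    return ft;
  }
}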

View File

@@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.Set; import java.util.Set;
import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity;
@@ -262,6 +263,11 @@ final class JustCompileSearch {
throw new UnsupportedOperationException(UNSUPPORTED_MSG); throw new UnsupportedOperationException(UNSUPPORTED_MSG);
} }
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return null;
}
} }
} }

View File

@@ -94,6 +94,11 @@ public class TestBooleanScorer extends LuceneTestCase {
throw new UnsupportedOperationException(); throw new UnsupportedOperationException();
} }
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return null;
}
@Override @Override
public BulkScorer bulkScorer(LeafReaderContext context) { public BulkScorer bulkScorer(LeafReaderContext context) {
return new BulkScorer() { return new BulkScorer() {

View File

@@ -29,6 +29,7 @@ import java.util.HashSet;
import java.util.LinkedHashMap; import java.util.LinkedHashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutionException;
import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicInteger;
@@ -36,15 +37,16 @@ import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.atomic.AtomicReference;
import com.carrotsearch.randomizedtesting.generators.RandomPicks; import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField; import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField; import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FilterDirectoryReader; import org.apache.lucene.index.FilterDirectoryReader;
import org.apache.lucene.index.FilterLeafReader; import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
@@ -358,6 +360,11 @@ public class TestLRUQueryCache extends LuceneTestCase {
public Scorer scorer(LeafReaderContext context) throws IOException { public Scorer scorer(LeafReaderContext context) throws IOException {
return null; return null;
} }
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return context.reader().getCoreCacheHelper();
}
}; };
} }
@@ -947,6 +954,11 @@
public Scorer scorer(LeafReaderContext context) throws IOException { public Scorer scorer(LeafReaderContext context) throws IOException {
return null; return null;
} }
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return context.reader().getCoreCacheHelper();
}
}; };
} }
@@ -1276,6 +1288,78 @@
dir.close(); dir.close();
} }
// A query that returns null from Weight.getCacheHelper
private static class NoCacheQuery extends Query {
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
return new Weight(this) {
@Override
public void extractTerms(Set<Term> terms) {
}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
return null;
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
return null;
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return null;
}
};
}
@Override
public String toString(String field) {
return "NoCacheQuery";
}
@Override
public boolean equals(Object obj) {
return sameClassAs(obj);
}
@Override
public int hashCode() {
return 0;
}
}
public void testQueryNotSuitedForCaching() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE);
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
w.addDocument(new Document());
DirectoryReader reader = w.getReader();
IndexSearcher searcher = newSearcher(reader);
searcher.setQueryCachingPolicy(QueryCachingPolicy.ALWAYS_CACHE);
LRUQueryCache cache = new LRUQueryCache(2, 10000, context -> true);
searcher.setQueryCache(cache);
assertEquals(0, searcher.count(new NoCacheQuery()));
assertEquals(0, cache.getCacheCount());
// BooleanQuery wrapping an uncacheable query should also not be cached
BooleanQuery bq = new BooleanQuery.Builder()
.add(new NoCacheQuery(), Occur.MUST)
.add(new TermQuery(new Term("field", "term")), Occur.MUST).build();
assertEquals(0, searcher.count(bq));
assertEquals(0, cache.getCacheCount());
reader.close();
w.close();
dir.close();
}
private static class DummyQuery2 extends Query { private static class DummyQuery2 extends Query {
private final AtomicBoolean scorerCreated; private final AtomicBoolean scorerCreated;
@@ -1291,6 +1375,12 @@
public Scorer scorer(LeafReaderContext context) throws IOException { public Scorer scorer(LeafReaderContext context) throws IOException {
return scorerSupplier(context).get(Long.MAX_VALUE); return scorerSupplier(context).get(Long.MAX_VALUE);
} }
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return context.reader().getCoreCacheHelper();
}
@Override @Override
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
final Weight weight = this; final Weight weight = this;
@@ -1351,4 +1441,110 @@
w.close(); w.close();
dir.close(); dir.close();
} }
static class DVCacheQuery extends Query {
final String field;
AtomicInteger scorerCreatedCount = new AtomicInteger(0);
DVCacheQuery(String field) {
this.field = field;
}
@Override
public String toString(String field) {
return "DVCacheQuery";
}
@Override
public boolean equals(Object obj) {
return sameClassAs(obj);
}
@Override
public int hashCode() {
return 0;
}
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
return new ConstantScoreWeight(this, 1) {
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
scorerCreatedCount.incrementAndGet();
return new ConstantScoreScorer(this, 1, DocIdSetIterator.all(context.reader().maxDoc()));
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return getDocValuesCacheHelper(field, context);
}
};
}
}
public void testDocValuesUpdatesDontBreakCache() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE);
//RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
IndexWriter w = new IndexWriter(dir, iwc);
w.addDocument(new Document());
w.commit();
DirectoryReader reader = DirectoryReader.open(w);
IndexSearcher searcher = newSearcher(reader);
searcher.setQueryCachingPolicy(QueryCachingPolicy.ALWAYS_CACHE);
LRUQueryCache cache = new LRUQueryCache(1, 1000, context -> true);
searcher.setQueryCache(cache);
DVCacheQuery query = new DVCacheQuery("field");
assertEquals(1, searcher.count(query));
assertEquals(1, query.scorerCreatedCount.get());
assertEquals(1, searcher.count(query));
assertEquals(1, query.scorerCreatedCount.get()); // should be cached
Document doc = new Document();
doc.add(new NumericDocValuesField("field", 1));
doc.add(newTextField("text", "text", Store.NO));
w.addDocument(doc);
reader.close();
reader = DirectoryReader.open(w);
searcher = newSearcher(reader);
searcher.setQueryCachingPolicy(QueryCachingPolicy.ALWAYS_CACHE);
searcher.setQueryCache(cache);
assertEquals(2, searcher.count(query));
assertEquals(2, query.scorerCreatedCount.get()); // first segment cached
reader.close();
reader = DirectoryReader.open(w);
searcher = newSearcher(reader);
searcher.setQueryCachingPolicy(QueryCachingPolicy.ALWAYS_CACHE);
searcher.setQueryCache(cache);
assertEquals(2, searcher.count(query));
assertEquals(2, query.scorerCreatedCount.get()); // both segments cached
w.updateNumericDocValue(new Term("text", "text"), "field", 2L);
reader.close();
reader = DirectoryReader.open(w);
searcher = newSearcher(reader);
searcher.setQueryCachingPolicy(QueryCachingPolicy.ALWAYS_CACHE);
searcher.setQueryCache(cache);
assertEquals(2, searcher.count(query));
assertEquals(3, query.scorerCreatedCount.get()); // second segment no longer cached due to DV update
assertEquals(2, searcher.count(query));
assertEquals(4, query.scorerCreatedCount.get()); // still no caching
reader.close();
w.close();
dir.close();
}
}
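The testDocValuesUpdatesDontBreakCache flow above hinges on getDocValuesCacheHelper(field, context), which keys cache entries on a field's doc-values state rather than on the core reader alone. A minimal sketch of the idea, assuming the helper checks FieldInfo.getDocValuesGen() (the shipped implementation may differ):

import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;

final class DocValuesCacheHelperSketch {
  // Cache on the segment's core key only while the field has never received a
  // doc-values update; afterwards a cached entry could serve stale values, so
  // null (uncacheable) is the safe answer.
  static IndexReader.CacheHelper forField(String field, LeafReaderContext context) {
    FieldInfo fi = context.reader().getFieldInfos().fieldInfo(field);
    if (fi != null && fi.getDocValuesGen() != -1) {
      return null; // this segment carries updated doc values for the field
    }
    return context.reader().getCoreCacheHelper();
  }
}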

View File

@ -18,7 +18,6 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.Set;
import java.util.Objects;
import org.apache.lucene.document.Document;
@ -103,17 +102,7 @@ public class TestNeedsScores extends LuceneTestCase {
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
final Weight w = in.createWeight(searcher, needsScores, boost);
return new Weight(AssertNeedsScores.this) { return new FilterWeight(w) {
@Override
public void extractTerms(Set<Term> terms) {
w.extractTerms(terms);
}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
return w.explain(context, doc);
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
assertEquals("query=" + in, value, needsScores);

View File

@ -487,6 +487,11 @@ public class TestQueryRescorer extends LuceneTestCase {
};
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return null;
}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
return null;

View File

@ -155,6 +155,11 @@ public class TestScorerPerf extends LuceneTestCase {
public Scorer scorer(LeafReaderContext context) throws IOException {
return new ConstantScoreScorer(this, score(), new BitSetIterator(docs, docs.approximateCardinality()));
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return null;
}
};
}

View File

@ -249,6 +249,11 @@ public class TestSortRandom extends LuceneTestCase {
return new ConstantScoreScorer(this, score(), new BitSetIterator(bits, bits.approximateCardinality()));
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return null;
}
};
}

View File

@ -124,6 +124,11 @@ public class TestUsageTrackingFilterCachingPolicy extends LuceneTestCase {
public Scorer scorer(LeafReaderContext context) throws IOException {
return new ConstantScoreScorer(this, score(), DocIdSetIterator.all(1));
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return context.reader().getCoreCacheHelper();
}
};
}

View File

@ -40,6 +40,7 @@ import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.Version;
/**
@ -183,7 +184,17 @@ public class TestSimilarityBase extends LuceneTestCase {
}
private CollectionStatistics toCollectionStats(BasicStats stats) {
return new CollectionStatistics(stats.field, stats.getNumberOfDocuments(), -1, stats.getNumberOfFieldTokens(), -1); long sumTtf = stats.getNumberOfFieldTokens();
long sumDf;
if (sumTtf == -1) {
sumDf = TestUtil.nextLong(random(), stats.getNumberOfDocuments(), 2L * stats.getNumberOfDocuments());
} else {
sumDf = TestUtil.nextLong(random(), Math.min(stats.getNumberOfDocuments(), sumTtf), sumTtf);
}
int docCount = Math.toIntExact(Math.min(sumDf, stats.getNumberOfDocuments()));
int maxDoc = TestUtil.nextInt(random(), docCount, docCount + 10);
return new CollectionStatistics(stats.field, maxDoc, docCount, sumTtf, sumDf);
}
private TermStatistics toTermStats(BasicStats stats) {
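The rewritten toCollectionStats synthesizes mutually consistent statistics instead of handing back -1 placeholders. Its invariants, written out as a standalone check (method and parameter names are ours):

// docCount <= maxDoc, docCount <= sumDocFreq, and sumDocFreq <= sumTotalTermFreq
// whenever the total term frequency is tracked (i.e. not -1).
static void assertConsistentStats(long maxDoc, long docCount, long sumDocFreq, long sumTotalTermFreq) {
  assert docCount <= maxDoc;
  assert docCount <= sumDocFreq;
  assert sumTotalTermFreq == -1 || sumDocFreq <= sumTotalTermFreq;
}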

View File

@ -17,8 +17,10 @@
package org.apache.lucene.facet;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.Objects;
import java.util.Set;
@ -101,6 +103,14 @@ class DrillSidewaysQuery extends Query {
throw new UnsupportedOperationException();
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
List<Weight> weights = new ArrayList<>();
weights.add(baseWeight);
weights.addAll(Arrays.asList(drillDowns));
return getCacheHelper(context, weights);
}
@Override
public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
Scorer baseScorer = baseWeight.scorer(context);
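A DrillSidewaysQuery is only safe to cache when the base query and every drill-down clause are themselves cacheable, so its helper is computed over all sub-weights. A hedged sketch of such a combination rule (the shipped Weight.getCacheHelper(context, weights) helper may be stricter):

import java.util.List;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Weight;

final class CompositeCacheHelperSketch {
  // One uncacheable sub-weight makes the whole composite uncacheable.
  static IndexReader.CacheHelper combine(LeafReaderContext context, List<Weight> weights) {
    for (Weight w : weights) {
      if (w.getCacheHelper(context) == null) {
        return null;
      }
    }
    return context.reader().getCoreCacheHelper();
  }
}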

View File

@ -171,6 +171,11 @@ public final class DoubleRange extends Range {
};
return new ConstantScoreScorer(this, score(), twoPhase);
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return null; // TODO delegate to LongValuesSource?
}
};
}

View File

@ -163,6 +163,11 @@ public final class LongRange extends Range {
};
return new ConstantScoreScorer(this, score(), twoPhase);
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return null; // TODO delegate to LongValuesSource?
}
};
}

View File

@ -16,6 +16,16 @@
*/
package org.apache.lucene.facet;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@ -58,16 +68,6 @@ import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.TestUtil;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
public class TestDrillSideways extends FacetTestCase {
protected DrillSideways getNewDrillSideways(IndexSearcher searcher, FacetsConfig config,
@ -740,6 +740,11 @@ public class TestDrillSideways extends FacetTestCase {
});
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return null;
}
};
}

View File

@ -20,7 +20,6 @@ import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.lucene.document.Document;
@ -29,8 +28,8 @@ import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.facet.DrillDownQuery;
import org.apache.lucene.facet.DrillSideways.DrillSidewaysResult;
import org.apache.lucene.facet.DrillSideways;
import org.apache.lucene.facet.DrillSideways.DrillSidewaysResult;
import org.apache.lucene.facet.FacetField;
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.FacetTestCase;
@ -46,10 +45,10 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DoubleValues;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.FilterWeight;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LongValuesSource;
import org.apache.lucene.search.MatchAllDocsQuery;
@ -717,24 +716,12 @@ public class TestRangeFacetCounts extends FacetTestCase {
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
final Weight in = this.in.createWeight(searcher, needsScores, boost);
return new Weight(in.getQuery()) { return new FilterWeight(in) {
@Override
public void extractTerms(Set<Term> terms) {
in.extractTerms(terms);
}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
return in.explain(context, doc);
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
used.set(true);
return in.scorer(context);
}
};
}

View File

@ -238,13 +238,8 @@ public final class TokenStreamFromTermVector extends TokenStream {
// Estimate the number of position slots we need from term stats. We use some estimation factors taken from
// Wikipedia that reduce the likelihood of needing to expand the array.
int sumTotalTermFreq = (int) vector.getSumTotalTermFreq();
if (sumTotalTermFreq == -1) {//unfortunately term vectors seem to not have this stat assert sumTotalTermFreq != -1;
int size = (int) vector.size();
if (size == -1) {//doesn't happen with term vectors, it seems, but pick a default any way
size = 128;
}
sumTotalTermFreq = (int)(size * 2.4);
}
final int originalPositionEstimate = (int) (sumTotalTermFreq * 1.5);//less than 1 in 10 docs exceed this
// This estimate is based on maxStartOffset. Err on the side of this being larger than needed.

View File

@ -153,7 +153,7 @@ org.apache.hadoop.version = 2.7.4
/org.apache.httpcomponents/httpcore = 4.4.6
/org.apache.httpcomponents/httpmime = 4.5.3
/org.apache.ivy/ivy = 2.3.0 /org.apache.ivy/ivy = 2.4.0
org.apache.james.apache.mime4j.version = 0.7.2
/org.apache.james/apache-mime4j-core = ${org.apache.james.apache.mime4j.version}

View File

@ -20,6 +20,7 @@ import java.io.IOException;
import java.util.Set;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
@ -154,6 +155,11 @@ final class GlobalOrdinalsQuery extends Query {
}
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return getDocValuesCacheHelper(joinField, context);
}
}
final static class OrdinalMapScorer extends BaseGlobalOrdinalScorer {

View File

@ -194,6 +194,11 @@ public class ParentChildrenBlockJoinQuery extends Query {
}
};
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return null; // TODO delegate to BitSetProducer?
}
};
}
}

View File

@ -29,6 +29,7 @@ import org.apache.lucene.document.FloatPoint;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PointValues;
@ -186,6 +187,11 @@ abstract class PointInSetIncludingScoreQuery extends Query {
};
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return context.reader().getCoreCacheHelper();
}
};
}

View File

@ -21,6 +21,7 @@ import java.util.Locale;
import java.util.Objects;
import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
@ -140,6 +141,11 @@ class TermsIncludingScoreQuery extends Query {
}
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return context.reader().getCoreCacheHelper();
}
};
}

View File

@ -562,6 +562,11 @@ public class TestJoinUtil extends LuceneTestCase {
}
};
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return null;
}
};
}

View File

@ -1 +0,0 @@
c5ebf1c253ad4959a29f4acfe696ee48cdd9f473

View File

@ -0,0 +1 @@
5abe4c24bbe992a9ac07ca563d5bd3e8d569e9ed

View File

@ -17,6 +17,7 @@
package org.apache.lucene.queries;
import java.io.IOException;
import java.util.Arrays;
import java.util.Objects;
import java.util.Set;
@ -121,6 +122,11 @@ public class BoostingQuery extends Query {
}
};
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return getCacheHelper(context, Arrays.asList(matchWeight, contextWeight));
}
};
}

View File

@ -17,9 +17,11 @@
package org.apache.lucene.queries;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import org.apache.lucene.index.IndexReader;
@ -207,6 +209,14 @@ public class CustomScoreQuery extends Query implements Cloneable {
return new CustomScorer(CustomScoreQuery.this.getCustomScoreProvider(context), this, queryWeight, subQueryScorer, valSrcScorers);
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
List<Weight> weights = new ArrayList<>();
weights.add(subQueryWeight);
weights.addAll(Arrays.asList(valSrcWeights));
return getCacheHelper(context, weights);
}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
Explanation explain = doExplain(context, doc);

View File

@ -88,6 +88,11 @@ public final class BoostedQuery extends Query {
return new BoostedQuery.CustomScorer(context, this, subQueryScorer, boostVal);
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return null;
}
@Override
public Explanation explain(LeafReaderContext readerContext, int doc) throws IOException {
Explanation subQueryExpl = qWeight.explain(readerContext, doc);

View File

@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.Objects;
import java.util.function.DoublePredicate;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
@ -80,6 +81,11 @@ public final class FunctionMatchQuery extends Query {
};
return new ConstantScoreScorer(this, score(), twoPhase);
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return null; // TODO delegate to DoubleValuesSource?
}
};
}

View File

@ -74,6 +74,11 @@ public class FunctionQuery extends Query {
return new AllScorer(context, this, boost);
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return null;
}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
return ((AllScorer)scorer(context)).explain(doc);

View File

@ -21,6 +21,7 @@ import java.util.Map;
import java.util.Objects;
import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Explanation;
@ -153,5 +154,10 @@ public class FunctionRangeQuery extends Query {
// getRangeScorer takes String args and parses them. Weird.
return functionValues.getRangeScorer(context, lowerVal, upperVal, includeLower, includeUpper);
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return null;
}
}
}

View File

@ -137,5 +137,10 @@ public final class FunctionScoreQuery extends Query {
}
};
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return null; // TODO delegate to DoubleValuesSource
}
}
}

View File

@ -29,8 +29,6 @@ import java.util.Map;
/**
* <code>SumTotalTermFreqValueSource</code> returns the number of tokens.
* (sum of term freqs across all documents, across all terms).
* Returns -1 if frequencies were omitted for the field, or if
* the codec doesn't support this statistic.
* @lucene.internal
*/
public class SumTotalTermFreqValueSource extends ValueSource {
@ -61,12 +59,8 @@ public class SumTotalTermFreqValueSource extends ValueSource {
Terms terms = readerContext.reader().terms(indexedField);
if (terms == null) continue;
long v = terms.getSumTotalTermFreq();
if (v == -1) { assert v != -1;
sumTotalTermFreq = -1; sumTotalTermFreq += v;
break;
} else {
sumTotalTermFreq += v;
}
}
final long ttf = sumTotalTermFreq;
context.put(this, new LongDocValues(this) {
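Since codecs are now expected to always track this statistic (hence the new assert), the -1 branch is dead and the per-leaf accumulation collapses to a plain sum. For illustration, assuming a reader and field name in scope:

// Sum of term frequencies across all leaves; getSumTotalTermFreq() is
// expected never to return -1 after this change.
long sumTotalTermFreq = 0;
for (LeafReaderContext leaf : reader.leaves()) {
  Terms terms = leaf.reader().terms(indexedField);
  if (terms != null) {
    sumTotalTermFreq += terms.getSumTotalTermFreq();
  }
}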

Some files were not shown because too many files have changed in this diff.