Karl Wright 2017-11-08 07:29:28 -05:00
commit dae5c570b9
238 changed files with 5402 additions and 895 deletions

View File

@ -186,20 +186,26 @@
}
def checkLicenseHeaderPrecedes = { f, description, contentPattern, commentPattern, text, ratDocument ->
def contentMatcher = contentPattern.matcher(text);
if (contentMatcher.find()) {
def contentStartPos = contentMatcher.start();
def commentMatcher = commentPattern.matcher(text);
while (commentMatcher.find()) {
if (isLicense(commentMatcher, ratDocument)) {
if (commentMatcher.start() < contentStartPos) {
break; // This file is all good, so break loop: license header precedes 'description' definition
} else {
reportViolation(f, description+' declaration precedes license header');
}
def contentMatcher = contentPattern.matcher(text);
if (contentMatcher.find()) {
def contentStartPos = contentMatcher.start();
def commentMatcher = commentPattern.matcher(text);
while (commentMatcher.find()) {
if (isLicense(commentMatcher, ratDocument)) {
if (commentMatcher.start() < contentStartPos) {
break; // This file is all good, so break loop: license header precedes 'description' definition
} else {
reportViolation(f, description+' declaration precedes license header');
}
}
}
}
}
def checkMockitoAssume = { f, text ->
if (text.contains("mockito") && !text.contains("assumeWorkingMockito()")) {
reportViolation(f, 'File uses Mockito but has no assumeWorkingMockito() call');
}
}
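As a hedged illustration of the convention this new check enforces: a conforming test class would opt out early when Mockito cannot work on the current JVM. The sketch assumes the test framework's LuceneTestCase exposes the static assumeWorkingMockito() helper that the check looks for; the class name is illustrative.

import org.apache.lucene.util.LuceneTestCase;
import org.junit.BeforeClass;

public class MyMockingTest extends LuceneTestCase {
  @BeforeClass
  public static void checkMockito() {
    // Skips the whole class when Mockito cannot instrument classes on this JVM;
    // the source check above verifies that Mockito-using tests remember this call.
    assumeWorkingMockito();
  }
}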
def checkForUnescapedSymbolSubstitutions = { f, text ->
@ -265,18 +271,21 @@
ratDocument.getMetaData().value(MetaData.RAT_URL_LICENSE_FAMILY_NAME)));
}
}
if (f.toString().endsWith('.java')) {
if (f.name.endsWith('.java')) {
if (text.contains('org.slf4j.LoggerFactory')) {
if (!validLoggerPattern.matcher(text).find()) {
reportViolation(f, 'invalid logging pattern [not private static final, uses static class name]');
}
}
checkLicenseHeaderPrecedes(f, 'package', packagePattern, javaCommentPattern, text, ratDocument);
if (f.name.contains("Test")) {
checkMockitoAssume(f, text);
}
}
if (f.toString().endsWith('.xml') || f.toString().endsWith('.xml.template')) {
if (f.name.endsWith('.xml') || f.name.endsWith('.xml.template')) {
checkLicenseHeaderPrecedes(f, '<tag>', xmlTagPattern, xmlCommentPattern, text, ratDocument);
}
if (f.toString().endsWith('.adoc')) {
if (f.name.endsWith('.adoc')) {
checkForUnescapedSymbolSubstitutions(f, text);
}
};

View File

@ -12,6 +12,16 @@
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="module-library" exported="">
<library>
<CLASSES>
<root url="file://$MODULE_DIR$/lib" />
</CLASSES>
<JAVADOC />
<SOURCES />
<jarDirectory url="file://$MODULE_DIR$/lib" recursive="false" />
</library>
</orderEntry>
<orderEntry type="library" scope="TEST" name="JUnit" level="project" />
<orderEntry type="module" scope="TEST" module-name="lucene-test-framework" />
<orderEntry type="module" module-name="analysis-common" />

View File

@ -5,6 +5,14 @@ http://s.apache.org/luceneversions
======================= Lucene 8.0.0 =======================
API Changes
* LUCENE-8007: Index statistics Terms.getSumDocFreq(), Terms.getDocCount() are
now required to be stored by codecs. Additionally, TermsEnum.totalTermFreq()
and Terms.getSumTotalTermFreq() are now required: if frequencies are not
stored they are equal to TermsEnum.docFreq() and Terms.getSumDocFreq(),
respectively, because all freq() values equal 1. (Adrien Grand, Robert Muir)
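A hedged sketch of what this change means for consumers of the stats APIs: the -1 sentinel checks disappear, because the statistics are now always present (the helper below is illustrative, not part of the commit).

import java.io.IOException;
import org.apache.lucene.index.TermsEnum;

final class TermStatsExample {
  // Average within-document frequency of the current term. After LUCENE-8007,
  // totalTermFreq() is never -1: for freq-omitted fields it equals docFreq().
  static double avgFreq(TermsEnum termsEnum) throws IOException {
    return (double) termsEnum.totalTermFreq() / termsEnum.docFreq();
  }
}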
Changes in Runtime Behavior
* LUCENE-7837: Indices that were created before the previous major version
@ -25,6 +33,11 @@ Improvements
======================= Lucene 7.2.0 =======================
API Changes
* LUCENE-8017: Weight now exposes a getCacheHelper() method to help query caches
determine whether or not a query can be cached. (Alan Woodward)
Bug Fixes
* LUCENE-7991: KNearestNeighborDocumentClassifier.knnSearch no longer applies
@ -49,6 +62,16 @@ Optimizations
* LUCENE-7994: Use int/int scatter map to gather facet counts when the
number of hits is small relative to the number of unique facet labels
(Dawid Weiss, Robert Muir, Mike McCandless)
Tests
* LUCENE-8035: Run tests with JDK-specific options: --illegal-access=deny
on Java 9+. (Uwe Schindler)
Build
* LUCENE-6144: Upgrade Ivy to 2.4.0; 'ant ivy-bootstrap' now removes old Ivy
jars in ~/.ant/lib/. (Shawn Heisey, Steve Rowe)
======================= Lucene 7.1.0 =======================

View File

@ -139,8 +139,9 @@ public class BlockTermsReader extends FieldsProducer {
assert numTerms >= 0;
final long termsStartPointer = in.readVLong();
final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? -1 : in.readVLong();
final long sumDocFreq = in.readVLong();
final long sumTotalTermFreq = in.readVLong();
// when frequencies are omitted, sumDocFreq=sumTotalTermFreq and we only write one value
final long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? sumTotalTermFreq : in.readVLong();
final int docCount = in.readVInt();
final int longsSize = in.readVInt();
if (docCount < 0 || docCount > state.segmentInfo.maxDoc()) { // #docs with field must be <= #docs
@ -149,7 +150,7 @@ public class BlockTermsReader extends FieldsProducer {
if (sumDocFreq < docCount) { // #postings must be >= #docs with field
throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, in);
}
if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
if (sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq, in);
}
FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq, docCount, longsSize));
@ -810,7 +811,9 @@ public class BlockTermsReader extends FieldsProducer {
// docFreq, totalTermFreq
state.docFreq = freqReader.readVInt();
//System.out.println(" dF=" + state.docFreq);
if (fieldInfo.getIndexOptions() != IndexOptions.DOCS) {
if (fieldInfo.getIndexOptions() == IndexOptions.DOCS) {
state.totalTermFreq = state.docFreq; // all postings have tf=1
} else {
state.totalTermFreq = state.docFreq + freqReader.readVLong();
//System.out.println(" totTF=" + state.totalTermFreq);
}
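The reader changes above imply a matching encoding on the write side; here is a minimal sketch of that layout, assuming the usual IndexOutput and FieldInfo types (the class and method names are illustrative).

import java.io.IOException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.store.IndexOutput;

final class FieldStatsWriterSketch {
  static void writeStats(IndexOutput out, FieldInfo fieldInfo,
                         long sumTotalTermFreq, long sumDocFreq) throws IOException {
    out.writeVLong(sumTotalTermFreq);
    if (fieldInfo.getIndexOptions() != IndexOptions.DOCS) {
      // Written only when freqs are indexed; otherwise sumDocFreq equals
      // sumTotalTermFreq and the reader reuses the first value, as shown above.
      out.writeVLong(sumDocFreq);
    }
  }
}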

View File

@ -126,8 +126,9 @@ public final class OrdsBlockTreeTermsReader extends FieldsProducer {
final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
assert fieldInfo != null: "field=" + field;
assert numTerms <= Integer.MAX_VALUE;
final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? -1 : in.readVLong();
final long sumDocFreq = in.readVLong();
final long sumTotalTermFreq = in.readVLong();
// when frequencies are omitted, sumDocFreq=sumTotalTermFreq and we only write one value
final long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? sumTotalTermFreq : in.readVLong();
final int docCount = in.readVInt();
final int longsSize = in.readVInt();
// System.out.println(" longsSize=" + longsSize);
@ -140,7 +141,7 @@ public final class OrdsBlockTreeTermsReader extends FieldsProducer {
if (sumDocFreq < docCount) { // #postings must be >= #docs with field
throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, in);
}
if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
if (sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq, in);
}
final long indexStartFP = indexIn.readVLong();

View File

@ -292,7 +292,9 @@ final class OrdsIntersectTermsEnumFrame {
// stats
termState.docFreq = statsReader.readVInt();
//if (DEBUG) System.out.println(" dF=" + state.docFreq);
if (ite.fr.fieldInfo.getIndexOptions() != IndexOptions.DOCS) {
if (ite.fr.fieldInfo.getIndexOptions() == IndexOptions.DOCS) {
termState.totalTermFreq = termState.docFreq; // all tf values are 1
} else {
termState.totalTermFreq = termState.docFreq + statsReader.readVLong();
//if (DEBUG) System.out.println(" totTF=" + state.totalTermFreq);
}

View File

@ -499,7 +499,9 @@ final class OrdsSegmentTermsEnumFrame {
// stats
state.docFreq = statsReader.readVInt();
//if (DEBUG) System.out.println(" dF=" + state.docFreq);
if (ste.fr.fieldInfo.getIndexOptions() != IndexOptions.DOCS) {
if (ste.fr.fieldInfo.getIndexOptions() == IndexOptions.DOCS) {
state.totalTermFreq = state.docFreq; // all tf values are 1
} else {
state.totalTermFreq = state.docFreq + statsReader.readVLong();
//if (DEBUG) System.out.println(" totTF=" + state.totalTermFreq);
}

View File

@ -19,7 +19,6 @@ package org.apache.lucene.codecs.memory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Collection;
import java.util.Collections;
@ -111,8 +110,9 @@ public class FSTOrdTermsReader extends FieldsProducer {
FieldInfo fieldInfo = fieldInfos.fieldInfo(blockIn.readVInt());
boolean hasFreq = fieldInfo.getIndexOptions() != IndexOptions.DOCS;
long numTerms = blockIn.readVLong();
long sumTotalTermFreq = hasFreq ? blockIn.readVLong() : -1;
long sumDocFreq = blockIn.readVLong();
long sumTotalTermFreq = blockIn.readVLong();
// if freqs are omitted, sumDocFreq=sumTotalTermFreq and we only write one value
long sumDocFreq = hasFreq ? blockIn.readVLong() : sumTotalTermFreq;
int docCount = blockIn.readVInt();
int longsSize = blockIn.readVInt();
FST<Long> index = new FST<>(indexIn, PositiveIntOutputs.getSingleton());
@ -146,7 +146,7 @@ public class FSTOrdTermsReader extends FieldsProducer {
throw new CorruptIndexException("invalid sumDocFreq: " + field.sumDocFreq + " docCount: " + field.docCount + " (blockIn=" + blockIn + ")", indexIn);
}
// #positions must be >= #postings
if (field.sumTotalTermFreq != -1 && field.sumTotalTermFreq < field.sumDocFreq) {
if (field.sumTotalTermFreq < field.sumDocFreq) {
throw new CorruptIndexException("invalid sumTotalTermFreq: " + field.sumTotalTermFreq + " sumDocFreq: " + field.sumDocFreq + " (blockIn=" + blockIn + ")", indexIn);
}
if (previous != null) {
@ -343,9 +343,6 @@ public class FSTOrdTermsReader extends FieldsProducer {
this.totalTermFreq = new long[INTERVAL];
this.statsBlockOrd = -1;
this.metaBlockOrd = -1;
if (!hasFreqs()) {
Arrays.fill(totalTermFreq, -1);
}
}
/** Decodes stats data into term state */
@ -388,6 +385,7 @@ public class FSTOrdTermsReader extends FieldsProducer {
}
} else {
docFreq[i] = code;
totalTermFreq[i] = code;
}
}
}

View File

@ -94,8 +94,9 @@ public class FSTTermsReader extends FieldsProducer {
int fieldNumber = in.readVInt();
FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
long numTerms = in.readVLong();
long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? -1 : in.readVLong();
long sumDocFreq = in.readVLong();
long sumTotalTermFreq = in.readVLong();
// if frequencies are omitted, sumTotalTermFreq=sumDocFreq and we only write one value
long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? sumTotalTermFreq : in.readVLong();
int docCount = in.readVInt();
int longsSize = in.readVInt();
TermsReader current = new TermsReader(fieldInfo, in, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize);
@ -126,7 +127,7 @@ public class FSTTermsReader extends FieldsProducer {
throw new CorruptIndexException("invalid sumDocFreq: " + field.sumDocFreq + " docCount: " + field.docCount, in);
}
// #positions must be >= #postings
if (field.sumTotalTermFreq != -1 && field.sumTotalTermFreq < field.sumDocFreq) {
if (field.sumTotalTermFreq < field.sumDocFreq) {
throw new CorruptIndexException("invalid sumTotalTermFreq: " + field.sumTotalTermFreq + " sumDocFreq: " + field.sumDocFreq, in);
}
if (previous != null) {
@ -288,7 +289,7 @@ public class FSTTermsReader extends FieldsProducer {
@Override
public long totalTermFreq() throws IOException {
return state.totalTermFreq;
return state.totalTermFreq == -1 ? state.docFreq : state.totalTermFreq;
}
@Override

View File

@ -733,10 +733,10 @@ public final class MemoryPostingsFormat extends PostingsFormat {
if (!didDecode) {
buffer.reset(current.output.bytes, current.output.offset, current.output.length);
docFreq = buffer.readVInt();
if (field.getIndexOptions() != IndexOptions.DOCS) {
totalTermFreq = docFreq + buffer.readVLong();
if (field.getIndexOptions() == IndexOptions.DOCS) {
totalTermFreq = docFreq;
} else {
totalTermFreq = -1;
totalTermFreq = docFreq + buffer.readVLong();
}
postingsSpare.bytes = current.output.bytes;
postingsSpare.offset = buffer.getPosition();
@ -873,12 +873,15 @@ public final class MemoryPostingsFormat extends PostingsFormat {
field = fieldInfos.fieldInfo(fieldNumber);
if (field == null) {
throw new CorruptIndexException("invalid field number: " + fieldNumber, in);
} else if (field.getIndexOptions() != IndexOptions.DOCS) {
sumTotalTermFreq = in.readVLong();
} else {
sumTotalTermFreq = -1;
sumTotalTermFreq = in.readVLong();
}
// if frequencies are omitted, sumDocFreq = sumTotalTermFreq and we only write one value.
if (field.getIndexOptions() == IndexOptions.DOCS) {
sumDocFreq = sumTotalTermFreq;
} else {
sumDocFreq = in.readVLong();
}
sumDocFreq = in.readVLong();
docCount = in.readVInt();
fst = new FST<>(in, outputs);

View File

@ -202,7 +202,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
@Override
public long totalTermFreq() {
return indexOptions == IndexOptions.DOCS ? -1 : totalTermFreq;
return indexOptions == IndexOptions.DOCS ? docFreq : totalTermFreq;
}
@Override
@ -568,12 +568,13 @@ class SimpleTextFieldsReader extends FieldsProducer {
} else if (StringHelper.startsWith(scratch.get(), DOC)) {
docFreq++;
sumDocFreq++;
totalTermFreq++;
scratchUTF16.copyUTF8Bytes(scratch.bytes(), DOC.length, scratch.length()-DOC.length);
int docID = ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length());
visitedDocs.set(docID);
} else if (StringHelper.startsWith(scratch.get(), FREQ)) {
scratchUTF16.copyUTF8Bytes(scratch.bytes(), FREQ.length, scratch.length()-FREQ.length);
totalTermFreq += ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length());
totalTermFreq += ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length()) - 1;
} else if (StringHelper.startsWith(scratch.get(), TERM)) {
if (lastDocsStart != -1) {
b.add(Util.toIntsRef(lastTerm.get(), scratchIntsRef), outputs.newPair(lastDocsStart,
@ -637,7 +638,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
@Override
public long getSumTotalTermFreq() {
return fieldInfo.getIndexOptions() == IndexOptions.DOCS ? -1 : sumTotalTermFreq;
return sumTotalTermFreq;
}
@Override

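A worked illustration (with hypothetical freqs) of the accumulation above: each DOC line now contributes 1 to totalTermFreq and each FREQ line adds the remaining freq - 1, so a freq-omitted field, which has no FREQ lines, naturally ends with totalTermFreq == docFreq.

public class SimpleTextStatsIllustration {
  public static void main(String[] args) {
    int[] freqs = {3, 1};          // one term occurring in two docs
    long docFreq = 0, totalTermFreq = 0;
    for (int freq : freqs) {
      docFreq++;                   // DOC line seen
      totalTermFreq++;             // first occurrence counted up front
      totalTermFreq += freq - 1;   // FREQ line adds the remainder
    }
    System.out.println(docFreq + " " + totalTermFreq); // prints "2 4"
  }
}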
View File

@ -288,7 +288,13 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
@Override
public long getSumTotalTermFreq() throws IOException {
return -1;
// TODO: make it constant-time
long ttf = 0;
TermsEnum iterator = iterator();
for (BytesRef b = iterator.next(); b != null; b = iterator.next()) {
ttf += iterator.totalTermFreq();
}
return ttf;
}
@Override

View File

@ -80,13 +80,15 @@
<!-- Needed in case a module needs the original build, also for compile-tools to be called from a module -->
<property name="common.build.dir" location="${common.dir}/build"/>
<property name="ivy.bootstrap.version" value="2.3.0" /> <!-- UPGRADE NOTE: update disallowed.ivy.jars regex in ivy-availability-check -->
<property name="ivy.bootstrap.version" value="2.4.0" /> <!-- UPGRADE NOTE: update disallowed_ivy_jars_regex below -->
<property name="disallowed_ivy_jars_regex" value="ivy-2\.[0123].*\.jar"/>
<property name="ivy.default.configuration" value="*"/>
<!-- Running ant targets in parallel may require this to be set to false because ivy:retrieve tasks may race with resolve -->
<property name="ivy.sync" value="true"/>
<property name="ivy.resolution-cache.dir" location="${common.build.dir}/ivy-resolution-cache"/>
<property name="ivy.lock-strategy" value="artifact-lock"/>
<property name="ivy.lock-strategy" value="artifact-lock-nio"/>
<property name="local.caches" location="${common.dir}/../.caches" />
<property name="tests.cachedir" location="${local.caches}/test-stats" />
@ -413,12 +415,12 @@
<property name="ivy_bootstrap_url1" value="http://repo1.maven.org/maven2"/>
<!-- you might need to tweak this from China so it works -->
<property name="ivy_bootstrap_url2" value="http://uk.maven.org/maven2"/>
<property name="ivy_checksum_sha1" value="c5ebf1c253ad4959a29f4acfe696ee48cdd9f473"/>
<property name="ivy_checksum_sha1" value="5abe4c24bbe992a9ac07ca563d5bd3e8d569e9ed"/>
<target name="ivy-availability-check" unless="ivy.available">
<path id="disallowed.ivy.jars">
<fileset dir="${ivy_install_path}">
<filename regex="ivy-2\.[012].*\.jar"/> <!-- TODO: Update this regex to disallow Ivy versions -->
<filename regex="${disallowed_ivy_jars_regex}"/>
</fileset>
</path>
<loadresource property="disallowed.ivy.jars.list">
@ -482,19 +484,20 @@
<fail>Ivy is not available</fail>
</target>
<target name="ivy-bootstrap" description="Download and install Ivy in the users ant lib dir" depends="ivy-bootstrap1,ivy-bootstrap2,ivy-checksum"/>
<target name="ivy-bootstrap" description="Download and install Ivy in the users ant lib dir"
depends="-ivy-bootstrap1,-ivy-bootstrap2,-ivy-checksum,-ivy-remove-old-versions"/>
<!-- try to download from repo1.maven.org -->
<target name="ivy-bootstrap1">
<target name="-ivy-bootstrap1">
<ivy-download src="${ivy_bootstrap_url1}" dest="${ivy_install_path}"/>
<available file="${ivy_install_path}/ivy-${ivy.bootstrap.version}.jar" property="ivy.bootstrap1.success" />
</target>
<target name="ivy-bootstrap2" unless="ivy.bootstrap1.success">
<target name="-ivy-bootstrap2" unless="ivy.bootstrap1.success">
<ivy-download src="${ivy_bootstrap_url2}" dest="${ivy_install_path}"/>
</target>
<target name="ivy-checksum">
<target name="-ivy-checksum">
<checksum file="${ivy_install_path}/ivy-${ivy.bootstrap.version}.jar"
property="${ivy_checksum_sha1}"
algorithm="SHA"
@ -505,6 +508,14 @@
</condition>
</fail>
</target>
<target name="-ivy-remove-old-versions">
<delete verbose="true" failonerror="true">
<fileset dir="${ivy_install_path}">
<filename regex="${disallowed_ivy_jars_regex}"/>
</fileset>
</delete>
</target>
<macrodef name="ivy-download">
<attribute name="src"/>
@ -948,6 +959,12 @@
<condition property="java.security.manager" value="org.apache.lucene.util.TestSecurityManager">
<istrue value="${tests.useSecurityManager}"/>
</condition>
<!-- additional arguments for Java 9+ -->
<local name="tests.runtimespecific.args"/>
<condition property="tests.runtimespecific.args" value="" else="--illegal-access=deny">
<equals arg1="${build.java.runtime}" arg2="1.8"/>
</condition>
<!-- create a fileset pattern that matches ${tests.class}. -->
<loadresource property="tests.explicitclass" quiet="true">
@ -1029,6 +1046,7 @@
<jvmarg line="${tests.clover.args}"/>
<jvmarg line="@{additional.vm.args}"/>
<jvmarg line="${tests.asserts.args}"/>
<jvmarg line="${tests.runtimespecific.args}"/>
<!-- set the number of times tests should run -->
<sysproperty key="tests.iters" value="${tests.iters}"/>

View File

@ -180,8 +180,9 @@ public final class BlockTreeTermsReader extends FieldsProducer {
if (fieldInfo == null) {
throw new CorruptIndexException("invalid field number: " + field, termsIn);
}
final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? -1 : termsIn.readVLong();
final long sumDocFreq = termsIn.readVLong();
final long sumTotalTermFreq = termsIn.readVLong();
// when frequencies are omitted, sumDocFreq=sumTotalTermFreq and only one value is written.
final long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? sumTotalTermFreq : termsIn.readVLong();
final int docCount = termsIn.readVInt();
final int longsSize = termsIn.readVInt();
if (longsSize < 0) {
@ -195,7 +196,7 @@ public final class BlockTreeTermsReader extends FieldsProducer {
if (sumDocFreq < docCount) { // #postings must be >= #docs with field
throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, termsIn);
}
if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
if (sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq, termsIn);
}
final long indexStartFP = indexIn.readVLong();

View File

@ -288,7 +288,9 @@ final class IntersectTermsEnumFrame {
// stats
termState.docFreq = statsReader.readVInt();
if (ite.fr.fieldInfo.getIndexOptions() != IndexOptions.DOCS) {
if (ite.fr.fieldInfo.getIndexOptions() == IndexOptions.DOCS) {
termState.totalTermFreq = termState.docFreq; // all postings have freq=1
} else {
termState.totalTermFreq = termState.docFreq + statsReader.readVLong();
}
// metadata

View File

@ -417,7 +417,9 @@ final class SegmentTermsEnumFrame {
// stats
state.docFreq = statsReader.readVInt();
//if (DEBUG) System.out.println(" dF=" + state.docFreq);
if (ste.fr.fieldInfo.getIndexOptions() != IndexOptions.DOCS) {
if (ste.fr.fieldInfo.getIndexOptions() == IndexOptions.DOCS) {
state.totalTermFreq = state.docFreq; // all postings have freq=1
} else {
state.totalTermFreq = state.docFreq + statsReader.readVLong();
//if (DEBUG) System.out.println(" totTF=" + state.totalTermFreq);
}

View File

@ -745,6 +745,7 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem
private static class TVTerms extends Terms {
private final int numTerms, flags;
private final long totalTermFreq;
private final int[] prefixLengths, suffixLengths, termFreqs, positionIndex, positions, startOffsets, lengths, payloadIndex;
private final BytesRef termBytes, payloadBytes;
@ -764,6 +765,11 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem
this.payloadIndex = payloadIndex;
this.payloadBytes = payloadBytes;
this.termBytes = termBytes;
long ttf = 0;
for (int tf : termFreqs) {
ttf += tf;
}
this.totalTermFreq = ttf;
}
@Override
@ -782,7 +788,7 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem
@Override
public long getSumTotalTermFreq() throws IOException {
return -1L;
return totalTermFreq;
}
@Override

View File

@ -21,6 +21,7 @@ import java.util.Arrays;
import java.util.Objects;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PointValues;
@ -356,6 +357,11 @@ abstract class RangeFieldQuery extends Query {
}
return scorerSupplier.get(Long.MAX_VALUE);
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return context.reader().getCoreCacheHelper();
}
};
}

View File

@ -138,6 +138,11 @@ abstract class SortedNumericDocValuesRangeQuery extends Query {
}
return new ConstantScoreScorer(this, score(), iterator);
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return getDocValuesCacheHelper(field, context);
}
};
}

View File

@ -181,6 +181,11 @@ abstract class SortedSetDocValuesRangeQuery extends Query {
}
return new ConstantScoreScorer(this, score(), iterator);
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return getDocValuesCacheHelper(field, context);
}
};
}

View File

@ -123,7 +123,10 @@ public abstract class BaseCompositeReader<R extends IndexReader> extends Composi
ensureOpen();
int total = 0; // sum freqs in subreaders
for (int i = 0; i < subReaders.length; i++) {
total += subReaders[i].docFreq(term);
int sub = subReaders[i].docFreq(term);
assert sub >= 0;
assert sub <= subReaders[i].getDocCount(term.field());
total += sub;
}
return total;
}
@ -134,9 +137,8 @@ public abstract class BaseCompositeReader<R extends IndexReader> extends Composi
long total = 0; // sum freqs in subreaders
for (int i = 0; i < subReaders.length; i++) {
long sub = subReaders[i].totalTermFreq(term);
if (sub == -1) {
return -1;
}
assert sub >= 0;
assert sub <= subReaders[i].getSumTotalTermFreq(term.field());
total += sub;
}
return total;
@ -148,9 +150,8 @@ public abstract class BaseCompositeReader<R extends IndexReader> extends Composi
long total = 0; // sum doc freqs in subreaders
for (R reader : subReaders) {
long sub = reader.getSumDocFreq(field);
if (sub == -1) {
return -1; // if any of the subs doesn't support it, return -1
}
assert sub >= 0;
assert sub <= reader.getSumTotalTermFreq(field);
total += sub;
}
return total;
@ -162,9 +163,8 @@ public abstract class BaseCompositeReader<R extends IndexReader> extends Composi
int total = 0; // sum doc counts in subreaders
for (R reader : subReaders) {
int sub = reader.getDocCount(field);
if (sub == -1) {
return -1; // if any of the subs doesn't support it, return -1
}
assert sub >= 0;
assert sub <= reader.maxDoc();
total += sub;
}
return total;
@ -176,9 +176,8 @@ public abstract class BaseCompositeReader<R extends IndexReader> extends Composi
long total = 0; // sum doc total term freqs in subreaders
for (R reader : subReaders) {
long sub = reader.getSumTotalTermFreq(field);
if (sub == -1) {
return -1; // if any of the subs doesn't support it, return -1
}
assert sub >= 0;
assert sub >= reader.getSumDocFreq(field);
total += sub;
}
return total;

View File

@ -1253,6 +1253,10 @@ public final class CheckIndex implements Closeable {
continue;
}
if (terms.getDocCount() > maxDoc) {
throw new RuntimeException("docCount > maxDoc for field: " + field + ", docCount=" + terms.getDocCount() + ", maxDoc=" + maxDoc);
}
final boolean hasFreqs = terms.hasFreqs();
final boolean hasPositions = terms.hasPositions();
final boolean hasPayloads = terms.hasPayloads();
@ -1295,12 +1299,6 @@ public final class CheckIndex implements Closeable {
throw new RuntimeException("field \"" + field + "\" should have hasFreqs=" + expectedHasFreqs + " but got " + hasFreqs);
}
if (hasFreqs == false) {
if (terms.getSumTotalTermFreq() != -1) {
throw new RuntimeException("field \"" + field + "\" hasFreqs is false, but Terms.getSumTotalTermFreq()=" + terms.getSumTotalTermFreq() + " (should be -1)");
}
}
if (!isVectors) {
final boolean expectedHasPositions = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
if (hasPositions != expectedHasPositions) {
@ -1375,8 +1373,8 @@ public final class CheckIndex implements Closeable {
postings = termsEnum.postings(postings, PostingsEnum.ALL);
if (hasFreqs == false) {
if (termsEnum.totalTermFreq() != -1) {
throw new RuntimeException("field \"" + field + "\" hasFreqs is false, but TermsEnum.totalTermFreq()=" + termsEnum.totalTermFreq() + " (should be -1)");
if (termsEnum.totalTermFreq() != termsEnum.docFreq()) {
throw new RuntimeException("field \"" + field + "\" hasFreqs is false, but TermsEnum.totalTermFreq()=" + termsEnum.totalTermFreq() + " (should be " + termsEnum.docFreq() + ")");
}
}
@ -1406,14 +1404,11 @@ public final class CheckIndex implements Closeable {
break;
}
visitedDocs.set(doc);
int freq = -1;
if (hasFreqs) {
freq = postings.freq();
if (freq <= 0) {
throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
}
totalTermFreq += freq;
} else {
int freq = postings.freq();
if (freq <= 0) {
throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
}
if (hasFreqs == false) {
// When a field didn't index freq, it must
// consistently "lie" and pretend that freq was
// 1:
@ -1421,6 +1416,8 @@ public final class CheckIndex implements Closeable {
throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " != 1 when Terms.hasFreqs() is false");
}
}
totalTermFreq += freq;
if (liveDocs == null || liveDocs.get(doc)) {
hasNonDeletedDocs = true;
status.totFreq++;
@ -1490,19 +1487,25 @@ public final class CheckIndex implements Closeable {
}
final long totalTermFreq2 = termsEnum.totalTermFreq();
final boolean hasTotalTermFreq = hasFreqs && totalTermFreq2 != -1;
if (docCount != docFreq) {
throw new RuntimeException("term " + term + " docFreq=" + docFreq + " != tot docs w/o deletions " + docCount);
}
if (hasTotalTermFreq) {
if (totalTermFreq2 <= 0) {
throw new RuntimeException("totalTermFreq: " + totalTermFreq2 + " is out of bounds");
}
sumTotalTermFreq += totalTermFreq;
if (totalTermFreq != totalTermFreq2) {
throw new RuntimeException("term " + term + " totalTermFreq=" + totalTermFreq2 + " != recomputed totalTermFreq=" + totalTermFreq);
}
if (docFreq > terms.getDocCount()) {
throw new RuntimeException("term " + term + " docFreq=" + docFreq + " > docCount=" + terms.getDocCount());
}
if (totalTermFreq2 <= 0) {
throw new RuntimeException("totalTermFreq: " + totalTermFreq2 + " is out of bounds");
}
sumTotalTermFreq += totalTermFreq;
if (totalTermFreq != totalTermFreq2) {
throw new RuntimeException("term " + term + " totalTermFreq=" + totalTermFreq2 + " != recomputed totalTermFreq=" + totalTermFreq);
}
if (totalTermFreq2 < docFreq) {
throw new RuntimeException("totalTermFreq: " + totalTermFreq2 + " is out of bounds, docFreq=" + docFreq);
}
if (hasFreqs == false && totalTermFreq != docFreq) {
throw new RuntimeException("term " + term + " totalTermFreq=" + totalTermFreq + " != docFreq=" + docFreq);
}
// Test skipping
@ -1626,22 +1629,22 @@ public final class CheckIndex implements Closeable {
}
status.blockTreeStats.put(field, stats);
if (sumTotalTermFreq != 0) {
final long v = fields.terms(field).getSumTotalTermFreq();
if (v != -1 && sumTotalTermFreq != v) {
throw new RuntimeException("sumTotalTermFreq for field " + field + "=" + v + " != recomputed sumTotalTermFreq=" + sumTotalTermFreq);
}
final long actualSumDocFreq = fields.terms(field).getSumDocFreq();
if (sumDocFreq != actualSumDocFreq) {
throw new RuntimeException("sumDocFreq for field " + field + "=" + actualSumDocFreq + " != recomputed sumDocFreq=" + sumDocFreq);
}
final long actualSumTotalTermFreq = fields.terms(field).getSumTotalTermFreq();
if (sumTotalTermFreq != actualSumTotalTermFreq) {
throw new RuntimeException("sumTotalTermFreq for field " + field + "=" + actualSumTotalTermFreq + " != recomputed sumTotalTermFreq=" + sumTotalTermFreq);
}
if (sumDocFreq != 0) {
final long v = fields.terms(field).getSumDocFreq();
if (v != -1 && sumDocFreq != v) {
throw new RuntimeException("sumDocFreq for field " + field + "=" + v + " != recomputed sumDocFreq=" + sumDocFreq);
}
if (hasFreqs == false && sumTotalTermFreq != sumDocFreq) {
throw new RuntimeException("sumTotalTermFreq for field " + field + " should be " + sumDocFreq + ", got sumTotalTermFreq=" + sumTotalTermFreq);
}
final int v = fieldTerms.getDocCount();
if (v != -1 && visitedDocs.cardinality() != v) {
if (visitedDocs.cardinality() != v) {
throw new RuntimeException("docCount for field " + field + "=" + v + " != recomputed docCount=" + visitedDocs.cardinality());
}

View File

@ -450,25 +450,25 @@ public abstract class IndexReader implements Closeable {
/**
* Returns the total number of occurrences of {@code term} across all
* documents (the sum of the freq() for each doc that has this term). This
* will be -1 if the codec doesn't support this measure. Note that, like other
* term measures, this measure does not take deleted documents into account.
* documents (the sum of the freq() for each doc that has this term).
* Note that, like other term measures, this measure does not take
* deleted documents into account.
*/
public abstract long totalTermFreq(Term term) throws IOException;
/**
* Returns the sum of {@link TermsEnum#docFreq()} for all terms in this field,
* or -1 if this measure isn't stored by the codec. Note that, just like other
* term measures, this measure does not take deleted documents into account.
* Returns the sum of {@link TermsEnum#docFreq()} for all terms in this field.
* Note that, just like other term measures, this measure does not take deleted
* documents into account.
*
* @see Terms#getSumDocFreq()
*/
public abstract long getSumDocFreq(String field) throws IOException;
/**
* Returns the number of documents that have at least one term for this field,
* or -1 if this measure isn't stored by the codec. Note that, just like other
* term measures, this measure does not take deleted documents into account.
* Returns the number of documents that have at least one term for this field.
* Note that, just like other term measures, this measure does not take deleted
* documents into account.
*
* @see Terms#getDocCount()
*/
@ -476,9 +476,8 @@ public abstract class IndexReader implements Closeable {
/**
* Returns the sum of {@link TermsEnum#totalTermFreq} for all terms in this
* field, or -1 if this measure isn't stored by the codec (or if this fields
* omits term freq and positions). Note that, just like other term measures,
* this measure does not take deleted documents into account.
* field. Note that, just like other term measures, this measure does not take
* deleted documents into account.
*
* @see Terms#getSumTotalTermFreq()
*/

View File

@ -149,9 +149,7 @@ public final class MultiTerms extends Terms {
long sum = 0;
for(Terms terms : subs) {
final long v = terms.getSumTotalTermFreq();
if (v == -1) {
return -1;
}
assert v != -1;
sum += v;
}
return sum;
@ -162,9 +160,7 @@ public final class MultiTerms extends Terms {
long sum = 0;
for(Terms terms : subs) {
final long v = terms.getSumDocFreq();
if (v == -1) {
return -1;
}
assert v != -1;
sum += v;
}
return sum;
@ -175,9 +171,7 @@ public final class MultiTerms extends Terms {
int sum = 0;
for(Terms terms : subs) {
final int v = terms.getDocCount();
if (v == -1) {
return -1;
}
assert v != -1;
sum += v;
}
return sum;

View File

@ -326,9 +326,7 @@ public final class MultiTermsEnum extends TermsEnum {
long sum = 0;
for(int i=0;i<numTop;i++) {
final long v = top[i].terms.totalTermFreq();
if (v == -1) {
return v;
}
assert v != -1;
sum += v;
}
return sum;

View File

@ -101,7 +101,7 @@ class SortedDocValuesTermsEnum extends TermsEnum {
@Override
public long totalTermFreq() {
return -1;
throw new UnsupportedOperationException();
}
@Override

View File

@ -101,7 +101,7 @@ class SortedSetDocValuesTermsEnum extends TermsEnum {
@Override
public long totalTermFreq() {
return -1;
throw new UnsupportedOperationException();
}
@Override

View File

@ -141,11 +141,11 @@ public final class TermContext {
/** Expert: Accumulate term statistics. */
public void accumulateStatistics(final int docFreq, final long totalTermFreq) {
assert docFreq >= 0;
assert totalTermFreq >= 0;
assert docFreq <= totalTermFreq;
this.docFreq += docFreq;
if (this.totalTermFreq >= 0 && totalTermFreq >= 0)
this.totalTermFreq += totalTermFreq;
else
this.totalTermFreq = -1;
this.totalTermFreq += totalTermFreq;
}
/**

View File

@ -99,25 +99,21 @@ public abstract class Terms {
* other term measures, this measure does not take deleted
* documents into account. */
public abstract long size() throws IOException;
/** Returns the sum of {@link TermsEnum#totalTermFreq} for
* all terms in this field, or -1 if this measure isn't
* stored by the codec (or if this fields omits term freq
* and positions). Note that, just like other term
* all terms in this field. Note that, just like other term
* measures, this measure does not take deleted documents
* into account. */
public abstract long getSumTotalTermFreq() throws IOException;
/** Returns the sum of {@link TermsEnum#docFreq()} for
* all terms in this field, or -1 if this measure isn't
* stored by the codec. Note that, just like other term
* all terms in this field. Note that, just like other term
* measures, this measure does not take deleted documents
* into account. */
public abstract long getSumDocFreq() throws IOException;
/** Returns the number of documents that have at least one
* term for this field, or -1 if this measure isn't
* stored by the codec. Note that, just like other term
* term for this field. Note that, just like other term
* measures, this measure does not take deleted documents
* into account. */
public abstract int getDocCount() throws IOException;

View File

@ -131,8 +131,7 @@ public abstract class TermsEnum implements BytesRefIterator {
/** Returns the total number of occurrences of this term
* across all documents (the sum of the freq() for each
* doc that has this term). This will be -1 if the
* codec doesn't support this measure. Note that, like
* doc that has this term). Note that, like
* other term measures, this measure does not take
* deleted documents into account. */
public abstract long totalTermFreq() throws IOException;

View File

@ -148,12 +148,8 @@
* deleted documents, when segments are merged the statistic is updated as
* those deleted documents are merged away.
* <li>{@link org.apache.lucene.index.TermsEnum#totalTermFreq}: Returns the number
* of occurrences of this term across all documents. Note that this statistic
* is unavailable (returns <code>-1</code>) if term frequencies were omitted
* from the index
* ({@link org.apache.lucene.index.IndexOptions#DOCS DOCS})
* for the field. Like docFreq(), it will also count occurrences that appear in
* deleted documents.
* of occurrences of this term across all documents. Like docFreq(), it will
* also count occurrences that appear in deleted documents.
* </ul>
* <a name="fieldstats"></a>
* <h3>
@ -180,10 +176,7 @@
* of tokens for the field. This can be thought of as the sum of
* {@link org.apache.lucene.index.TermsEnum#totalTermFreq} across all terms in the
* field, and like totalTermFreq() it will also count occurrences that appear in
* deleted documents, and will be unavailable (returns <code>-1</code>) if term
* frequencies were omitted from the index
* ({@link org.apache.lucene.index.IndexOptions#DOCS DOCS})
* for the field.
* deleted documents.
* </ul>
* <a name="segmentstats"></a>
* <h3>

View File

@ -277,11 +277,7 @@ public final class BlendedTermQuery extends Query {
long ttf = 0;
for (TermContext ctx : contexts) {
df = Math.max(df, ctx.docFreq());
if (ctx.totalTermFreq() == -1L) {
ttf = -1L;
} else if (ttf != -1L) {
ttf += ctx.totalTermFreq();
}
ttf += ctx.totalTermFreq();
}
for (int i = 0; i < contexts.length; ++i) {

View File

@ -26,6 +26,7 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
@ -299,6 +300,11 @@ final class BooleanWeight extends Weight {
return scorerSupplier.get(Long.MAX_VALUE);
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return getCacheHelper(context, weights);
}
@Override
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
int minShouldMatch = query.getMinimumNumberShouldMatch();

View File

@ -23,7 +23,27 @@ import org.apache.lucene.index.Terms; // javadocs
/**
* Contains statistics for a collection (field)
* Contains statistics for a collection (field).
* <p>
* This class holds statistics across all documents for scoring purposes:
* <ul>
* <li> {@link #maxDoc()}: number of documents.
* <li> {@link #docCount()}: number of documents that contain this field.
* <li> {@link #sumDocFreq()}: number of postings-list entries.
* <li> {@link #sumTotalTermFreq()}: number of tokens.
* </ul>
* <p>
* The following conditions are always true:
* <ul>
* <li> All statistics are positive integers: never zero or negative.
* <li> {@code docCount} &lt;= {@code maxDoc}
* <li> {@code docCount} &lt;= {@code sumDocFreq} &lt;= {@code sumTotalTermFreq}
* </ul>
* <p>
* Values may include statistics on deleted documents that have not yet been merged away.
* <p>
* Be careful when performing calculations on these values because they are represented
* as 64-bit integer values; you may need to cast to {@code double} for your use.
* @lucene.experimental
*/
public class CollectionStatistics {
@ -51,33 +71,23 @@ public class CollectionStatistics {
if (maxDoc <= 0) {
throw new IllegalArgumentException("maxDoc must be positive, maxDoc: " + maxDoc);
}
if (docCount != -1) {
if (docCount <= 0) {
throw new IllegalArgumentException("docCount must be positive, docCount: " + docCount);
}
if (docCount > maxDoc) {
throw new IllegalArgumentException("docCount must not exceed maxDoc, docCount: " + docCount + ", maxDoc: " + maxDoc);
}
if (docCount <= 0) {
throw new IllegalArgumentException("docCount must be positive, docCount: " + docCount);
}
if (sumDocFreq != -1) {
if (sumDocFreq <= 0) {
throw new IllegalArgumentException("sumDocFreq must be positive, sumDocFreq: " + sumDocFreq);
}
if (docCount != -1) {
if (sumDocFreq < docCount) {
throw new IllegalArgumentException("sumDocFreq must be at least docCount, sumDocFreq: " + sumDocFreq + ", docCount: " + docCount);
}
}
if (docCount > maxDoc) {
throw new IllegalArgumentException("docCount must not exceed maxDoc, docCount: " + docCount + ", maxDoc: " + maxDoc);
}
if (sumTotalTermFreq != -1) {
if (sumTotalTermFreq <= 0) {
throw new IllegalArgumentException("sumTotalTermFreq must be positive, sumTotalTermFreq: " + sumTotalTermFreq);
}
if (sumDocFreq != -1) {
if (sumTotalTermFreq < sumDocFreq) {
throw new IllegalArgumentException("sumTotalTermFreq must be at least sumDocFreq, sumTotalTermFreq: " + sumTotalTermFreq + ", sumDocFreq: " + sumDocFreq);
}
}
if (sumDocFreq <= 0) {
throw new IllegalArgumentException("sumDocFreq must be positive, sumDocFreq: " + sumDocFreq);
}
if (sumDocFreq < docCount) {
throw new IllegalArgumentException("sumDocFreq must be at least docCount, sumDocFreq: " + sumDocFreq + ", docCount: " + docCount);
}
if (sumTotalTermFreq <= 0) {
throw new IllegalArgumentException("sumTotalTermFreq must be positive, sumTotalTermFreq: " + sumTotalTermFreq);
}
if (sumTotalTermFreq < sumDocFreq) {
throw new IllegalArgumentException("sumTotalTermFreq must be at least sumDocFreq, sumTotalTermFreq: " + sumTotalTermFreq + ", sumDocFreq: " + sumDocFreq);
}
this.field = field;
this.maxDoc = maxDoc;
@ -86,33 +96,65 @@ public class CollectionStatistics {
this.sumDocFreq = sumDocFreq;
}
/** returns the field name */
/**
* The field's name.
* <p>
* This value is never {@code null}.
* @return field's name, not {@code null}
*/
public final String field() {
return field;
}
/** returns the total number of documents, regardless of
* whether they all contain values for this field.
* @see IndexReader#maxDoc() */
/**
* The total number of documents, regardless of
* whether they all contain values for this field.
* <p>
* This value is always a positive number.
* @return total number of documents, in the range [1 .. {@link Long#MAX_VALUE}]
* @see IndexReader#maxDoc()
*/
public final long maxDoc() {
return maxDoc;
}
/** returns the total number of documents that
* have at least one term for this field.
* @see Terms#getDocCount() */
/**
* The total number of documents that have at least
* one term for this field.
* <p>
* This value is always a positive number, and never
* exceeds {@link #maxDoc()}.
* @return total number of documents containing this field, in the range [1 .. {@link #maxDoc()}]
* @see Terms#getDocCount()
*/
public final long docCount() {
return docCount;
}
/** returns the total number of tokens for this field
* @see Terms#getSumTotalTermFreq() */
/**
* The total number of tokens for this field.
* This is the "word count" for this field across all documents.
* It is the sum of {@link TermStatistics#totalTermFreq()} across all terms.
* It is also the sum of each document's field length across all documents.
* <p>
* This value is always a positive number, and always at least {@link #sumDocFreq()}.
* @return total number of tokens in the field, in the range [{@link #sumDocFreq()} .. {@link Long#MAX_VALUE}]
* @see Terms#getSumTotalTermFreq()
*/
public final long sumTotalTermFreq() {
return sumTotalTermFreq;
}
/** returns the total number of postings for this field
* @see Terms#getSumDocFreq() */
/**
* The total number of posting list entries for this field.
* This is the sum of term-document pairs: the sum of {@link TermStatistics#docFreq()} across all terms.
* It is also the sum of each document's unique term count for this field across all documents.
* <p>
* This value is always a positive number, always at least {@link #docCount()}, and never
* exceeds {@link #sumTotalTermFreq()}.
* @return number of posting list entries, in the range [{@link #docCount()} .. {@link #sumTotalTermFreq()}]
* @see Terms#getSumDocFreq()
*/
public final long sumDocFreq() {
return sumDocFreq;
}
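A hedged usage sketch built on the invariants documented above, casting to double as the class javadoc advises (the helper names are illustrative).

import org.apache.lucene.search.CollectionStatistics;

final class FieldStatsMath {
  // Average field length in tokens, over documents that contain the field.
  static double avgFieldLength(CollectionStatistics stats) {
    return (double) stats.sumTotalTermFreq() / stats.docCount();
  }

  // Average number of unique terms per document containing the field.
  static double avgUniqueTerms(CollectionStatistics stats) {
    return (double) stats.sumDocFreq() / stats.docCount();
  }
}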

View File

@ -167,6 +167,11 @@ public final class ConstantScoreQuery extends Query {
return scorerSupplier.get(Long.MAX_VALUE);
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return innerWeight.getCacheHelper(context);
}
};
} else {
return innerWeight;

View File

@ -137,6 +137,11 @@ public final class DisjunctionMaxQuery extends Query implements Iterable<Query>
}
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return getCacheHelper(context, weights);
}
/** Explain the score we computed for doc */
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {

View File

@ -23,6 +23,7 @@ import java.util.Objects;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
@ -97,6 +98,11 @@ public final class DocValuesFieldExistsQuery extends Query {
return new ConstantScoreScorer(this, score(), iterator);
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return getDocValuesCacheHelper(field, context);
}
};
}
}

View File

@ -86,17 +86,17 @@ public final class DocValuesRewriteMethod extends MultiTermQuery.RewriteMethod {
@Override
public long getSumTotalTermFreq() {
return -1;
throw new UnsupportedOperationException();
}
@Override
public long getSumDocFreq() {
return -1;
throw new UnsupportedOperationException();
}
@Override
public int getDocCount() {
return -1;
throw new UnsupportedOperationException();
}
@Override
@ -158,6 +158,11 @@ public final class DocValuesRewriteMethod extends MultiTermQuery.RewriteMethod {
}
});
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return getDocValuesCacheHelper(query.field, context);
}
};
}
}

View File

@ -19,6 +19,7 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
@ -55,6 +56,11 @@ public abstract class FilterWeight extends Weight {
this.in = weight;
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return in.getCacheHelper(context);
}
@Override
public void extractTerms(Set<Term> terms) {
in.extractTerms(terms);

View File

@ -169,6 +169,13 @@ public final class IndexOrDocValuesQuery extends Query {
}
return scorerSupplier.get(Long.MAX_VALUE);
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
// Both index and dv query should return the same values, so we can use
// the index query's cache helper here
return indexWeight.getCacheHelper(context);
}
};
}

View File

@ -722,8 +722,7 @@ public class LRUQueryCache implements QueryCache, Accountable {
policy.onUse(getQuery());
}
// TODO: should it be pluggable, eg. for queries that run on doc values?
final IndexReader.CacheHelper cacheHelper = context.reader().getCoreCacheHelper();
final IndexReader.CacheHelper cacheHelper = in.getCacheHelper(context);
if (cacheHelper == null) {
// this segment is not suitable for caching
return in.scorerSupplier(context);
@ -788,14 +787,18 @@ public class LRUQueryCache implements QueryCache, Accountable {
return scorerSupplier.get(Long.MAX_VALUE);
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return in.getCacheHelper(context);
}
@Override
public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
if (used.compareAndSet(false, true)) {
policy.onUse(getQuery());
}
// TODO: should it be pluggable, eg. for queries that run on doc values?
final IndexReader.CacheHelper cacheHelper = context.reader().getCoreCacheHelper();
final IndexReader.CacheHelper cacheHelper = in.getCacheHelper(context);
if (cacheHelper == null) {
// this segment is not suitable for caching
return in.bulkScorer(context);
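Since the cache now delegates to the wrapped weight, a query can opt out of caching entirely; a hedged sketch of such an override, as a fragment inside a hypothetical custom Weight subclass.

@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
  // Returning null marks the segment as unsuitable for caching, so the
  // LRUQueryCache code above falls back to the uncached scorerSupplier/bulkScorer.
  return null;
}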

View File

@ -19,6 +19,7 @@ package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.Bits;
@ -39,6 +40,12 @@ public final class MatchAllDocsQuery extends Query {
public Scorer scorer(LeafReaderContext context) throws IOException {
return new ConstantScoreScorer(this, score(), DocIdSetIterator.all(context.reader().maxDoc()));
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return context.reader().getCoreCacheHelper();
}
@Override
public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
final float score = score();

View File

@ -20,6 +20,7 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
@ -58,6 +59,11 @@ public class MatchNoDocsQuery extends Query {
return null;
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return context.reader().getCoreCacheHelper();
}
};
}

View File

@ -291,6 +291,11 @@ public class MultiPhraseQuery extends Query {
}
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return context.reader().getCoreCacheHelper();
}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
Scorer scorer = scorer(context);

View File

@ -22,6 +22,7 @@ import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
@ -211,6 +212,11 @@ final class MultiTermQueryConstantScoreWrapper<Q extends MultiTermQuery> extends
return scorer(weightOrBitSet.set);
}
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return context.reader().getCoreCacheHelper();
}
};
}
}

View File

@ -23,6 +23,7 @@ import java.util.Objects;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
@ -75,6 +76,11 @@ public final class NormsFieldExistsQuery extends Query {
DocIdSetIterator iterator = reader.getNormValues(field);
return new ConstantScoreScorer(this, score(), iterator);
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return context.reader().getCoreCacheHelper();
}
};
}
}

View File

@ -441,7 +441,12 @@ public class PhraseQuery extends Query {
needsScores, totalMatchCost);
}
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return context.reader().getCoreCacheHelper();
}
// only called from assert
private boolean termNotInReader(LeafReader reader, Term term) throws IOException {
return reader.docFreq(term) == 0;
@ -492,14 +497,13 @@ public class PhraseQuery extends Query {
* of processing the occurrences of a term
* in a document that contains the term.
* This is for use by {@link TwoPhaseIterator#matchCost} implementations.
* <br>This may be inaccurate when {@link TermsEnum#totalTermFreq()} is not available.
* @param termsEnum The term is the term at which this TermsEnum is positioned.
*/
static float termPositionsCost(TermsEnum termsEnum) throws IOException {
int docFreq = termsEnum.docFreq();
assert docFreq > 0;
long totalTermFreq = termsEnum.totalTermFreq(); // -1 when not available
float expOccurrencesInMatchingDoc = (totalTermFreq < docFreq) ? 1 : (totalTermFreq / (float) docFreq);
long totalTermFreq = termsEnum.totalTermFreq();
float expOccurrencesInMatchingDoc = totalTermFreq / (float) docFreq;
return TERM_POSNS_SEEK_OPS_PER_DOC + expOccurrencesInMatchingDoc * TERM_OPS_PER_POS;
}
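A worked instance of the simplified estimate above, using hypothetical stats docFreq=10 and totalTermFreq=25, as a fragment in the same scope as the constants.

// expOccurrencesInMatchingDoc = 25 / (float) 10 = 2.5f. Before this change, a
// freq-omitted field reported totalTermFreq == -1 and the ratio was clamped to 1;
// now such fields report totalTermFreq == docFreq, giving the same ratio of 1.
float cost = TERM_POSNS_SEEK_OPS_PER_DOC + (25 / (float) 10) * TERM_OPS_PER_POS;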

View File

@ -23,6 +23,7 @@ import java.util.Collection;
import java.util.Iterator;
import java.util.NoSuchElementException;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PointValues.IntersectVisitor;
@ -150,6 +151,11 @@ public abstract class PointInSetQuery extends Query {
return new ConstantScoreScorer(this, score(), result.build().iterator());
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return context.reader().getCoreCacheHelper();
}
};
}

View File

@ -20,6 +20,7 @@ import java.io.IOException;
import java.util.Arrays;
import java.util.Objects;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
@ -321,6 +322,11 @@ public abstract class PointRangeQuery extends Query {
}
return scorerSupplier.get(Long.MAX_VALUE);
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return context.reader().getCoreCacheHelper();
}
};
}

View File

@ -140,11 +140,7 @@ public final class SynonymQuery extends Query {
TermStatistics termStats = searcher.termStatistics(terms[i], termContexts[i]);
if (termStats != null) {
docFreq = Math.max(termStats.docFreq(), docFreq);
if (termStats.totalTermFreq() == -1) {
totalTermFreq = -1;
} else if (totalTermFreq != -1) {
totalTermFreq += termStats.totalTermFreq();
}
totalTermFreq += termStats.totalTermFreq();
}
}
this.similarity = searcher.getSimilarity(true);
@ -217,6 +213,11 @@ public final class SynonymQuery extends Query {
return new SynonymScorer(simScorer, this, subScorers);
}
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return context.reader().getCoreCacheHelper();
}
}
static class SynonymScorer extends DisjunctionScorer {

View File

@ -315,6 +315,11 @@ public class TermInSetQuery extends Query implements Accountable {
return scorer(weightOrBitSet.set);
}
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return context.reader().getCoreCacheHelper();
}
};
}
}

View File

@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.Objects;
import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
@ -65,9 +66,9 @@ public class TermQuery extends Query {
collectionStats = searcher.collectionStatistics(term.field());
termStats = searcher.termStatistics(term, termStates);
} else {
// we do not need the actual stats, use fake stats with docFreq=maxDoc=1 and ttf=-1
collectionStats = new CollectionStatistics(term.field(), 1, -1, -1, -1);
termStats = new TermStatistics(term.bytes(), 1, -1);
// we do not need the actual stats, use fake stats with docFreq=maxDoc=ttf=1
collectionStats = new CollectionStatistics(term.field(), 1, 1, 1, 1);
termStats = new TermStatistics(term.bytes(), 1, 1);
}
if (termStats == null) {
@ -99,6 +100,11 @@ public class TermQuery extends Query {
return new TermScorer(this, docs, similarity.simScorer(stats, context));
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return context.reader().getCoreCacheHelper();
}
/**
* Returns a {@link TermsEnum} positioned at this weights Term or null if
* the term does not exist in the given context

View File

@ -24,8 +24,29 @@ import org.apache.lucene.index.TermsEnum; // javadocs
import org.apache.lucene.util.BytesRef;
/**
* Contains statistics for a specific term.
* <p>
* This class holds statistics for this term across all documents for scoring purposes:
* <ul>
* <li> {@link #docFreq}: number of documents this term occurs in.
* <li> {@link #totalTermFreq}: number of tokens for this term.
* </ul>
* <p>
* The following conditions are always true:
* <ul>
* <li> All statistics are positive integers: never zero or negative.
* <li> {@code docFreq} &lt;= {@code totalTermFreq}
* <li> {@code docFreq} &lt;= {@code sumDocFreq} of the collection
* <li> {@code totalTermFreq} &lt;= {@code sumTotalTermFreq} of the collection
* </ul>
* <p>
* Values may include statistics on deleted documents that have not yet been merged away.
* <p>
 * Be careful when performing calculations on these values: they are represented
 * as 64-bit integers, so you may need to cast to {@code double} for your use case.
* @lucene.experimental
*/
// TODO: actually add missing cross-checks to guarantee TermStatistics is in bounds of CollectionStatistics,
// otherwise many similarity functions will implode.
public class TermStatistics {
private final BytesRef term;
private final long docFreq;
@ -45,29 +66,52 @@ public class TermStatistics {
if (docFreq <= 0) {
throw new IllegalArgumentException("docFreq must be positive, docFreq: " + docFreq);
}
if (totalTermFreq != -1) {
if (totalTermFreq < docFreq) {
throw new IllegalArgumentException("totalTermFreq must be at least docFreq, totalTermFreq: " + totalTermFreq + ", docFreq: " + docFreq);
}
if (totalTermFreq <= 0) {
throw new IllegalArgumentException("totalTermFreq must be positive, totalTermFreq: " + totalTermFreq);
}
if (totalTermFreq < docFreq) {
throw new IllegalArgumentException("totalTermFreq must be at least docFreq, totalTermFreq: " + totalTermFreq + ", docFreq: " + docFreq);
}
this.term = term;
this.docFreq = docFreq;
this.totalTermFreq = totalTermFreq;
}
/** returns the term text */
/**
* The term text.
* <p>
* This value is never {@code null}.
* @return term's text, not {@code null}
*/
public final BytesRef term() {
return term;
}
/** returns the number of documents this term occurs in
* @see TermsEnum#docFreq() */
/**
* The number of documents this term occurs in.
* <p>
* This is the document-frequency for the term: the count of documents
* where the term appears at least one time.
* <p>
* This value is always a positive number, and never
* exceeds {@link #totalTermFreq}. It also cannot exceed {@link CollectionStatistics#sumDocFreq()}.
* @return document frequency, in the range [1 .. {@link #totalTermFreq()}]
* @see TermsEnum#docFreq()
*/
public final long docFreq() {
return docFreq;
}
/** returns the total number of occurrences of this term
* @see TermsEnum#totalTermFreq() */
/**
* The total number of occurrences of this term.
* <p>
* This is the token count for the term: the number of times it appears in the field across all documents.
* <p>
* This value is always a positive number, always at least {@link #docFreq()},
* and never exceeds {@link CollectionStatistics#sumTotalTermFreq()}.
* @return number of occurrences, in the range [{@link #docFreq()} .. {@link CollectionStatistics#sumTotalTermFreq()}]
* @see TermsEnum#totalTermFreq()
*/
public final long totalTermFreq() {
return totalTermFreq;
}
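The constructor now enforces the documented bounds unconditionally instead of special-casing -1. A short sketch of the checks in action, with illustrative values:

    TermStatistics ok = new TermStatistics(new BytesRef("lucene"), 3, 7); // 1 <= docFreq <= totalTermFreq
    // each of the following now throws IllegalArgumentException:
    // new TermStatistics(new BytesRef("lucene"), 0, 7);  -> docFreq must be positive
    // new TermStatistics(new BytesRef("lucene"), 3, -1); -> totalTermFreq must be positive
    // new TermStatistics(new BytesRef("lucene"), 3, 2);  -> totalTermFreq must be at least docFreq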

View File

@ -18,8 +18,11 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.List;
import java.util.Set;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
@ -102,6 +105,55 @@ public abstract class Weight {
*/
public abstract Scorer scorer(LeafReaderContext context) throws IOException;
/**
   * Returns an {@link org.apache.lucene.index.IndexReader.CacheHelper} to cache this query against.
   *
   * Weights that rely only on Terms or Points can return {@code context.reader().getCoreCacheHelper()}.
   * Weights that use DocValues should call {@link #getDocValuesCacheHelper(String, LeafReaderContext)}.
   * Weights that should not be cached at all should return {@code null}.
*
* @param context the {@link LeafReaderContext} to cache against
* @return an {@link org.apache.lucene.index.IndexReader.CacheHelper} indicating the cache level
*/
public abstract IndexReader.CacheHelper getCacheHelper(LeafReaderContext context);
/**
* Given a collection of Weights, return an {@link org.apache.lucene.index.IndexReader.CacheHelper} that will satisfy
* the requirements of them all.
* @param context the {@link LeafReaderContext} to cache against
* @param weights an array of {@link Weight} to be cached
* @return an {@link org.apache.lucene.index.IndexReader.CacheHelper} indicating the cache level
*/
protected static IndexReader.CacheHelper getCacheHelper(LeafReaderContext context, List<? extends Weight> weights) {
if (weights.size() == 0)
return null;
IndexReader.CacheHelper helper = weights.get(0).getCacheHelper(context);
if (helper == null)
return null;
for (int i = 1; i < weights.size(); i++) {
IndexReader.CacheHelper nextHelper = weights.get(i).getCacheHelper(context);
if (nextHelper == null || nextHelper != helper)
return null;
}
return helper;
}
/**
   * Returns an {@link org.apache.lucene.index.IndexReader.CacheHelper} for a Weight that uses doc values.
   *
   * This will return the core cache helper if the field's doc values have not been updated, or {@code null} if they have.
*
* @param field the docvalues field
* @param ctx the {@link LeafReaderContext} to cache against
* @return an {@link org.apache.lucene.index.IndexReader.CacheHelper} indicating the cache level
*/
public static IndexReader.CacheHelper getDocValuesCacheHelper(String field, LeafReaderContext ctx) {
FieldInfo fi = ctx.reader().getFieldInfos().fieldInfo(field);
if (fi == null || fi.getDocValuesGen() == -1)
return ctx.reader().getCoreCacheHelper();
return null;
}
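To make the three cases of the contract concrete, here is a hedged sketch of a trivial weight that is safe to cache per core; the class is hypothetical and stands in for any Terms/Points-backed weight:

    // a match-all weight: nothing it reads can be changed by doc-values updates,
    // so the core cache helper is the right cache key
    class MatchAllCacheableWeight extends ConstantScoreWeight {
      MatchAllCacheableWeight(Query query, float score) {
        super(query, score);
      }
      @Override
      public Scorer scorer(LeafReaderContext context) throws IOException {
        return new ConstantScoreScorer(this, score(), DocIdSetIterator.all(context.reader().maxDoc()));
      }
      @Override
      public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
        return context.reader().getCoreCacheHelper();
      }
    }

A doc-values-backed weight would instead return getDocValuesCacheHelper(field, context), an uncacheable one returns null, and composites can delegate to the static getCacheHelper(context, weights) overload, as the span queries later in this diff do.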
/**
* Optional method.
* Get a {@link ScorerSupplier}, which allows to know the cost of the {@link Scorer}

View File

@ -85,19 +85,7 @@ public class BM25Similarity extends Similarity {
/** The default implementation computes the average as <code>sumTotalTermFreq / docCount</code> */
protected float avgFieldLength(CollectionStatistics collectionStats) {
final long sumTotalTermFreq;
if (collectionStats.sumTotalTermFreq() == -1) {
// frequencies are omitted (tf=1), its # of postings
if (collectionStats.sumDocFreq() == -1) {
// theoretical case only: remove!
return 1f;
}
sumTotalTermFreq = collectionStats.sumDocFreq();
} else {
sumTotalTermFreq = collectionStats.sumTotalTermFreq();
}
final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
return (float) (sumTotalTermFreq / (double) docCount);
return (float) (collectionStats.sumTotalTermFreq() / (double) collectionStats.docCount());
}
/**
@ -161,7 +149,7 @@ public class BM25Similarity extends Similarity {
*/
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) {
final long df = termStats.docFreq();
final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
final long docCount = collectionStats.docCount();
final float idf = idf(df, docCount);
return Explanation.match(idf, "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:",
Explanation.match(df, "n, number of documents containing term"),
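As a quick sanity check of the formula in the explanation, take hypothetical counts docCount N = 100 and docFreq n = 10:

    long n = 10, N = 100;
    double idf = Math.log(1 + (N - n + 0.5d) / (n + 0.5d)); // log(1 + 90.5/10.5) ~= 2.26

Likewise the simplified avgFieldLength: e.g. a hypothetical sumTotalTermFreq of 1000 tokens over docCount = 100 documents gives 10.0, with no -1 fallback paths left to consider.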

View File

@ -62,7 +62,7 @@ public class ClassicSimilarity extends TFIDFSimilarity {
@Override
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) {
final long df = termStats.docFreq();
final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
final long docCount = collectionStats.docCount();
final float idf = idf(df, docCount);
return Explanation.match(idf, "idf, computed as log((docCount+1)/(docFreq+1)) + 1 from:",
Explanation.match(df, "docFreq, number of documents containing term"),

View File

@ -100,42 +100,16 @@ public abstract class SimilarityBase extends Similarity {
/** Fills all member fields defined in {@code BasicStats} in {@code stats}.
* Subclasses can override this method to fill additional stats. */
protected void fillBasicStats(BasicStats stats, CollectionStatistics collectionStats, TermStatistics termStats) {
// #positions(field) must be >= #positions(term)
assert collectionStats.sumTotalTermFreq() == -1 || collectionStats.sumTotalTermFreq() >= termStats.totalTermFreq();
long numberOfDocuments = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
long docFreq = termStats.docFreq();
long totalTermFreq = termStats.totalTermFreq();
// frequencies are omitted, all postings have tf=1, so totalTermFreq = docFreq
if (totalTermFreq == -1) {
totalTermFreq = docFreq;
}
final long numberOfFieldTokens;
final double avgFieldLength;
if (collectionStats.sumTotalTermFreq() == -1) {
// frequencies are omitted, so sumTotalTermFreq = # postings
if (collectionStats.sumDocFreq() == -1) {
// theoretical case only: remove!
numberOfFieldTokens = docFreq;
avgFieldLength = 1f;
} else {
numberOfFieldTokens = collectionStats.sumDocFreq();
avgFieldLength = (float) (collectionStats.sumDocFreq() / (double)numberOfDocuments);
}
} else {
numberOfFieldTokens = collectionStats.sumTotalTermFreq();
avgFieldLength = (float) (collectionStats.sumTotalTermFreq() / (double)numberOfDocuments);
}
// TODO: validate this for real, somewhere else
assert termStats.totalTermFreq() <= collectionStats.sumTotalTermFreq();
assert termStats.docFreq() <= collectionStats.sumDocFreq();
// TODO: add sumDocFreq for field (numberOfFieldPostings)
stats.setNumberOfDocuments(numberOfDocuments);
stats.setNumberOfFieldTokens(numberOfFieldTokens);
stats.setAvgFieldLength(avgFieldLength);
stats.setDocFreq(docFreq);
stats.setTotalTermFreq(totalTermFreq);
stats.setNumberOfDocuments(collectionStats.docCount());
stats.setNumberOfFieldTokens(collectionStats.sumTotalTermFreq());
stats.setAvgFieldLength(collectionStats.sumTotalTermFreq() / (double) collectionStats.docCount());
stats.setDocFreq(termStats.docFreq());
stats.setTotalTermFreq(termStats.totalTermFreq());
}
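With the sentinels gone, fillBasicStats becomes a straight copy of the statistics. A hedged sketch of a toy subclass consuming them; the scoring formula is invented for illustration, and the score(...) signature is assumed from this era of the SimilarityBase API:

    public class ToySimilarity extends SimilarityBase {
      @Override
      protected float score(BasicStats stats, float freq, float docLen) {
        // numberOfDocuments and docFreq are now guaranteed real values, never -1
        double idf = Math.log(1 + stats.getNumberOfDocuments() / (double) stats.getDocFreq());
        return (float) (idf * freq / docLen);
      }
      @Override
      public String toString() {
        return "ToySimilarity";
      }
    }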
/**

View File

@ -448,7 +448,7 @@ public abstract class TFIDFSimilarity extends Similarity {
*/
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) {
final long df = termStats.docFreq();
final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
final long docCount = collectionStats.docCount();
final float idf = idf(df, docCount);
return Explanation.match(idf, "idf(docFreq, docCount)",
Explanation.match(df, "docFreq, number of documents containing term"),

View File

@ -32,13 +32,13 @@
* <a name="sims"></a>
* <h2>Summary of the Ranking Methods</h2>
*
* <p>{@link org.apache.lucene.search.similarities.ClassicSimilarity} is the original Lucene
* scoring function. It is based on a highly optimized
* <a href="http://en.wikipedia.org/wiki/Vector_Space_Model">Vector Space Model</a>. For more
* information, see {@link org.apache.lucene.search.similarities.TFIDFSimilarity}.
*
* <p>{@link org.apache.lucene.search.similarities.BM25Similarity} is an optimized
* implementation of the successful Okapi BM25 model.
*
* <p>{@link org.apache.lucene.search.similarities.ClassicSimilarity} is the original Lucene
* scoring function. It is based on the
* <a href="http://en.wikipedia.org/wiki/Vector_Space_Model">Vector Space Model</a>. For more
* information, see {@link org.apache.lucene.search.similarities.TFIDFSimilarity}.
*
* <p>{@link org.apache.lucene.search.similarities.SimilarityBase} provides a basic
* implementation of the Similarity contract and exposes a highly simplified

View File

@ -19,8 +19,10 @@ package org.apache.lucene.search.spans;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Map;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
@ -115,5 +117,10 @@ public final class SpanContainingQuery extends SpanContainQuery {
}
};
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return getCacheHelper(context, Arrays.asList(bigWeight, littleWeight));
}
}
}

View File

@ -229,6 +229,11 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
w.extractTerms(terms);
}
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return getCacheHelper(context, subWeights);
}
}
@Override
@ -319,6 +324,11 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
public void extractTerms(Set<Term> terms) {
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return context.reader().getCoreCacheHelper();
}
}
@Override

View File

@ -18,6 +18,7 @@ package org.apache.lucene.search.spans;
import java.io.IOException;
import java.util.Arrays;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
@ -191,6 +192,11 @@ public final class SpanNotQuery extends SpanQuery {
public void extractTerms(Set<Term> terms) {
includeWeight.extractTerms(terms);
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return getCacheHelper(context, Arrays.asList(includeWeight, excludeWeight));
}
}
@Override

View File

@ -138,6 +138,11 @@ public final class SpanOrQuery extends SpanQuery {
}
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return getCacheHelper(context, subWeights);
}
@Override
public void extractTermContexts(Map<Term, TermContext> contexts) {
for (SpanWeight w : subWeights) {

View File

@ -86,6 +86,11 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea
matchWeight.extractTerms(terms);
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return matchWeight.getCacheHelper(context);
}
@Override
public void extractTermContexts(Map<Term, TermContext> contexts) {
matchWeight.extractTermContexts(contexts);

View File

@ -23,6 +23,7 @@ import java.util.Map;
import java.util.Objects;
import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
@ -91,6 +92,11 @@ public class SpanTermQuery extends SpanQuery {
terms.add(term);
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return context.reader().getCoreCacheHelper();
}
@Override
public void extractTermContexts(Map<Term, TermContext> contexts) {
contexts.put(term, termContext);
@ -135,7 +141,6 @@ public class SpanTermQuery extends SpanQuery {
/** Returns an expected cost in simple operations
* of processing the occurrences of a term
* in a document that contains the term.
* <br>This may be inaccurate when {@link TermsEnum#totalTermFreq()} is not available.
   * @param termsEnum a TermsEnum positioned at the term whose cost is being estimated.
* <p>
* This is a copy of org.apache.lucene.search.PhraseQuery.termPositionsCost().
@ -146,8 +151,9 @@ public class SpanTermQuery extends SpanQuery {
static float termPositionsCost(TermsEnum termsEnum) throws IOException {
int docFreq = termsEnum.docFreq();
assert docFreq > 0;
long totalTermFreq = termsEnum.totalTermFreq(); // -1 when not available
float expOccurrencesInMatchingDoc = (totalTermFreq < docFreq) ? 1 : (totalTermFreq / (float) docFreq);
long totalTermFreq = termsEnum.totalTermFreq();
assert totalTermFreq > 0;
float expOccurrencesInMatchingDoc = totalTermFreq / (float) docFreq;
return TERM_POSNS_SEEK_OPS_PER_DOC + expOccurrencesInMatchingDoc * TERM_OPS_PER_POS;
}
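Worked through with hypothetical numbers: a term with docFreq = 10 and totalTermFreq = 30 is expected to occur 3 times per matching document:

    float expOccurrencesInMatchingDoc = 30 / (float) 10; // 3.0f
    // cost = TERM_POSNS_SEEK_OPS_PER_DOC + 3.0f * TERM_OPS_PER_POS
    // (both constants are defined elsewhere in SpanTermQuery)

The old totalTermFreq < docFreq guard is gone because the statistic is now always at least docFreq.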

View File

@ -19,8 +19,10 @@ package org.apache.lucene.search.spans;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Map;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
@ -116,6 +118,11 @@ public final class SpanWithinQuery extends SpanContainQuery {
}
};
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return getCacheHelper(context, Arrays.asList(littleWeight, bigWeight));
}
}
}

View File

@ -172,10 +172,11 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase {
// NOTE: we don't assert hasOffsets/hasPositions/hasPayloads because they are allowed to be different
boolean bothHaveFreqs = leftTerms.hasFreqs() && rightTerms.hasFreqs();
boolean bothHavePositions = leftTerms.hasPositions() && rightTerms.hasPositions();
TermsEnum leftTermsEnum = leftTerms.iterator();
TermsEnum rightTermsEnum = rightTerms.iterator();
assertTermsEnum(leftTermsEnum, rightTermsEnum, true, bothHavePositions);
assertTermsEnum(leftTermsEnum, rightTermsEnum, true, bothHaveFreqs, bothHavePositions);
assertTermsSeeking(leftTerms, rightTerms);
@ -188,7 +189,7 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase {
// TODO: test start term too
TermsEnum leftIntersection = leftTerms.intersect(automaton, null);
TermsEnum rightIntersection = rightTerms.intersect(automaton, null);
assertTermsEnum(leftIntersection, rightIntersection, rarely(), bothHavePositions);
assertTermsEnum(leftIntersection, rightIntersection, rarely(), bothHaveFreqs, bothHavePositions);
}
}
}
@ -263,13 +264,9 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase {
* checks collection-level statistics on Terms
*/
public void assertTermsStatistics(Terms leftTerms, Terms rightTerms) throws Exception {
if (leftTerms.getDocCount() != -1 && rightTerms.getDocCount() != -1) {
assertEquals(leftTerms.getDocCount(), rightTerms.getDocCount());
}
if (leftTerms.getSumDocFreq() != -1 && rightTerms.getSumDocFreq() != -1) {
assertEquals(leftTerms.getSumDocFreq(), rightTerms.getSumDocFreq());
}
if (leftTerms.getSumTotalTermFreq() != -1 && rightTerms.getSumTotalTermFreq() != -1) {
assertEquals(leftTerms.getDocCount(), rightTerms.getDocCount());
assertEquals(leftTerms.getSumDocFreq(), rightTerms.getSumDocFreq());
if (leftTerms.hasFreqs() && rightTerms.hasFreqs()) {
assertEquals(leftTerms.getSumTotalTermFreq(), rightTerms.getSumTotalTermFreq());
}
if (leftTerms.size() != -1 && rightTerms.size() != -1) {
@ -281,7 +278,7 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase {
* checks the terms enum sequentially
   * if deep is false, it does a 'shallow' test that doesn't go down to the docs enums
*/
public void assertTermsEnum(TermsEnum leftTermsEnum, TermsEnum rightTermsEnum, boolean deep, boolean hasPositions) throws Exception {
public void assertTermsEnum(TermsEnum leftTermsEnum, TermsEnum rightTermsEnum, boolean deep, boolean hasFreqs, boolean hasPositions) throws Exception {
BytesRef term;
PostingsEnum leftPositions = null;
PostingsEnum rightPositions = null;
@ -290,7 +287,7 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase {
while ((term = leftTermsEnum.next()) != null) {
assertEquals(term, rightTermsEnum.next());
assertTermStats(leftTermsEnum, rightTermsEnum);
assertTermStats(leftTermsEnum, rightTermsEnum, hasFreqs);
if (deep) {
if (hasPositions) {
// with payloads + off
@ -350,9 +347,9 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase {
/**
* checks term-level statistics
*/
public void assertTermStats(TermsEnum leftTermsEnum, TermsEnum rightTermsEnum) throws Exception {
public void assertTermStats(TermsEnum leftTermsEnum, TermsEnum rightTermsEnum, boolean bothHaveFreqs) throws Exception {
assertEquals(leftTermsEnum.docFreq(), rightTermsEnum.docFreq());
if (leftTermsEnum.totalTermFreq() != -1 && rightTermsEnum.totalTermFreq() != -1) {
if (bothHaveFreqs) {
assertEquals(leftTermsEnum.totalTermFreq(), rightTermsEnum.totalTermFreq());
}
}

View File

@ -148,26 +148,22 @@ public class TestMultiTermsEnum extends LuceneTestCase {
@Override
public long size() throws IOException {
// Docs say we can return -1 if we don't know.
return -1;
throw new UnsupportedOperationException();
}
@Override
public long getSumTotalTermFreq() throws IOException {
// Docs say we can return -1 if we don't know.
return -1;
throw new UnsupportedOperationException();
}
@Override
public long getSumDocFreq() throws IOException {
// Docs say we can return -1 if we don't know.
return -1;
throw new UnsupportedOperationException();
}
@Override
public int getDocCount() throws IOException {
// Docs say we can return -1 if we don't know.
return -1;
throw new UnsupportedOperationException();
}
@Override

View File

@ -445,7 +445,7 @@ public class TestOmitTf extends LuceneTestCase {
}
}
/** test that when freqs are omitted, that totalTermFreq and sumTotalTermFreq are -1 */
  /** test that when freqs are omitted, totalTermFreq equals docFreq and sumTotalTermFreq equals sumDocFreq */
public void testStats() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir,
@ -459,8 +459,8 @@ public class TestOmitTf extends LuceneTestCase {
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
assertEquals(-1, ir.totalTermFreq(new Term("foo", new BytesRef("bar"))));
assertEquals(-1, ir.getSumTotalTermFreq("foo"));
assertEquals(ir.docFreq(new Term("foo", new BytesRef("bar"))), ir.totalTermFreq(new Term("foo", new BytesRef("bar"))));
assertEquals(ir.getSumDocFreq("foo"), ir.getSumTotalTermFreq("foo"));
ir.close();
dir.close();
}

View File

@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.Set;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.similarities.Similarity;
@ -262,6 +263,11 @@ final class JustCompileSearch {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return null;
}
}
}

View File

@ -94,6 +94,11 @@ public class TestBooleanScorer extends LuceneTestCase {
throw new UnsupportedOperationException();
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return null;
}
@Override
public BulkScorer bulkScorer(LeafReaderContext context) {
return new BulkScorer() {

View File

@ -29,6 +29,7 @@ import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
@ -36,15 +37,16 @@ import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FilterDirectoryReader;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
@ -358,6 +360,11 @@ public class TestLRUQueryCache extends LuceneTestCase {
public Scorer scorer(LeafReaderContext context) throws IOException {
return null;
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return context.reader().getCoreCacheHelper();
}
};
}
@ -947,6 +954,11 @@ public class TestLRUQueryCache extends LuceneTestCase {
public Scorer scorer(LeafReaderContext context) throws IOException {
return null;
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return context.reader().getCoreCacheHelper();
}
};
}
@ -1276,6 +1288,78 @@ public class TestLRUQueryCache extends LuceneTestCase {
dir.close();
}
// A query that returns null from Weight.getCacheHelper
private static class NoCacheQuery extends Query {
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
return new Weight(this) {
@Override
public void extractTerms(Set<Term> terms) {
}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
return null;
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
return null;
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return null;
}
};
}
@Override
public String toString(String field) {
return "NoCacheQuery";
}
@Override
public boolean equals(Object obj) {
return sameClassAs(obj);
}
@Override
public int hashCode() {
return 0;
}
}
public void testQueryNotSuitedForCaching() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE);
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
w.addDocument(new Document());
DirectoryReader reader = w.getReader();
IndexSearcher searcher = newSearcher(reader);
searcher.setQueryCachingPolicy(QueryCachingPolicy.ALWAYS_CACHE);
LRUQueryCache cache = new LRUQueryCache(2, 10000, context -> true);
searcher.setQueryCache(cache);
assertEquals(0, searcher.count(new NoCacheQuery()));
assertEquals(0, cache.getCacheCount());
// BooleanQuery wrapping an uncacheable query should also not be cached
BooleanQuery bq = new BooleanQuery.Builder()
.add(new NoCacheQuery(), Occur.MUST)
.add(new TermQuery(new Term("field", "term")), Occur.MUST).build();
assertEquals(0, searcher.count(bq));
assertEquals(0, cache.getCacheCount());
reader.close();
w.close();
dir.close();
}
private static class DummyQuery2 extends Query {
private final AtomicBoolean scorerCreated;
@ -1291,6 +1375,12 @@ public class TestLRUQueryCache extends LuceneTestCase {
public Scorer scorer(LeafReaderContext context) throws IOException {
return scorerSupplier(context).get(Long.MAX_VALUE);
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return context.reader().getCoreCacheHelper();
}
@Override
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
final Weight weight = this;
@ -1351,4 +1441,110 @@ public class TestLRUQueryCache extends LuceneTestCase {
w.close();
dir.close();
}
static class DVCacheQuery extends Query {
final String field;
AtomicInteger scorerCreatedCount = new AtomicInteger(0);
DVCacheQuery(String field) {
this.field = field;
}
@Override
public String toString(String field) {
return "DVCacheQuery";
}
@Override
public boolean equals(Object obj) {
return sameClassAs(obj);
}
@Override
public int hashCode() {
return 0;
}
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
return new ConstantScoreWeight(this, 1) {
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
scorerCreatedCount.incrementAndGet();
return new ConstantScoreScorer(this, 1, DocIdSetIterator.all(context.reader().maxDoc()));
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return getDocValuesCacheHelper(field, context);
}
};
}
}
public void testDocValuesUpdatesDontBreakCache() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE);
//RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
IndexWriter w = new IndexWriter(dir, iwc);
w.addDocument(new Document());
w.commit();
DirectoryReader reader = DirectoryReader.open(w);
IndexSearcher searcher = newSearcher(reader);
searcher.setQueryCachingPolicy(QueryCachingPolicy.ALWAYS_CACHE);
LRUQueryCache cache = new LRUQueryCache(1, 1000, context -> true);
searcher.setQueryCache(cache);
DVCacheQuery query = new DVCacheQuery("field");
assertEquals(1, searcher.count(query));
assertEquals(1, query.scorerCreatedCount.get());
assertEquals(1, searcher.count(query));
assertEquals(1, query.scorerCreatedCount.get()); // should be cached
Document doc = new Document();
doc.add(new NumericDocValuesField("field", 1));
doc.add(newTextField("text", "text", Store.NO));
w.addDocument(doc);
reader.close();
reader = DirectoryReader.open(w);
searcher = newSearcher(reader);
searcher.setQueryCachingPolicy(QueryCachingPolicy.ALWAYS_CACHE);
searcher.setQueryCache(cache);
assertEquals(2, searcher.count(query));
assertEquals(2, query.scorerCreatedCount.get()); // first segment cached
reader.close();
reader = DirectoryReader.open(w);
searcher = newSearcher(reader);
searcher.setQueryCachingPolicy(QueryCachingPolicy.ALWAYS_CACHE);
searcher.setQueryCache(cache);
assertEquals(2, searcher.count(query));
assertEquals(2, query.scorerCreatedCount.get()); // both segments cached
    w.updateNumericDocValue(new Term("text", "text"), "field", 2L);
reader.close();
reader = DirectoryReader.open(w);
searcher = newSearcher(reader);
searcher.setQueryCachingPolicy(QueryCachingPolicy.ALWAYS_CACHE);
searcher.setQueryCache(cache);
assertEquals(2, searcher.count(query));
assertEquals(3, query.scorerCreatedCount.get()); // second segment no longer cached due to DV update
assertEquals(2, searcher.count(query));
assertEquals(4, query.scorerCreatedCount.get()); // still no caching
reader.close();
w.close();
dir.close();
}
}

View File

@ -18,7 +18,6 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.Set;
import java.util.Objects;
import org.apache.lucene.document.Document;
@ -103,17 +102,7 @@ public class TestNeedsScores extends LuceneTestCase {
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
final Weight w = in.createWeight(searcher, needsScores, boost);
return new Weight(AssertNeedsScores.this) {
@Override
public void extractTerms(Set<Term> terms) {
w.extractTerms(terms);
}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
return w.explain(context, doc);
}
return new FilterWeight(w) {
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
assertEquals("query=" + in, value, needsScores);

View File

@ -487,6 +487,11 @@ public class TestQueryRescorer extends LuceneTestCase {
};
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return null;
}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
return null;

View File

@ -155,6 +155,11 @@ public class TestScorerPerf extends LuceneTestCase {
public Scorer scorer(LeafReaderContext context) throws IOException {
return new ConstantScoreScorer(this, score(), new BitSetIterator(docs, docs.approximateCardinality()));
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return null;
}
};
}

View File

@ -249,6 +249,11 @@ public class TestSortRandom extends LuceneTestCase {
return new ConstantScoreScorer(this, score(), new BitSetIterator(bits, bits.approximateCardinality()));
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return null;
}
};
}

View File

@ -124,6 +124,11 @@ public class TestUsageTrackingFilterCachingPolicy extends LuceneTestCase {
public Scorer scorer(LeafReaderContext context) throws IOException {
return new ConstantScoreScorer(this, score(), DocIdSetIterator.all(1));
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return context.reader().getCoreCacheHelper();
}
};
}

View File

@ -40,6 +40,7 @@ import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.Version;
/**
@ -183,7 +184,17 @@ public class TestSimilarityBase extends LuceneTestCase {
}
private CollectionStatistics toCollectionStats(BasicStats stats) {
return new CollectionStatistics(stats.field, stats.getNumberOfDocuments(), -1, stats.getNumberOfFieldTokens(), -1);
long sumTtf = stats.getNumberOfFieldTokens();
long sumDf;
if (sumTtf == -1) {
sumDf = TestUtil.nextLong(random(), stats.getNumberOfDocuments(), 2L * stats.getNumberOfDocuments());
} else {
sumDf = TestUtil.nextLong(random(), Math.min(stats.getNumberOfDocuments(), sumTtf), sumTtf);
}
int docCount = Math.toIntExact(Math.min(sumDf, stats.getNumberOfDocuments()));
int maxDoc = TestUtil.nextInt(random(), docCount, docCount + 10);
return new CollectionStatistics(stats.field, maxDoc, docCount, sumTtf, sumDf);
}
private TermStatistics toTermStats(BasicStats stats) {

View File

@ -17,8 +17,10 @@
package org.apache.lucene.facet;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.Objects;
import java.util.Set;
@ -101,6 +103,14 @@ class DrillSidewaysQuery extends Query {
throw new UnsupportedOperationException();
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
List<Weight> weights = new ArrayList<>();
weights.add(baseWeight);
weights.addAll(Arrays.asList(drillDowns));
return getCacheHelper(context, weights);
}
@Override
public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
Scorer baseScorer = baseWeight.scorer(context);

View File

@ -171,6 +171,11 @@ public final class DoubleRange extends Range {
};
return new ConstantScoreScorer(this, score(), twoPhase);
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return null; // TODO delegate to LongValuesSource?
}
};
}

View File

@ -163,6 +163,11 @@ public final class LongRange extends Range {
};
return new ConstantScoreScorer(this, score(), twoPhase);
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return null; // TODO delegate to LongValuesSource?
}
};
}

View File

@ -16,6 +16,16 @@
*/
package org.apache.lucene.facet;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@ -58,16 +68,6 @@ import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.TestUtil;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
public class TestDrillSideways extends FacetTestCase {
protected DrillSideways getNewDrillSideways(IndexSearcher searcher, FacetsConfig config,
@ -740,6 +740,11 @@ public class TestDrillSideways extends FacetTestCase {
});
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return null;
}
};
}

View File

@ -20,7 +20,6 @@ import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.lucene.document.Document;
@ -29,8 +28,8 @@ import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.facet.DrillDownQuery;
import org.apache.lucene.facet.DrillSideways.DrillSidewaysResult;
import org.apache.lucene.facet.DrillSideways;
import org.apache.lucene.facet.DrillSideways.DrillSidewaysResult;
import org.apache.lucene.facet.FacetField;
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.FacetTestCase;
@ -46,10 +45,10 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DoubleValues;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.FilterWeight;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LongValuesSource;
import org.apache.lucene.search.MatchAllDocsQuery;
@ -717,24 +716,12 @@ public class TestRangeFacetCounts extends FacetTestCase {
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
final Weight in = this.in.createWeight(searcher, needsScores, boost);
return new Weight(in.getQuery()) {
@Override
public void extractTerms(Set<Term> terms) {
in.extractTerms(terms);
}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
return in.explain(context, doc);
}
return new FilterWeight(in) {
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
used.set(true);
return in.scorer(context);
}
};
}

View File

@ -238,13 +238,8 @@ public final class TokenStreamFromTermVector extends TokenStream {
// Estimate the number of position slots we need from term stats. We use some estimation factors taken from
// Wikipedia that reduce the likelihood of needing to expand the array.
int sumTotalTermFreq = (int) vector.getSumTotalTermFreq();
if (sumTotalTermFreq == -1) {//unfortunately term vectors seem to not have this stat
int size = (int) vector.size();
if (size == -1) {//doesn't happen with term vectors, it seems, but pick a default any way
size = 128;
}
sumTotalTermFreq = (int)(size * 2.4);
}
assert sumTotalTermFreq != -1;
final int originalPositionEstimate = (int) (sumTotalTermFreq * 1.5);//less than 1 in 10 docs exceed this
// This estimate is based on maxStartOffset. Err on the side of this being larger than needed.
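For example, a hypothetical vector reporting sumTotalTermFreq = 100 tokens reserves

    int originalPositionEstimate = (int) (100 * 1.5); // 150 position slots

with no fallback estimate needed now that term vectors always carry the statistic.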

View File

@ -153,7 +153,7 @@ org.apache.hadoop.version = 2.7.4
/org.apache.httpcomponents/httpcore = 4.4.6
/org.apache.httpcomponents/httpmime = 4.5.3
/org.apache.ivy/ivy = 2.3.0
/org.apache.ivy/ivy = 2.4.0
org.apache.james.apache.mime4j.version = 0.7.2
/org.apache.james/apache-mime4j-core = ${org.apache.james.apache.mime4j.version}

View File

@ -20,6 +20,7 @@ import java.io.IOException;
import java.util.Set;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
@ -154,6 +155,11 @@ final class GlobalOrdinalsQuery extends Query {
}
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return getDocValuesCacheHelper(joinField, context);
}
}
final static class OrdinalMapScorer extends BaseGlobalOrdinalScorer {

View File

@ -194,6 +194,11 @@ public class ParentChildrenBlockJoinQuery extends Query {
}
};
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return null; // TODO delegate to BitSetProducer?
}
};
}
}

View File

@ -29,6 +29,7 @@ import org.apache.lucene.document.FloatPoint;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PointValues;
@ -186,6 +187,11 @@ abstract class PointInSetIncludingScoreQuery extends Query {
};
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return context.reader().getCoreCacheHelper();
}
};
}

View File

@ -21,6 +21,7 @@ import java.util.Locale;
import java.util.Objects;
import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
@ -140,6 +141,11 @@ class TermsIncludingScoreQuery extends Query {
}
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return context.reader().getCoreCacheHelper();
}
};
}

View File

@ -562,6 +562,11 @@ public class TestJoinUtil extends LuceneTestCase {
}
};
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return null;
}
};
}

View File

@ -1 +0,0 @@
c5ebf1c253ad4959a29f4acfe696ee48cdd9f473

View File

@ -0,0 +1 @@
5abe4c24bbe992a9ac07ca563d5bd3e8d569e9ed

View File

@ -17,6 +17,7 @@
package org.apache.lucene.queries;
import java.io.IOException;
import java.util.Arrays;
import java.util.Objects;
import java.util.Set;
@ -121,6 +122,11 @@ public class BoostingQuery extends Query {
}
};
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return getCacheHelper(context, Arrays.asList(matchWeight, contextWeight));
}
};
}

View File

@ -17,9 +17,11 @@
package org.apache.lucene.queries;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import org.apache.lucene.index.IndexReader;
@ -207,6 +209,14 @@ public class CustomScoreQuery extends Query implements Cloneable {
return new CustomScorer(CustomScoreQuery.this.getCustomScoreProvider(context), this, queryWeight, subQueryScorer, valSrcScorers);
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
List<Weight> weights = new ArrayList<>();
weights.add(subQueryWeight);
weights.addAll(Arrays.asList(valSrcWeights));
return getCacheHelper(context, weights);
}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
Explanation explain = doExplain(context, doc);

View File

@ -88,6 +88,11 @@ public final class BoostedQuery extends Query {
return new BoostedQuery.CustomScorer(context, this, subQueryScorer, boostVal);
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return null;
}
@Override
public Explanation explain(LeafReaderContext readerContext, int doc) throws IOException {
Explanation subQueryExpl = qWeight.explain(readerContext,doc);

View File

@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.Objects;
import java.util.function.DoublePredicate;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
@ -80,6 +81,11 @@ public final class FunctionMatchQuery extends Query {
};
return new ConstantScoreScorer(this, score(), twoPhase);
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return null; // TODO delegate to DoubleValuesSource?
}
};
}

View File

@ -74,6 +74,11 @@ public class FunctionQuery extends Query {
return new AllScorer(context, this, boost);
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return null;
}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
return ((AllScorer)scorer(context)).explain(doc);

View File

@ -21,6 +21,7 @@ import java.util.Map;
import java.util.Objects;
import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Explanation;
@ -153,5 +154,10 @@ public class FunctionRangeQuery extends Query {
// getRangeScorer takes String args and parses them. Weird.
return functionValues.getRangeScorer(context, lowerVal, upperVal, includeLower, includeUpper);
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return null;
}
}
}

View File

@ -137,5 +137,10 @@ public final class FunctionScoreQuery extends Query {
}
};
}
@Override
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
return null; // TODO delegate to DoubleValuesSource
}
}
}

View File

@ -29,8 +29,6 @@ import java.util.Map;
/**
 * <code>SumTotalTermFreqValueSource</code> returns the number of tokens
 * (the sum of term freqs across all documents, across all terms).
* Returns -1 if frequencies were omitted for the field, or if
* the codec doesn't support this statistic.
* @lucene.internal
*/
public class SumTotalTermFreqValueSource extends ValueSource {
@ -61,12 +59,8 @@ public class SumTotalTermFreqValueSource extends ValueSource {
Terms terms = readerContext.reader().terms(indexedField);
if (terms == null) continue;
long v = terms.getSumTotalTermFreq();
if (v == -1) {
sumTotalTermFreq = -1;
break;
} else {
sumTotalTermFreq += v;
}
assert v != -1;
sumTotalTermFreq += v;
}
final long ttf = sumTotalTermFreq;
context.put(this, new LongDocValues(this) {
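The loop above can now accumulate unconditionally. A hedged standalone sketch of the same per-segment sum, where reader and the "body" field are hypothetical:

    long sumTotalTermFreq = 0;
    for (LeafReaderContext leaf : reader.leaves()) {
      Terms terms = leaf.reader().terms("body");
      if (terms == null) continue;                     // field absent from this segment
      sumTotalTermFreq += terms.getSumTotalTermFreq(); // never -1 any more
    }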

Some files were not shown because too many files have changed in this diff