mirror of https://github.com/apache/lucene.git
Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/lucene-solr
This commit is contained in:
commit
dae5c570b9
15
build.xml
15
build.xml
|
@ -202,6 +202,12 @@
|
|||
}
|
||||
}
|
||||
|
||||
def checkMockitoAssume = { f, text ->
|
||||
if (text.contains("mockito") && !text.contains("assumeWorkingMockito()")) {
|
||||
reportViolation(f, 'File uses Mockito but has no assumeWorkingMockito() call');
|
||||
}
|
||||
}
|
||||
|
||||
def checkForUnescapedSymbolSubstitutions = { f, text ->
|
||||
def inCodeBlock = false;
|
||||
def underSourceHeader = false;
|
||||
|
@ -265,18 +271,21 @@
|
|||
ratDocument.getMetaData().value(MetaData.RAT_URL_LICENSE_FAMILY_NAME)));
|
||||
}
|
||||
}
|
||||
if (f.toString().endsWith('.java')) {
|
||||
if (f.name.endsWith('.java')) {
|
||||
if (text.contains('org.slf4j.LoggerFactory')) {
|
||||
if (!validLoggerPattern.matcher(text).find()) {
|
||||
reportViolation(f, 'invalid logging pattern [not private static final, uses static class name]');
|
||||
}
|
||||
}
|
||||
checkLicenseHeaderPrecedes(f, 'package', packagePattern, javaCommentPattern, text, ratDocument);
|
||||
if (f.name.contains("Test")) {
|
||||
checkMockitoAssume(f, text);
|
||||
}
|
||||
if (f.toString().endsWith('.xml') || f.toString().endsWith('.xml.template')) {
|
||||
}
|
||||
if (f.name.endsWith('.xml') || f.name.endsWith('.xml.template')) {
|
||||
checkLicenseHeaderPrecedes(f, '<tag>', xmlTagPattern, xmlCommentPattern, text, ratDocument);
|
||||
}
|
||||
if (f.toString().endsWith('.adoc')) {
|
||||
if (f.name.endsWith('.adoc')) {
|
||||
checkForUnescapedSymbolSubstitutions(f, text);
|
||||
}
|
||||
};
|
||||
|
|
|
@ -12,6 +12,16 @@
|
|||
</content>
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
<orderEntry type="module-library" exported="">
|
||||
<library>
|
||||
<CLASSES>
|
||||
<root url="file://$MODULE_DIR$/lib" />
|
||||
</CLASSES>
|
||||
<JAVADOC />
|
||||
<SOURCES />
|
||||
<jarDirectory url="file://$MODULE_DIR$/lib" recursive="false" />
|
||||
</library>
|
||||
</orderEntry>
|
||||
<orderEntry type="library" scope="TEST" name="JUnit" level="project" />
|
||||
<orderEntry type="module" scope="TEST" module-name="lucene-test-framework" />
|
||||
<orderEntry type="module" module-name="analysis-common" />
|
||||
|
|
|
@ -5,6 +5,14 @@ http://s.apache.org/luceneversions
|
|||
|
||||
======================= Lucene 8.0.0 =======================
|
||||
|
||||
API Changes
|
||||
|
||||
* LUCENE-8007: Index statistics Terms.getSumDocFreq(), Terms.getDocCount() are
|
||||
now required to be stored by codecs. Additionally, TermsEnum.totalTermFreq()
|
||||
and Terms.getSumTotalTermFreq() are now required: if frequencies are not
|
||||
stored they are equal to TermsEnum.docFreq() and Terms.getSumDocFreq(),
|
||||
respectively, because all freq() values equal 1. (Adrien Grand, Robert Muir)
|
||||
|
||||
Changes in Runtime Behavior
|
||||
|
||||
* LUCENE-7837: Indices that were created before the previous major version
|
||||
|
@ -25,6 +33,11 @@ Improvements
|
|||
|
||||
======================= Lucene 7.2.0 =======================
|
||||
|
||||
API Changes
|
||||
|
||||
* LUCENE-8017: Weight now exposes a getCacheHelper() method to help query caches
|
||||
determine whether or not a query can be cached. (Alan Woodward)
|
||||
|
||||
Bug Fixes
|
||||
|
||||
* LUCENE-7991: KNearestNeighborDocumentClassifier.knnSearch no longer applies
|
||||
|
@ -50,6 +63,16 @@ Optimizations
|
|||
number of hits is small relative to the number of unique facet labels
|
||||
(Dawid Weiss, Robert Muir, Mike McCandless)
|
||||
|
||||
Tests
|
||||
|
||||
* LUCENE-8035: Run tests with JDK-specific options: --illegal-access=deny
|
||||
on Java 9+. (Uwe Schindler)
|
||||
|
||||
Build
|
||||
|
||||
* LUCENE-6144: Upgrade Ivy to 2.4.0; 'ant ivy-bootstrap' now removes old Ivy
|
||||
jars in ~/.ant/lib/. (Shawn Heisey, Steve Rowe)
|
||||
|
||||
======================= Lucene 7.1.0 =======================
|
||||
|
||||
Changes in Runtime Behavior
|
||||
|
|
|
@ -139,8 +139,9 @@ public class BlockTermsReader extends FieldsProducer {
|
|||
assert numTerms >= 0;
|
||||
final long termsStartPointer = in.readVLong();
|
||||
final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
|
||||
final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? -1 : in.readVLong();
|
||||
final long sumDocFreq = in.readVLong();
|
||||
final long sumTotalTermFreq = in.readVLong();
|
||||
// when frequencies are omitted, sumDocFreq=sumTotalTermFreq and we only write one value
|
||||
final long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? sumTotalTermFreq : in.readVLong();
|
||||
final int docCount = in.readVInt();
|
||||
final int longsSize = in.readVInt();
|
||||
if (docCount < 0 || docCount > state.segmentInfo.maxDoc()) { // #docs with field must be <= #docs
|
||||
|
@ -149,7 +150,7 @@ public class BlockTermsReader extends FieldsProducer {
|
|||
if (sumDocFreq < docCount) { // #postings must be >= #docs with field
|
||||
throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, in);
|
||||
}
|
||||
if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
|
||||
if (sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
|
||||
throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq, in);
|
||||
}
|
||||
FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq, docCount, longsSize));
|
||||
|
@ -810,7 +811,9 @@ public class BlockTermsReader extends FieldsProducer {
|
|||
// docFreq, totalTermFreq
|
||||
state.docFreq = freqReader.readVInt();
|
||||
//System.out.println(" dF=" + state.docFreq);
|
||||
if (fieldInfo.getIndexOptions() != IndexOptions.DOCS) {
|
||||
if (fieldInfo.getIndexOptions() == IndexOptions.DOCS) {
|
||||
state.totalTermFreq = state.docFreq; // all postings have tf=1
|
||||
} else {
|
||||
state.totalTermFreq = state.docFreq + freqReader.readVLong();
|
||||
//System.out.println(" totTF=" + state.totalTermFreq);
|
||||
}
|
||||
|
|
|
@ -126,8 +126,9 @@ public final class OrdsBlockTreeTermsReader extends FieldsProducer {
|
|||
final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
|
||||
assert fieldInfo != null: "field=" + field;
|
||||
assert numTerms <= Integer.MAX_VALUE;
|
||||
final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? -1 : in.readVLong();
|
||||
final long sumDocFreq = in.readVLong();
|
||||
final long sumTotalTermFreq = in.readVLong();
|
||||
// when frequencies are omitted, sumDocFreq=sumTotalTermFreq and we only write one value
|
||||
final long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? sumTotalTermFreq : in.readVLong();
|
||||
final int docCount = in.readVInt();
|
||||
final int longsSize = in.readVInt();
|
||||
// System.out.println(" longsSize=" + longsSize);
|
||||
|
@ -140,7 +141,7 @@ public final class OrdsBlockTreeTermsReader extends FieldsProducer {
|
|||
if (sumDocFreq < docCount) { // #postings must be >= #docs with field
|
||||
throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, in);
|
||||
}
|
||||
if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
|
||||
if (sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
|
||||
throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq, in);
|
||||
}
|
||||
final long indexStartFP = indexIn.readVLong();
|
||||
|
|
|
@ -292,7 +292,9 @@ final class OrdsIntersectTermsEnumFrame {
|
|||
// stats
|
||||
termState.docFreq = statsReader.readVInt();
|
||||
//if (DEBUG) System.out.println(" dF=" + state.docFreq);
|
||||
if (ite.fr.fieldInfo.getIndexOptions() != IndexOptions.DOCS) {
|
||||
if (ite.fr.fieldInfo.getIndexOptions() == IndexOptions.DOCS) {
|
||||
termState.totalTermFreq = termState.docFreq; // all tf values are 1
|
||||
} else {
|
||||
termState.totalTermFreq = termState.docFreq + statsReader.readVLong();
|
||||
//if (DEBUG) System.out.println(" totTF=" + state.totalTermFreq);
|
||||
}
|
||||
|
|
|
@ -499,7 +499,9 @@ final class OrdsSegmentTermsEnumFrame {
|
|||
// stats
|
||||
state.docFreq = statsReader.readVInt();
|
||||
//if (DEBUG) System.out.println(" dF=" + state.docFreq);
|
||||
if (ste.fr.fieldInfo.getIndexOptions() != IndexOptions.DOCS) {
|
||||
if (ste.fr.fieldInfo.getIndexOptions() == IndexOptions.DOCS) {
|
||||
state.totalTermFreq = state.docFreq; // all tf values are 1
|
||||
} else {
|
||||
state.totalTermFreq = state.docFreq + statsReader.readVLong();
|
||||
//if (DEBUG) System.out.println(" totTF=" + state.totalTermFreq);
|
||||
}
|
||||
|
|
|
@ -19,7 +19,6 @@ package org.apache.lucene.codecs.memory;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.BitSet;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
|
@ -111,8 +110,9 @@ public class FSTOrdTermsReader extends FieldsProducer {
|
|||
FieldInfo fieldInfo = fieldInfos.fieldInfo(blockIn.readVInt());
|
||||
boolean hasFreq = fieldInfo.getIndexOptions() != IndexOptions.DOCS;
|
||||
long numTerms = blockIn.readVLong();
|
||||
long sumTotalTermFreq = hasFreq ? blockIn.readVLong() : -1;
|
||||
long sumDocFreq = blockIn.readVLong();
|
||||
long sumTotalTermFreq = blockIn.readVLong();
|
||||
// if freqs are omitted, sumDocFreq=sumTotalTermFreq and we only write one value
|
||||
long sumDocFreq = hasFreq ? blockIn.readVLong() : sumTotalTermFreq;
|
||||
int docCount = blockIn.readVInt();
|
||||
int longsSize = blockIn.readVInt();
|
||||
FST<Long> index = new FST<>(indexIn, PositiveIntOutputs.getSingleton());
|
||||
|
@ -146,7 +146,7 @@ public class FSTOrdTermsReader extends FieldsProducer {
|
|||
throw new CorruptIndexException("invalid sumDocFreq: " + field.sumDocFreq + " docCount: " + field.docCount + " (blockIn=" + blockIn + ")", indexIn);
|
||||
}
|
||||
// #positions must be >= #postings
|
||||
if (field.sumTotalTermFreq != -1 && field.sumTotalTermFreq < field.sumDocFreq) {
|
||||
if (field.sumTotalTermFreq < field.sumDocFreq) {
|
||||
throw new CorruptIndexException("invalid sumTotalTermFreq: " + field.sumTotalTermFreq + " sumDocFreq: " + field.sumDocFreq + " (blockIn=" + blockIn + ")", indexIn);
|
||||
}
|
||||
if (previous != null) {
|
||||
|
@ -343,9 +343,6 @@ public class FSTOrdTermsReader extends FieldsProducer {
|
|||
this.totalTermFreq = new long[INTERVAL];
|
||||
this.statsBlockOrd = -1;
|
||||
this.metaBlockOrd = -1;
|
||||
if (!hasFreqs()) {
|
||||
Arrays.fill(totalTermFreq, -1);
|
||||
}
|
||||
}
|
||||
|
||||
/** Decodes stats data into term state */
|
||||
|
@ -388,6 +385,7 @@ public class FSTOrdTermsReader extends FieldsProducer {
|
|||
}
|
||||
} else {
|
||||
docFreq[i] = code;
|
||||
totalTermFreq[i] = code;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -94,8 +94,9 @@ public class FSTTermsReader extends FieldsProducer {
|
|||
int fieldNumber = in.readVInt();
|
||||
FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
|
||||
long numTerms = in.readVLong();
|
||||
long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? -1 : in.readVLong();
|
||||
long sumDocFreq = in.readVLong();
|
||||
long sumTotalTermFreq = in.readVLong();
|
||||
// if frequencies are omitted, sumTotalTermFreq=sumDocFreq and we only write one value
|
||||
long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? sumTotalTermFreq : in.readVLong();
|
||||
int docCount = in.readVInt();
|
||||
int longsSize = in.readVInt();
|
||||
TermsReader current = new TermsReader(fieldInfo, in, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize);
|
||||
|
@ -126,7 +127,7 @@ public class FSTTermsReader extends FieldsProducer {
|
|||
throw new CorruptIndexException("invalid sumDocFreq: " + field.sumDocFreq + " docCount: " + field.docCount, in);
|
||||
}
|
||||
// #positions must be >= #postings
|
||||
if (field.sumTotalTermFreq != -1 && field.sumTotalTermFreq < field.sumDocFreq) {
|
||||
if (field.sumTotalTermFreq < field.sumDocFreq) {
|
||||
throw new CorruptIndexException("invalid sumTotalTermFreq: " + field.sumTotalTermFreq + " sumDocFreq: " + field.sumDocFreq, in);
|
||||
}
|
||||
if (previous != null) {
|
||||
|
@ -288,7 +289,7 @@ public class FSTTermsReader extends FieldsProducer {
|
|||
|
||||
@Override
|
||||
public long totalTermFreq() throws IOException {
|
||||
return state.totalTermFreq;
|
||||
return state.totalTermFreq == -1 ? state.docFreq : state.totalTermFreq;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -733,10 +733,10 @@ public final class MemoryPostingsFormat extends PostingsFormat {
|
|||
if (!didDecode) {
|
||||
buffer.reset(current.output.bytes, current.output.offset, current.output.length);
|
||||
docFreq = buffer.readVInt();
|
||||
if (field.getIndexOptions() != IndexOptions.DOCS) {
|
||||
totalTermFreq = docFreq + buffer.readVLong();
|
||||
if (field.getIndexOptions() == IndexOptions.DOCS) {
|
||||
totalTermFreq = docFreq;
|
||||
} else {
|
||||
totalTermFreq = -1;
|
||||
totalTermFreq = docFreq + buffer.readVLong();
|
||||
}
|
||||
postingsSpare.bytes = current.output.bytes;
|
||||
postingsSpare.offset = buffer.getPosition();
|
||||
|
@ -873,12 +873,15 @@ public final class MemoryPostingsFormat extends PostingsFormat {
|
|||
field = fieldInfos.fieldInfo(fieldNumber);
|
||||
if (field == null) {
|
||||
throw new CorruptIndexException("invalid field number: " + fieldNumber, in);
|
||||
} else if (field.getIndexOptions() != IndexOptions.DOCS) {
|
||||
sumTotalTermFreq = in.readVLong();
|
||||
} else {
|
||||
sumTotalTermFreq = -1;
|
||||
sumTotalTermFreq = in.readVLong();
|
||||
}
|
||||
// if frequencies are omitted, sumDocFreq = sumTotalTermFreq and we only write one value.
|
||||
if (field.getIndexOptions() == IndexOptions.DOCS) {
|
||||
sumDocFreq = sumTotalTermFreq;
|
||||
} else {
|
||||
sumDocFreq = in.readVLong();
|
||||
}
|
||||
docCount = in.readVInt();
|
||||
|
||||
fst = new FST<>(in, outputs);
|
||||
|
|
|
@ -202,7 +202,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
|
||||
@Override
|
||||
public long totalTermFreq() {
|
||||
return indexOptions == IndexOptions.DOCS ? -1 : totalTermFreq;
|
||||
return indexOptions == IndexOptions.DOCS ? docFreq : totalTermFreq;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -568,12 +568,13 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
} else if (StringHelper.startsWith(scratch.get(), DOC)) {
|
||||
docFreq++;
|
||||
sumDocFreq++;
|
||||
totalTermFreq++;
|
||||
scratchUTF16.copyUTF8Bytes(scratch.bytes(), DOC.length, scratch.length()-DOC.length);
|
||||
int docID = ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length());
|
||||
visitedDocs.set(docID);
|
||||
} else if (StringHelper.startsWith(scratch.get(), FREQ)) {
|
||||
scratchUTF16.copyUTF8Bytes(scratch.bytes(), FREQ.length, scratch.length()-FREQ.length);
|
||||
totalTermFreq += ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length());
|
||||
totalTermFreq += ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length()) - 1;
|
||||
} else if (StringHelper.startsWith(scratch.get(), TERM)) {
|
||||
if (lastDocsStart != -1) {
|
||||
b.add(Util.toIntsRef(lastTerm.get(), scratchIntsRef), outputs.newPair(lastDocsStart,
|
||||
|
@ -637,7 +638,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
|
||||
@Override
|
||||
public long getSumTotalTermFreq() {
|
||||
return fieldInfo.getIndexOptions() == IndexOptions.DOCS ? -1 : sumTotalTermFreq;
|
||||
return sumTotalTermFreq;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -288,7 +288,13 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
|
|||
|
||||
@Override
|
||||
public long getSumTotalTermFreq() throws IOException {
|
||||
return -1;
|
||||
// TODO: make it constant-time
|
||||
long ttf = 0;
|
||||
TermsEnum iterator = iterator();
|
||||
for (BytesRef b = iterator.next(); b != null; b = iterator.next()) {
|
||||
ttf += iterator.totalTermFreq();
|
||||
}
|
||||
return ttf;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -80,13 +80,15 @@
|
|||
<!-- Needed in case a module needs the original build, also for compile-tools to be called from a module -->
|
||||
<property name="common.build.dir" location="${common.dir}/build"/>
|
||||
|
||||
<property name="ivy.bootstrap.version" value="2.3.0" /> <!-- UPGRADE NOTE: update disallowed.ivy.jars regex in ivy-availability-check -->
|
||||
<property name="ivy.bootstrap.version" value="2.4.0" /> <!-- UPGRADE NOTE: update disallowed_ivy_jars_regex below -->
|
||||
<property name="disallowed_ivy_jars_regex" value="ivy-2\.[0123].*\.jar"/>
|
||||
|
||||
<property name="ivy.default.configuration" value="*"/>
|
||||
|
||||
<!-- Running ant targets in parallel may require this to be set to false because ivy:retrieve tasks may race with resolve -->
|
||||
<property name="ivy.sync" value="true"/>
|
||||
<property name="ivy.resolution-cache.dir" location="${common.build.dir}/ivy-resolution-cache"/>
|
||||
<property name="ivy.lock-strategy" value="artifact-lock"/>
|
||||
<property name="ivy.lock-strategy" value="artifact-lock-nio"/>
|
||||
|
||||
<property name="local.caches" location="${common.dir}/../.caches" />
|
||||
<property name="tests.cachedir" location="${local.caches}/test-stats" />
|
||||
|
@ -413,12 +415,12 @@
|
|||
<property name="ivy_bootstrap_url1" value="http://repo1.maven.org/maven2"/>
|
||||
<!-- you might need to tweak this from China so it works -->
|
||||
<property name="ivy_bootstrap_url2" value="http://uk.maven.org/maven2"/>
|
||||
<property name="ivy_checksum_sha1" value="c5ebf1c253ad4959a29f4acfe696ee48cdd9f473"/>
|
||||
<property name="ivy_checksum_sha1" value="5abe4c24bbe992a9ac07ca563d5bd3e8d569e9ed"/>
|
||||
|
||||
<target name="ivy-availability-check" unless="ivy.available">
|
||||
<path id="disallowed.ivy.jars">
|
||||
<fileset dir="${ivy_install_path}">
|
||||
<filename regex="ivy-2\.[012].*\.jar"/> <!-- TODO: Update this regex to disallow Ivy versions -->
|
||||
<filename regex="${disallowed_ivy_jars_regex}"/>
|
||||
</fileset>
|
||||
</path>
|
||||
<loadresource property="disallowed.ivy.jars.list">
|
||||
|
@ -482,19 +484,20 @@
|
|||
<fail>Ivy is not available</fail>
|
||||
</target>
|
||||
|
||||
<target name="ivy-bootstrap" description="Download and install Ivy in the users ant lib dir" depends="ivy-bootstrap1,ivy-bootstrap2,ivy-checksum"/>
|
||||
<target name="ivy-bootstrap" description="Download and install Ivy in the users ant lib dir"
|
||||
depends="-ivy-bootstrap1,-ivy-bootstrap2,-ivy-checksum,-ivy-remove-old-versions"/>
|
||||
|
||||
<!-- try to download from repo1.maven.org -->
|
||||
<target name="ivy-bootstrap1">
|
||||
<target name="-ivy-bootstrap1">
|
||||
<ivy-download src="${ivy_bootstrap_url1}" dest="${ivy_install_path}"/>
|
||||
<available file="${ivy_install_path}/ivy-${ivy.bootstrap.version}.jar" property="ivy.bootstrap1.success" />
|
||||
</target>
|
||||
|
||||
<target name="ivy-bootstrap2" unless="ivy.bootstrap1.success">
|
||||
<target name="-ivy-bootstrap2" unless="ivy.bootstrap1.success">
|
||||
<ivy-download src="${ivy_bootstrap_url2}" dest="${ivy_install_path}"/>
|
||||
</target>
|
||||
|
||||
<target name="ivy-checksum">
|
||||
<target name="-ivy-checksum">
|
||||
<checksum file="${ivy_install_path}/ivy-${ivy.bootstrap.version}.jar"
|
||||
property="${ivy_checksum_sha1}"
|
||||
algorithm="SHA"
|
||||
|
@ -506,6 +509,14 @@
|
|||
</fail>
|
||||
</target>
|
||||
|
||||
<target name="-ivy-remove-old-versions">
|
||||
<delete verbose="true" failonerror="true">
|
||||
<fileset dir="${ivy_install_path}">
|
||||
<filename regex="${disallowed_ivy_jars_regex}"/>
|
||||
</fileset>
|
||||
</delete>
|
||||
</target>
|
||||
|
||||
<macrodef name="ivy-download">
|
||||
<attribute name="src"/>
|
||||
<attribute name="dest"/>
|
||||
|
@ -949,6 +960,12 @@
|
|||
<istrue value="${tests.useSecurityManager}"/>
|
||||
</condition>
|
||||
|
||||
<!-- additional arguments for Java 9+ -->
|
||||
<local name="tests.runtimespecific.args"/>
|
||||
<condition property="tests.runtimespecific.args" value="" else="--illegal-access=deny">
|
||||
<equals arg1="${build.java.runtime}" arg2="1.8"/>
|
||||
</condition>
|
||||
|
||||
<!-- create a fileset pattern that matches ${tests.class}. -->
|
||||
<loadresource property="tests.explicitclass" quiet="true">
|
||||
<propertyresource name="tests.class" />
|
||||
|
@ -1029,6 +1046,7 @@
|
|||
<jvmarg line="${tests.clover.args}"/>
|
||||
<jvmarg line="@{additional.vm.args}"/>
|
||||
<jvmarg line="${tests.asserts.args}"/>
|
||||
<jvmarg line="${tests.runtimespecific.args}"/>
|
||||
|
||||
<!-- set the number of times tests should run -->
|
||||
<sysproperty key="tests.iters" value="${tests.iters}"/>
|
||||
|
|
|
@ -180,8 +180,9 @@ public final class BlockTreeTermsReader extends FieldsProducer {
|
|||
if (fieldInfo == null) {
|
||||
throw new CorruptIndexException("invalid field number: " + field, termsIn);
|
||||
}
|
||||
final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? -1 : termsIn.readVLong();
|
||||
final long sumDocFreq = termsIn.readVLong();
|
||||
final long sumTotalTermFreq = termsIn.readVLong();
|
||||
// when frequencies are omitted, sumDocFreq=sumTotalTermFreq and only one value is written.
|
||||
final long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? sumTotalTermFreq : termsIn.readVLong();
|
||||
final int docCount = termsIn.readVInt();
|
||||
final int longsSize = termsIn.readVInt();
|
||||
if (longsSize < 0) {
|
||||
|
@ -195,7 +196,7 @@ public final class BlockTreeTermsReader extends FieldsProducer {
|
|||
if (sumDocFreq < docCount) { // #postings must be >= #docs with field
|
||||
throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, termsIn);
|
||||
}
|
||||
if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
|
||||
if (sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
|
||||
throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq, termsIn);
|
||||
}
|
||||
final long indexStartFP = indexIn.readVLong();
|
||||
|
|
|
@ -288,7 +288,9 @@ final class IntersectTermsEnumFrame {
|
|||
|
||||
// stats
|
||||
termState.docFreq = statsReader.readVInt();
|
||||
if (ite.fr.fieldInfo.getIndexOptions() != IndexOptions.DOCS) {
|
||||
if (ite.fr.fieldInfo.getIndexOptions() == IndexOptions.DOCS) {
|
||||
termState.totalTermFreq = termState.docFreq; // all postings have freq=1
|
||||
} else {
|
||||
termState.totalTermFreq = termState.docFreq + statsReader.readVLong();
|
||||
}
|
||||
// metadata
|
||||
|
|
|
@ -417,7 +417,9 @@ final class SegmentTermsEnumFrame {
|
|||
// stats
|
||||
state.docFreq = statsReader.readVInt();
|
||||
//if (DEBUG) System.out.println(" dF=" + state.docFreq);
|
||||
if (ste.fr.fieldInfo.getIndexOptions() != IndexOptions.DOCS) {
|
||||
if (ste.fr.fieldInfo.getIndexOptions() == IndexOptions.DOCS) {
|
||||
state.totalTermFreq = state.docFreq; // all postings have freq=1
|
||||
} else {
|
||||
state.totalTermFreq = state.docFreq + statsReader.readVLong();
|
||||
//if (DEBUG) System.out.println(" totTF=" + state.totalTermFreq);
|
||||
}
|
||||
|
|
|
@ -745,6 +745,7 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem
|
|||
private static class TVTerms extends Terms {
|
||||
|
||||
private final int numTerms, flags;
|
||||
private final long totalTermFreq;
|
||||
private final int[] prefixLengths, suffixLengths, termFreqs, positionIndex, positions, startOffsets, lengths, payloadIndex;
|
||||
private final BytesRef termBytes, payloadBytes;
|
||||
|
||||
|
@ -764,6 +765,11 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem
|
|||
this.payloadIndex = payloadIndex;
|
||||
this.payloadBytes = payloadBytes;
|
||||
this.termBytes = termBytes;
|
||||
long ttf = 0;
|
||||
for (int tf : termFreqs) {
|
||||
ttf += tf;
|
||||
}
|
||||
this.totalTermFreq = ttf;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -782,7 +788,7 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem
|
|||
|
||||
@Override
|
||||
public long getSumTotalTermFreq() throws IOException {
|
||||
return -1L;
|
||||
return totalTermFreq;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -21,6 +21,7 @@ import java.util.Arrays;
|
|||
import java.util.Objects;
|
||||
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.PointValues;
|
||||
|
@ -356,6 +357,11 @@ abstract class RangeFieldQuery extends Query {
|
|||
}
|
||||
return scorerSupplier.get(Long.MAX_VALUE);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return context.reader().getCoreCacheHelper();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -138,6 +138,11 @@ abstract class SortedNumericDocValuesRangeQuery extends Query {
|
|||
}
|
||||
return new ConstantScoreScorer(this, score(), iterator);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return getDocValuesCacheHelper(field, context);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -181,6 +181,11 @@ abstract class SortedSetDocValuesRangeQuery extends Query {
|
|||
}
|
||||
return new ConstantScoreScorer(this, score(), iterator);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return getDocValuesCacheHelper(field, context);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -123,7 +123,10 @@ public abstract class BaseCompositeReader<R extends IndexReader> extends Composi
|
|||
ensureOpen();
|
||||
int total = 0; // sum freqs in subreaders
|
||||
for (int i = 0; i < subReaders.length; i++) {
|
||||
total += subReaders[i].docFreq(term);
|
||||
int sub = subReaders[i].docFreq(term);
|
||||
assert sub >= 0;
|
||||
assert sub <= subReaders[i].getDocCount(term.field());
|
||||
total += sub;
|
||||
}
|
||||
return total;
|
||||
}
|
||||
|
@ -134,9 +137,8 @@ public abstract class BaseCompositeReader<R extends IndexReader> extends Composi
|
|||
long total = 0; // sum freqs in subreaders
|
||||
for (int i = 0; i < subReaders.length; i++) {
|
||||
long sub = subReaders[i].totalTermFreq(term);
|
||||
if (sub == -1) {
|
||||
return -1;
|
||||
}
|
||||
assert sub >= 0;
|
||||
assert sub <= subReaders[i].getSumTotalTermFreq(term.field());
|
||||
total += sub;
|
||||
}
|
||||
return total;
|
||||
|
@ -148,9 +150,8 @@ public abstract class BaseCompositeReader<R extends IndexReader> extends Composi
|
|||
long total = 0; // sum doc freqs in subreaders
|
||||
for (R reader : subReaders) {
|
||||
long sub = reader.getSumDocFreq(field);
|
||||
if (sub == -1) {
|
||||
return -1; // if any of the subs doesn't support it, return -1
|
||||
}
|
||||
assert sub >= 0;
|
||||
assert sub <= reader.getSumTotalTermFreq(field);
|
||||
total += sub;
|
||||
}
|
||||
return total;
|
||||
|
@ -162,9 +163,8 @@ public abstract class BaseCompositeReader<R extends IndexReader> extends Composi
|
|||
int total = 0; // sum doc counts in subreaders
|
||||
for (R reader : subReaders) {
|
||||
int sub = reader.getDocCount(field);
|
||||
if (sub == -1) {
|
||||
return -1; // if any of the subs doesn't support it, return -1
|
||||
}
|
||||
assert sub >= 0;
|
||||
assert sub <= reader.maxDoc();
|
||||
total += sub;
|
||||
}
|
||||
return total;
|
||||
|
@ -176,9 +176,8 @@ public abstract class BaseCompositeReader<R extends IndexReader> extends Composi
|
|||
long total = 0; // sum doc total term freqs in subreaders
|
||||
for (R reader : subReaders) {
|
||||
long sub = reader.getSumTotalTermFreq(field);
|
||||
if (sub == -1) {
|
||||
return -1; // if any of the subs doesn't support it, return -1
|
||||
}
|
||||
assert sub >= 0;
|
||||
assert sub >= reader.getSumDocFreq(field);
|
||||
total += sub;
|
||||
}
|
||||
return total;
|
||||
|
|
|
@ -1253,6 +1253,10 @@ public final class CheckIndex implements Closeable {
|
|||
continue;
|
||||
}
|
||||
|
||||
if (terms.getDocCount() > maxDoc) {
|
||||
throw new RuntimeException("docCount > maxDoc for field: " + field + ", docCount=" + terms.getDocCount() + ", maxDoc=" + maxDoc);
|
||||
}
|
||||
|
||||
final boolean hasFreqs = terms.hasFreqs();
|
||||
final boolean hasPositions = terms.hasPositions();
|
||||
final boolean hasPayloads = terms.hasPayloads();
|
||||
|
@ -1295,12 +1299,6 @@ public final class CheckIndex implements Closeable {
|
|||
throw new RuntimeException("field \"" + field + "\" should have hasFreqs=" + expectedHasFreqs + " but got " + hasFreqs);
|
||||
}
|
||||
|
||||
if (hasFreqs == false) {
|
||||
if (terms.getSumTotalTermFreq() != -1) {
|
||||
throw new RuntimeException("field \"" + field + "\" hasFreqs is false, but Terms.getSumTotalTermFreq()=" + terms.getSumTotalTermFreq() + " (should be -1)");
|
||||
}
|
||||
}
|
||||
|
||||
if (!isVectors) {
|
||||
final boolean expectedHasPositions = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
|
||||
if (hasPositions != expectedHasPositions) {
|
||||
|
@ -1375,8 +1373,8 @@ public final class CheckIndex implements Closeable {
|
|||
postings = termsEnum.postings(postings, PostingsEnum.ALL);
|
||||
|
||||
if (hasFreqs == false) {
|
||||
if (termsEnum.totalTermFreq() != -1) {
|
||||
throw new RuntimeException("field \"" + field + "\" hasFreqs is false, but TermsEnum.totalTermFreq()=" + termsEnum.totalTermFreq() + " (should be -1)");
|
||||
if (termsEnum.totalTermFreq() != termsEnum.docFreq()) {
|
||||
throw new RuntimeException("field \"" + field + "\" hasFreqs is false, but TermsEnum.totalTermFreq()=" + termsEnum.totalTermFreq() + " (should be " + termsEnum.docFreq() + ")");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1406,14 +1404,11 @@ public final class CheckIndex implements Closeable {
|
|||
break;
|
||||
}
|
||||
visitedDocs.set(doc);
|
||||
int freq = -1;
|
||||
if (hasFreqs) {
|
||||
freq = postings.freq();
|
||||
int freq = postings.freq();
|
||||
if (freq <= 0) {
|
||||
throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
|
||||
}
|
||||
totalTermFreq += freq;
|
||||
} else {
|
||||
if (hasFreqs == false) {
|
||||
// When a field didn't index freq, it must
|
||||
// consistently "lie" and pretend that freq was
|
||||
// 1:
|
||||
|
@ -1421,6 +1416,8 @@ public final class CheckIndex implements Closeable {
|
|||
throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " != 1 when Terms.hasFreqs() is false");
|
||||
}
|
||||
}
|
||||
totalTermFreq += freq;
|
||||
|
||||
if (liveDocs == null || liveDocs.get(doc)) {
|
||||
hasNonDeletedDocs = true;
|
||||
status.totFreq++;
|
||||
|
@ -1490,12 +1487,13 @@ public final class CheckIndex implements Closeable {
|
|||
}
|
||||
|
||||
final long totalTermFreq2 = termsEnum.totalTermFreq();
|
||||
final boolean hasTotalTermFreq = hasFreqs && totalTermFreq2 != -1;
|
||||
|
||||
if (docCount != docFreq) {
|
||||
throw new RuntimeException("term " + term + " docFreq=" + docFreq + " != tot docs w/o deletions " + docCount);
|
||||
}
|
||||
if (hasTotalTermFreq) {
|
||||
if (docFreq > terms.getDocCount()) {
|
||||
throw new RuntimeException("term " + term + " docFreq=" + docFreq + " > docCount=" + terms.getDocCount());
|
||||
}
|
||||
if (totalTermFreq2 <= 0) {
|
||||
throw new RuntimeException("totalTermFreq: " + totalTermFreq2 + " is out of bounds");
|
||||
}
|
||||
|
@ -1503,6 +1501,11 @@ public final class CheckIndex implements Closeable {
|
|||
if (totalTermFreq != totalTermFreq2) {
|
||||
throw new RuntimeException("term " + term + " totalTermFreq=" + totalTermFreq2 + " != recomputed totalTermFreq=" + totalTermFreq);
|
||||
}
|
||||
if (totalTermFreq2 < docFreq) {
|
||||
throw new RuntimeException("totalTermFreq: " + totalTermFreq2 + " is out of bounds, docFreq=" + docFreq);
|
||||
}
|
||||
if (hasFreqs == false && totalTermFreq != docFreq) {
|
||||
throw new RuntimeException("term " + term + " totalTermFreq=" + totalTermFreq + " != docFreq=" + docFreq);
|
||||
}
|
||||
|
||||
// Test skipping
|
||||
|
@ -1626,22 +1629,22 @@ public final class CheckIndex implements Closeable {
|
|||
}
|
||||
status.blockTreeStats.put(field, stats);
|
||||
|
||||
if (sumTotalTermFreq != 0) {
|
||||
final long v = fields.terms(field).getSumTotalTermFreq();
|
||||
if (v != -1 && sumTotalTermFreq != v) {
|
||||
throw new RuntimeException("sumTotalTermFreq for field " + field + "=" + v + " != recomputed sumTotalTermFreq=" + sumTotalTermFreq);
|
||||
}
|
||||
final long actualSumDocFreq = fields.terms(field).getSumDocFreq();
|
||||
if (sumDocFreq != actualSumDocFreq) {
|
||||
throw new RuntimeException("sumDocFreq for field " + field + "=" + actualSumDocFreq + " != recomputed sumDocFreq=" + sumDocFreq);
|
||||
}
|
||||
|
||||
if (sumDocFreq != 0) {
|
||||
final long v = fields.terms(field).getSumDocFreq();
|
||||
if (v != -1 && sumDocFreq != v) {
|
||||
throw new RuntimeException("sumDocFreq for field " + field + "=" + v + " != recomputed sumDocFreq=" + sumDocFreq);
|
||||
final long actualSumTotalTermFreq = fields.terms(field).getSumTotalTermFreq();
|
||||
if (sumTotalTermFreq != actualSumTotalTermFreq) {
|
||||
throw new RuntimeException("sumTotalTermFreq for field " + field + "=" + actualSumTotalTermFreq + " != recomputed sumTotalTermFreq=" + sumTotalTermFreq);
|
||||
}
|
||||
|
||||
if (hasFreqs == false && sumTotalTermFreq != sumDocFreq) {
|
||||
throw new RuntimeException("sumTotalTermFreq for field " + field + " should be " + sumDocFreq + ", got sumTotalTermFreq=" + sumTotalTermFreq);
|
||||
}
|
||||
|
||||
final int v = fieldTerms.getDocCount();
|
||||
if (v != -1 && visitedDocs.cardinality() != v) {
|
||||
if (visitedDocs.cardinality() != v) {
|
||||
throw new RuntimeException("docCount for field " + field + "=" + v + " != recomputed docCount=" + visitedDocs.cardinality());
|
||||
}
|
||||
|
||||
|
|
|
@ -450,25 +450,25 @@ public abstract class IndexReader implements Closeable {
|
|||
|
||||
/**
|
||||
* Returns the total number of occurrences of {@code term} across all
|
||||
* documents (the sum of the freq() for each doc that has this term). This
|
||||
* will be -1 if the codec doesn't support this measure. Note that, like other
|
||||
* term measures, this measure does not take deleted documents into account.
|
||||
* documents (the sum of the freq() for each doc that has this term).
|
||||
* Note that, like other term measures, this measure does not take
|
||||
* deleted documents into account.
|
||||
*/
|
||||
public abstract long totalTermFreq(Term term) throws IOException;
|
||||
|
||||
/**
|
||||
* Returns the sum of {@link TermsEnum#docFreq()} for all terms in this field,
|
||||
* or -1 if this measure isn't stored by the codec. Note that, just like other
|
||||
* term measures, this measure does not take deleted documents into account.
|
||||
* Returns the sum of {@link TermsEnum#docFreq()} for all terms in this field.
|
||||
* Note that, just like other term measures, this measure does not take deleted
|
||||
* documents into account.
|
||||
*
|
||||
* @see Terms#getSumDocFreq()
|
||||
*/
|
||||
public abstract long getSumDocFreq(String field) throws IOException;
|
||||
|
||||
/**
|
||||
* Returns the number of documents that have at least one term for this field,
|
||||
* or -1 if this measure isn't stored by the codec. Note that, just like other
|
||||
* term measures, this measure does not take deleted documents into account.
|
||||
* Returns the number of documents that have at least one term for this field.
|
||||
* Note that, just like other term measures, this measure does not take deleted
|
||||
* documents into account.
|
||||
*
|
||||
* @see Terms#getDocCount()
|
||||
*/
|
||||
|
@ -476,9 +476,8 @@ public abstract class IndexReader implements Closeable {
|
|||
|
||||
/**
|
||||
* Returns the sum of {@link TermsEnum#totalTermFreq} for all terms in this
|
||||
* field, or -1 if this measure isn't stored by the codec (or if this fields
|
||||
* omits term freq and positions). Note that, just like other term measures,
|
||||
* this measure does not take deleted documents into account.
|
||||
* field. Note that, just like other term measures, this measure does not take
|
||||
* deleted documents into account.
|
||||
*
|
||||
* @see Terms#getSumTotalTermFreq()
|
||||
*/
|
||||
|
|
|
@ -149,9 +149,7 @@ public final class MultiTerms extends Terms {
|
|||
long sum = 0;
|
||||
for(Terms terms : subs) {
|
||||
final long v = terms.getSumTotalTermFreq();
|
||||
if (v == -1) {
|
||||
return -1;
|
||||
}
|
||||
assert v != -1;
|
||||
sum += v;
|
||||
}
|
||||
return sum;
|
||||
|
@ -162,9 +160,7 @@ public final class MultiTerms extends Terms {
|
|||
long sum = 0;
|
||||
for(Terms terms : subs) {
|
||||
final long v = terms.getSumDocFreq();
|
||||
if (v == -1) {
|
||||
return -1;
|
||||
}
|
||||
assert v != -1;
|
||||
sum += v;
|
||||
}
|
||||
return sum;
|
||||
|
@ -175,9 +171,7 @@ public final class MultiTerms extends Terms {
|
|||
int sum = 0;
|
||||
for(Terms terms : subs) {
|
||||
final int v = terms.getDocCount();
|
||||
if (v == -1) {
|
||||
return -1;
|
||||
}
|
||||
assert v != -1;
|
||||
sum += v;
|
||||
}
|
||||
return sum;
|
||||
|
|
|
@ -326,9 +326,7 @@ public final class MultiTermsEnum extends TermsEnum {
|
|||
long sum = 0;
|
||||
for(int i=0;i<numTop;i++) {
|
||||
final long v = top[i].terms.totalTermFreq();
|
||||
if (v == -1) {
|
||||
return v;
|
||||
}
|
||||
assert v != -1;
|
||||
sum += v;
|
||||
}
|
||||
return sum;
|
||||
|
|
|
@ -101,7 +101,7 @@ class SortedDocValuesTermsEnum extends TermsEnum {
|
|||
|
||||
@Override
|
||||
public long totalTermFreq() {
|
||||
return -1;
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -101,7 +101,7 @@ class SortedSetDocValuesTermsEnum extends TermsEnum {
|
|||
|
||||
@Override
|
||||
public long totalTermFreq() {
|
||||
return -1;
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -141,11 +141,11 @@ public final class TermContext {
|
|||
|
||||
/** Expert: Accumulate term statistics. */
|
||||
public void accumulateStatistics(final int docFreq, final long totalTermFreq) {
|
||||
assert docFreq >= 0;
|
||||
assert totalTermFreq >= 0;
|
||||
assert docFreq <= totalTermFreq;
|
||||
this.docFreq += docFreq;
|
||||
if (this.totalTermFreq >= 0 && totalTermFreq >= 0)
|
||||
this.totalTermFreq += totalTermFreq;
|
||||
else
|
||||
this.totalTermFreq = -1;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -101,23 +101,19 @@ public abstract class Terms {
|
|||
public abstract long size() throws IOException;
|
||||
|
||||
/** Returns the sum of {@link TermsEnum#totalTermFreq} for
|
||||
* all terms in this field, or -1 if this measure isn't
|
||||
* stored by the codec (or if this fields omits term freq
|
||||
* and positions). Note that, just like other term
|
||||
* all terms in this field. Note that, just like other term
|
||||
* measures, this measure does not take deleted documents
|
||||
* into account. */
|
||||
public abstract long getSumTotalTermFreq() throws IOException;
|
||||
|
||||
/** Returns the sum of {@link TermsEnum#docFreq()} for
|
||||
* all terms in this field, or -1 if this measure isn't
|
||||
* stored by the codec. Note that, just like other term
|
||||
* all terms in this field. Note that, just like other term
|
||||
* measures, this measure does not take deleted documents
|
||||
* into account. */
|
||||
public abstract long getSumDocFreq() throws IOException;
|
||||
|
||||
/** Returns the number of documents that have at least one
|
||||
* term for this field, or -1 if this measure isn't
|
||||
* stored by the codec. Note that, just like other term
|
||||
* term for this field. Note that, just like other term
|
||||
* measures, this measure does not take deleted documents
|
||||
* into account. */
|
||||
public abstract int getDocCount() throws IOException;
|
||||
|
|
|
@ -131,8 +131,7 @@ public abstract class TermsEnum implements BytesRefIterator {
|
|||
|
||||
/** Returns the total number of occurrences of this term
|
||||
* across all documents (the sum of the freq() for each
|
||||
* doc that has this term). This will be -1 if the
|
||||
* codec doesn't support this measure. Note that, like
|
||||
* doc that has this term). Note that, like
|
||||
* other term measures, this measure does not take
|
||||
* deleted documents into account. */
|
||||
public abstract long totalTermFreq() throws IOException;
|
||||
|
|
|
@ -148,12 +148,8 @@
|
|||
* deleted documents, when segments are merged the statistic is updated as
|
||||
* those deleted documents are merged away.
|
||||
* <li>{@link org.apache.lucene.index.TermsEnum#totalTermFreq}: Returns the number
|
||||
* of occurrences of this term across all documents. Note that this statistic
|
||||
* is unavailable (returns <code>-1</code>) if term frequencies were omitted
|
||||
* from the index
|
||||
* ({@link org.apache.lucene.index.IndexOptions#DOCS DOCS})
|
||||
* for the field. Like docFreq(), it will also count occurrences that appear in
|
||||
* deleted documents.
|
||||
* of occurrences of this term across all documents. Like docFreq(), it will
|
||||
* also count occurrences that appear in deleted documents.
|
||||
* </ul>
|
||||
* <a name="fieldstats"></a>
|
||||
* <h3>
|
||||
|
@ -180,10 +176,7 @@
|
|||
* of tokens for the field. This can be thought of as the sum of
|
||||
* {@link org.apache.lucene.index.TermsEnum#totalTermFreq} across all terms in the
|
||||
* field, and like totalTermFreq() it will also count occurrences that appear in
|
||||
* deleted documents, and will be unavailable (returns <code>-1</code>) if term
|
||||
* frequencies were omitted from the index
|
||||
* ({@link org.apache.lucene.index.IndexOptions#DOCS DOCS})
|
||||
* for the field.
|
||||
* deleted documents.
|
||||
* </ul>
|
||||
* <a name="segmentstats"></a>
|
||||
* <h3>
|
||||
|
|
|
@ -277,12 +277,8 @@ public final class BlendedTermQuery extends Query {
|
|||
long ttf = 0;
|
||||
for (TermContext ctx : contexts) {
|
||||
df = Math.max(df, ctx.docFreq());
|
||||
if (ctx.totalTermFreq() == -1L) {
|
||||
ttf = -1L;
|
||||
} else if (ttf != -1L) {
|
||||
ttf += ctx.totalTermFreq();
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < contexts.length; ++i) {
|
||||
contexts[i] = adjustFrequencies(reader.getContext(), contexts[i], df, ttf);
|
||||
|
|
|
@ -26,6 +26,7 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
|
@ -299,6 +300,11 @@ final class BooleanWeight extends Weight {
|
|||
return scorerSupplier.get(Long.MAX_VALUE);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return getCacheHelper(context, weights);
|
||||
}
|
||||
|
||||
@Override
|
||||
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
|
||||
int minShouldMatch = query.getMinimumNumberShouldMatch();
|
||||
|
|
|
@ -23,7 +23,27 @@ import org.apache.lucene.index.Terms; // javadocs
|
|||
|
||||
|
||||
/**
|
||||
* Contains statistics for a collection (field)
|
||||
* Contains statistics for a collection (field).
|
||||
* <p>
|
||||
* This class holds statistics across all documents for scoring purposes:
|
||||
* <ul>
|
||||
* <li> {@link #maxDoc()}: number of documents.
|
||||
* <li> {@link #docCount()}: number of documents that contain this field.
|
||||
* <li> {@link #sumDocFreq()}: number of postings-list entries.
|
||||
* <li> {@link #sumTotalTermFreq()}: number of tokens.
|
||||
* </ul>
|
||||
* <p>
|
||||
* The following conditions are always true:
|
||||
* <ul>
|
||||
* <li> All statistics are positive integers: never zero or negative.
|
||||
* <li> {@code docCount} &lt;= {@code maxDoc}
|
||||
* <li> {@code docCount} &lt;= {@code sumDocFreq} &lt;= {@code sumTotalTermFreq}
|
||||
* </ul>
|
||||
* <p>
|
||||
* Values may include statistics on deleted documents that have not yet been merged away.
|
||||
* <p>
|
||||
* Be careful when performing calculations on these values because they are represented
|
||||
* as 64-bit integer values; you may need to cast to {@code double} for your use.
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class CollectionStatistics {
|
||||
|
@ -51,34 +71,24 @@ public class CollectionStatistics {
|
|||
if (maxDoc <= 0) {
|
||||
throw new IllegalArgumentException("maxDoc must be positive, maxDoc: " + maxDoc);
|
||||
}
|
||||
if (docCount != -1) {
|
||||
if (docCount <= 0) {
|
||||
throw new IllegalArgumentException("docCount must be positive, docCount: " + docCount);
|
||||
}
|
||||
if (docCount > maxDoc) {
|
||||
throw new IllegalArgumentException("docCount must not exceed maxDoc, docCount: " + docCount + ", maxDoc: " + maxDoc);
|
||||
}
|
||||
}
|
||||
if (sumDocFreq != -1) {
|
||||
if (sumDocFreq <= 0) {
|
||||
throw new IllegalArgumentException("sumDocFreq must be positive, sumDocFreq: " + sumDocFreq);
|
||||
}
|
||||
if (docCount != -1) {
|
||||
if (sumDocFreq < docCount) {
|
||||
throw new IllegalArgumentException("sumDocFreq must be at least docCount, sumDocFreq: " + sumDocFreq + ", docCount: " + docCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (sumTotalTermFreq != -1) {
|
||||
if (sumTotalTermFreq <= 0) {
|
||||
throw new IllegalArgumentException("sumTotalTermFreq must be positive, sumTotalTermFreq: " + sumTotalTermFreq);
|
||||
}
|
||||
if (sumDocFreq != -1) {
|
||||
if (sumTotalTermFreq < sumDocFreq) {
|
||||
throw new IllegalArgumentException("sumTotalTermFreq must be at least sumDocFreq, sumTotalTermFreq: " + sumTotalTermFreq + ", sumDocFreq: " + sumDocFreq);
|
||||
}
|
||||
}
|
||||
}
|
||||
this.field = field;
|
||||
this.maxDoc = maxDoc;
|
||||
this.docCount = docCount;
|
||||
|
@ -86,33 +96,65 @@ public class CollectionStatistics {
|
|||
this.sumDocFreq = sumDocFreq;
|
||||
}
|
||||
|
||||
/** returns the field name */
|
||||
/**
|
||||
* The field's name.
|
||||
* <p>
|
||||
* This value is never {@code null}.
|
||||
* @return field's name, not {@code null}
|
||||
*/
|
||||
public final String field() {
|
||||
return field;
|
||||
}
|
||||
|
||||
/** returns the total number of documents, regardless of
|
||||
/**
|
||||
* The total number of documents, regardless of
|
||||
* whether they all contain values for this field.
|
||||
* @see IndexReader#maxDoc() */
|
||||
* <p>
|
||||
* This value is always a positive number.
|
||||
* @return total number of documents, in the range [1 .. {@link Long#MAX_VALUE}]
|
||||
* @see IndexReader#maxDoc()
|
||||
*/
|
||||
public final long maxDoc() {
|
||||
return maxDoc;
|
||||
}
|
||||
|
||||
/** returns the total number of documents that
|
||||
* have at least one term for this field.
|
||||
* @see Terms#getDocCount() */
|
||||
/**
|
||||
* The total number of documents that have at least
|
||||
* one term for this field.
|
||||
* <p>
|
||||
* This value is always a positive number, and never
|
||||
* exceeds {@link #maxDoc()}.
|
||||
* @return total number of documents containing this field, in the range [1 .. {@link #maxDoc()}]
|
||||
* @see Terms#getDocCount()
|
||||
*/
|
||||
public final long docCount() {
|
||||
return docCount;
|
||||
}
|
||||
|
||||
/** returns the total number of tokens for this field
|
||||
* @see Terms#getSumTotalTermFreq() */
|
||||
/**
|
||||
* The total number of tokens for this field.
|
||||
* This is the "word count" for this field across all documents.
|
||||
* It is the sum of {@link TermStatistics#totalTermFreq()} across all terms.
|
||||
* It is also the sum of each document's field length across all documents.
|
||||
* <p>
|
||||
* This value is always a positive number, and always at least {@link #sumDocFreq()}.
|
||||
* @return total number of tokens in the field, in the range [{@link #sumDocFreq()} .. {@link Long#MAX_VALUE}]
|
||||
* @see Terms#getSumTotalTermFreq()
|
||||
*/
|
||||
public final long sumTotalTermFreq() {
|
||||
return sumTotalTermFreq;
|
||||
}
|
||||
|
||||
/** returns the total number of postings for this field
|
||||
* @see Terms#getSumDocFreq() */
|
||||
/**
|
||||
* The total number of posting list entries for this field.
|
||||
* This is the sum of term-document pairs: the sum of {@link TermStatistics#docFreq()} across all terms.
|
||||
* It is also the sum of each document's unique term count for this field across all documents.
|
||||
* <p>
|
||||
* This value is always a positive number, always at least {@link #docCount()}, and never
|
||||
* exceeds {@link #sumTotalTermFreq()}.
|
||||
* @return number of posting list entries, in the range [{@link #docCount()} .. {@link #sumTotalTermFreq()}]
|
||||
* @see Terms#getSumDocFreq()
|
||||
*/
|
||||
public final long sumDocFreq() {
|
||||
return sumDocFreq;
|
||||
}
|
||||
|
|
|
@ -167,6 +167,11 @@ public final class ConstantScoreQuery extends Query {
|
|||
return scorerSupplier.get(Long.MAX_VALUE);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return innerWeight.getCacheHelper(context);
|
||||
}
|
||||
|
||||
};
|
||||
} else {
|
||||
return innerWeight;
|
||||
|
|
|
@ -137,6 +137,11 @@ public final class DisjunctionMaxQuery extends Query implements Iterable<Query>
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return getCacheHelper(context, weights);
|
||||
}
|
||||
|
||||
/** Explain the score we computed for doc */
|
||||
@Override
|
||||
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
|
||||
|
|
|
@ -23,6 +23,7 @@ import java.util.Objects;
|
|||
import org.apache.lucene.index.DocValuesType;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
|
||||
|
@ -97,6 +98,11 @@ public final class DocValuesFieldExistsQuery extends Query {
|
|||
|
||||
return new ConstantScoreScorer(this, score(), iterator);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return getDocValuesCacheHelper(field, context);
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
@ -86,17 +86,17 @@ public final class DocValuesRewriteMethod extends MultiTermQuery.RewriteMethod {
|
|||
|
||||
@Override
|
||||
public long getSumTotalTermFreq() {
|
||||
return -1;
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getSumDocFreq() {
|
||||
return -1;
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getDocCount() {
|
||||
return -1;
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -158,6 +158,11 @@ public final class DocValuesRewriteMethod extends MultiTermQuery.RewriteMethod {
|
|||
}
|
||||
});
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return getDocValuesCacheHelper(query.field, context);
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.search;
|
|||
import java.io.IOException;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
|
||||
|
@ -55,6 +56,11 @@ public abstract class FilterWeight extends Weight {
|
|||
this.in = weight;
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return in.getCacheHelper(context);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void extractTerms(Set<Term> terms) {
|
||||
in.extractTerms(terms);
|
||||
|
|
|
@ -169,6 +169,13 @@ public final class IndexOrDocValuesQuery extends Query {
|
|||
}
|
||||
return scorerSupplier.get(Long.MAX_VALUE);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
// Both index and dv query should return the same values, so we can use
|
||||
// the index query's cachehelper here
|
||||
return indexWeight.getCacheHelper(context);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -722,8 +722,7 @@ public class LRUQueryCache implements QueryCache, Accountable {
|
|||
policy.onUse(getQuery());
|
||||
}
|
||||
|
||||
// TODO: should it be pluggable, eg. for queries that run on doc values?
|
||||
final IndexReader.CacheHelper cacheHelper = context.reader().getCoreCacheHelper();
|
||||
final IndexReader.CacheHelper cacheHelper = in.getCacheHelper(context);
|
||||
if (cacheHelper == null) {
|
||||
// this segment is not suitable for caching
|
||||
return in.scorerSupplier(context);
|
||||
|
@ -788,14 +787,18 @@ public class LRUQueryCache implements QueryCache, Accountable {
|
|||
return scorerSupplier.get(Long.MAX_VALUE);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return in.getCacheHelper(context);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
|
||||
if (used.compareAndSet(false, true)) {
|
||||
policy.onUse(getQuery());
|
||||
}
|
||||
|
||||
// TODO: should it be pluggable, eg. for queries that run on doc values?
|
||||
final IndexReader.CacheHelper cacheHelper = context.reader().getCoreCacheHelper();
|
||||
final IndexReader.CacheHelper cacheHelper = in.getCacheHelper(context);
|
||||
if (cacheHelper == null) {
|
||||
// this segment is not suitable for caching
|
||||
return in.bulkScorer(context);
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.search;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
||||
|
@ -39,6 +40,12 @@ public final class MatchAllDocsQuery extends Query {
|
|||
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||
return new ConstantScoreScorer(this, score(), DocIdSetIterator.all(context.reader().maxDoc()));
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return context.reader().getCoreCacheHelper();
|
||||
}
|
||||
|
||||
@Override
|
||||
public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
|
||||
final float score = score();
|
||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.lucene.search;
|
|||
import java.io.IOException;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
|
||||
|
@ -58,6 +59,11 @@ public class MatchNoDocsQuery extends Query {
|
|||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return context.reader().getCoreCacheHelper();
|
||||
}
|
||||
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -291,6 +291,11 @@ public class MultiPhraseQuery extends Query {
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return context.reader().getCoreCacheHelper();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
|
||||
Scorer scorer = scorer(context);
|
||||
|
|
|
@ -22,6 +22,7 @@ import java.util.ArrayList;
|
|||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.Term;
|
||||
|
@ -211,6 +212,11 @@ final class MultiTermQueryConstantScoreWrapper<Q extends MultiTermQuery> extends
|
|||
return scorer(weightOrBitSet.set);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return context.reader().getCoreCacheHelper();
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,6 +23,7 @@ import java.util.Objects;
|
|||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
|
||||
|
@ -75,6 +76,11 @@ public final class NormsFieldExistsQuery extends Query {
|
|||
DocIdSetIterator iterator = reader.getNormValues(field);
|
||||
return new ConstantScoreScorer(this, score(), iterator);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return context.reader().getCoreCacheHelper();
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
@ -442,6 +442,11 @@ public class PhraseQuery extends Query {
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return context.reader().getCoreCacheHelper();
|
||||
}
|
||||
|
||||
// only called from assert
|
||||
private boolean termNotInReader(LeafReader reader, Term term) throws IOException {
|
||||
return reader.docFreq(term) == 0;
|
||||
|
@ -492,14 +497,13 @@ public class PhraseQuery extends Query {
|
|||
* of processing the occurrences of a term
|
||||
* in a document that contains the term.
|
||||
* This is for use by {@link TwoPhaseIterator#matchCost} implementations.
|
||||
* <br>This may be inaccurate when {@link TermsEnum#totalTermFreq()} is not available.
|
||||
* @param termsEnum The term is the term at which this TermsEnum is positioned.
|
||||
*/
|
||||
static float termPositionsCost(TermsEnum termsEnum) throws IOException {
|
||||
int docFreq = termsEnum.docFreq();
|
||||
assert docFreq > 0;
|
||||
long totalTermFreq = termsEnum.totalTermFreq(); // -1 when not available
|
||||
float expOccurrencesInMatchingDoc = (totalTermFreq < docFreq) ? 1 : (totalTermFreq / (float) docFreq);
|
||||
long totalTermFreq = termsEnum.totalTermFreq();
|
||||
float expOccurrencesInMatchingDoc = totalTermFreq / (float) docFreq;
|
||||
return TERM_POSNS_SEEK_OPS_PER_DOC + expOccurrencesInMatchingDoc * TERM_OPS_PER_POS;
|
||||
}
|
||||
|
||||
|
|
|
@ -23,6 +23,7 @@ import java.util.Collection;
|
|||
import java.util.Iterator;
|
||||
import java.util.NoSuchElementException;
|
||||
import org.apache.lucene.document.IntPoint;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.PointValues.IntersectVisitor;
|
||||
|
@ -150,6 +151,11 @@ public abstract class PointInSetQuery extends Query {
|
|||
|
||||
return new ConstantScoreScorer(this, score(), result.build().iterator());
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return context.reader().getCoreCacheHelper();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -20,6 +20,7 @@ import java.io.IOException;
|
|||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.PointValues;
|
||||
import org.apache.lucene.index.PointValues.IntersectVisitor;
|
||||
import org.apache.lucene.index.PointValues.Relation;
|
||||
|
@ -321,6 +322,11 @@ public abstract class PointRangeQuery extends Query {
|
|||
}
|
||||
return scorerSupplier.get(Long.MAX_VALUE);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return context.reader().getCoreCacheHelper();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -140,13 +140,9 @@ public final class SynonymQuery extends Query {
|
|||
TermStatistics termStats = searcher.termStatistics(terms[i], termContexts[i]);
|
||||
if (termStats != null) {
|
||||
docFreq = Math.max(termStats.docFreq(), docFreq);
|
||||
if (termStats.totalTermFreq() == -1) {
|
||||
totalTermFreq = -1;
|
||||
} else if (totalTermFreq != -1) {
|
||||
totalTermFreq += termStats.totalTermFreq();
|
||||
}
|
||||
}
|
||||
}
|
||||
this.similarity = searcher.getSimilarity(true);
|
||||
if (docFreq > 0) {
|
||||
TermStatistics pseudoStats = new TermStatistics(new BytesRef("synonym pseudo-term"), docFreq, totalTermFreq);
|
||||
|
@ -217,6 +213,11 @@ public final class SynonymQuery extends Query {
|
|||
return new SynonymScorer(simScorer, this, subScorers);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return context.reader().getCoreCacheHelper();
|
||||
}
|
||||
}
|
||||
|
||||
static class SynonymScorer extends DisjunctionScorer {
|
||||
|
|
|
@ -315,6 +315,11 @@ public class TermInSetQuery extends Query implements Accountable {
|
|||
return scorer(weightOrBitSet.set);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return context.reader().getCoreCacheHelper();
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
@ -21,6 +21,7 @@ import java.io.IOException;
|
|||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexReaderContext;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
|
@ -65,9 +66,9 @@ public class TermQuery extends Query {
|
|||
collectionStats = searcher.collectionStatistics(term.field());
|
||||
termStats = searcher.termStatistics(term, termStates);
|
||||
} else {
|
||||
// we do not need the actual stats, use fake stats with docFreq=maxDoc=1 and ttf=-1
|
||||
collectionStats = new CollectionStatistics(term.field(), 1, -1, -1, -1);
|
||||
termStats = new TermStatistics(term.bytes(), 1, -1);
|
||||
// we do not need the actual stats, use fake stats with docFreq=maxDoc=ttf=1
|
||||
collectionStats = new CollectionStatistics(term.field(), 1, 1, 1, 1);
|
||||
termStats = new TermStatistics(term.bytes(), 1, 1);
|
||||
}
|
||||
|
||||
if (termStats == null) {
|
||||
|
@ -99,6 +100,11 @@ public class TermQuery extends Query {
|
|||
return new TermScorer(this, docs, similarity.simScorer(stats, context));
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return context.reader().getCoreCacheHelper();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a {@link TermsEnum} positioned at this weight's Term or null if
|
||||
* the term does not exist in the given context
|
||||
|
|
|
@ -24,8 +24,29 @@ import org.apache.lucene.index.TermsEnum; // javadocs
|
|||
import org.apache.lucene.util.BytesRef;
|
||||
/**
|
||||
* Contains statistics for a specific term
|
||||
* <p>
|
||||
* This class holds statistics for this term across all documents for scoring purposes:
|
||||
* <ul>
|
||||
* <li> {@link #docFreq}: number of documents this term occurs in.
|
||||
* <li> {@link #totalTermFreq}: number of tokens for this term.
|
||||
* </ul>
|
||||
* <p>
|
||||
* The following conditions are always true:
|
||||
* <ul>
|
||||
* <li> All statistics are positive integers: never zero or negative.
|
||||
* <li> {@code docFreq} &lt;= {@code totalTermFreq}
|
||||
* <li> {@code docFreq} &lt;= {@code sumDocFreq} of the collection
|
||||
* <li> {@code totalTermFreq} &lt;= {@code sumTotalTermFreq} of the collection
|
||||
* </ul>
|
||||
* <p>
|
||||
* Values may include statistics on deleted documents that have not yet been merged away.
|
||||
* <p>
|
||||
* Be careful when performing calculations on these values because they are represented
|
||||
* as 64-bit integer values; you may need to cast to {@code double} for your use.
|
||||
* @lucene.experimental
|
||||
*/
|
||||
// TODO: actually add missing cross-checks to guarantee TermStatistics is in bounds of CollectionStatistics,
|
||||
// otherwise many similarity functions will implode.
|
||||
public class TermStatistics {
|
||||
private final BytesRef term;
|
||||
private final long docFreq;
|
||||
|
@ -45,29 +66,52 @@ public class TermStatistics {
|
|||
if (docFreq <= 0) {
|
||||
throw new IllegalArgumentException("docFreq must be positive, docFreq: " + docFreq);
|
||||
}
|
||||
if (totalTermFreq != -1) {
|
||||
if (totalTermFreq <= 0) {
|
||||
throw new IllegalArgumentException("totalTermFreq must be positive, totalTermFreq: " + totalTermFreq);
|
||||
}
|
||||
if (totalTermFreq < docFreq) {
|
||||
throw new IllegalArgumentException("totalTermFreq must be at least docFreq, totalTermFreq: " + totalTermFreq + ", docFreq: " + docFreq);
|
||||
}
|
||||
}
|
||||
this.term = term;
|
||||
this.docFreq = docFreq;
|
||||
this.totalTermFreq = totalTermFreq;
|
||||
}
|
||||
|
||||
/** returns the term text */
|
||||
/**
|
||||
* The term text.
|
||||
* <p>
|
||||
* This value is never {@code null}.
|
||||
* @return term's text, not {@code null}
|
||||
*/
|
||||
public final BytesRef term() {
|
||||
return term;
|
||||
}
|
||||
|
||||
/** returns the number of documents this term occurs in
|
||||
* @see TermsEnum#docFreq() */
|
||||
/**
|
||||
* The number of documents this term occurs in.
|
||||
* <p>
|
||||
* This is the document-frequency for the term: the count of documents
|
||||
* where the term appears at least one time.
|
||||
* <p>
|
||||
* This value is always a positive number, and never
|
||||
* exceeds {@link #totalTermFreq}. It also cannot exceed {@link CollectionStatistics#sumDocFreq()}.
|
||||
* @return document frequency, in the range [1 .. {@link #totalTermFreq()}]
|
||||
* @see TermsEnum#docFreq()
|
||||
*/
|
||||
public final long docFreq() {
|
||||
return docFreq;
|
||||
}
|
||||
|
||||
/** returns the total number of occurrences of this term
|
||||
* @see TermsEnum#totalTermFreq() */
|
||||
/**
|
||||
* The total number of occurrences of this term.
|
||||
* <p>
|
||||
* This is the token count for the term: the number of times it appears in the field across all documents.
|
||||
* <p>
|
||||
* This value is always a positive number, always at least {@link #docFreq()},
|
||||
* and never exceeds {@link CollectionStatistics#sumTotalTermFreq()}.
|
||||
* @return number of occurrences, in the range [{@link #docFreq()} .. {@link CollectionStatistics#sumTotalTermFreq()}]
|
||||
* @see TermsEnum#totalTermFreq()
|
||||
*/
|
||||
public final long totalTermFreq() {
|
||||
return totalTermFreq;
|
||||
}
|
||||
|
|
|
@ -18,8 +18,11 @@ package org.apache.lucene.search;
|
|||
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexReaderContext;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
|
@ -102,6 +105,55 @@ public abstract class Weight {
|
|||
*/
|
||||
public abstract Scorer scorer(LeafReaderContext context) throws IOException;
|
||||
|
||||
/**
|
||||
* Returns an {@link org.apache.lucene.index.IndexReader.CacheHelper} to cache this query against
|
||||
*
|
||||
* Weights that rely only on Terms or Points can return {@code context.reader().getCoreCacheHelper()}.
|
||||
* Weights that use DocValues should call {@link #getDocValuesCacheHelper(String, LeafReaderContext)}
|
||||
* Weights that should not be cached at all should return {@code null}
|
||||
*
|
||||
* @param context the {@link LeafReaderContext} to cache against
|
||||
* @return an {@link org.apache.lucene.index.IndexReader.CacheHelper} indicating the cache level
|
||||
*/
|
||||
public abstract IndexReader.CacheHelper getCacheHelper(LeafReaderContext context);
|
||||
|
||||
/**
|
||||
* Given a collection of Weights, return an {@link org.apache.lucene.index.IndexReader.CacheHelper} that will satisfy
|
||||
* the requirements of them all.
|
||||
* @param context the {@link LeafReaderContext} to cache against
|
||||
* @param weights an array of {@link Weight} to be cached
|
||||
* @return an {@link org.apache.lucene.index.IndexReader.CacheHelper} indicating the cache level
|
||||
*/
|
||||
protected static IndexReader.CacheHelper getCacheHelper(LeafReaderContext context, List<? extends Weight> weights) {
|
||||
if (weights.size() == 0)
|
||||
return null;
|
||||
IndexReader.CacheHelper helper = weights.get(0).getCacheHelper(context);
|
||||
if (helper == null)
|
||||
return null;
|
||||
for (int i = 1; i < weights.size(); i++) {
|
||||
IndexReader.CacheHelper nextHelper = weights.get(i).getCacheHelper(context);
|
||||
if (nextHelper == null || nextHelper != helper)
|
||||
return null;
|
||||
}
|
||||
return helper;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an {@link org.apache.lucene.index.IndexReader.CacheHelper} for a Weight using doc values
|
||||
*
|
||||
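* This will return the core reader's cache helper when the segment has no {@link FieldInfo} for the field or the field's doc values generation is -1, and {@code null} otherwise.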
|
||||
*
|
||||
* @param field the docvalues field
|
||||
* @param ctx the {@link LeafReaderContext} to cache against
|
||||
* @return an {@link org.apache.lucene.index.IndexReader.CacheHelper} indicating the cache level
|
||||
*/
|
||||
public static IndexReader.CacheHelper getDocValuesCacheHelper(String field, LeafReaderContext ctx) {
|
||||
FieldInfo fi = ctx.reader().getFieldInfos().fieldInfo(field);
|
||||
if (fi == null || fi.getDocValuesGen() == -1)
|
||||
return ctx.reader().getCoreCacheHelper();
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Optional method.
|
||||
* Get a {@link ScorerSupplier}, which makes it possible to know the cost of the {@link Scorer}
|
||||
|
|
|
@ -85,19 +85,7 @@ public class BM25Similarity extends Similarity {
|
|||
|
||||
/** The default implementation computes the average as <code>sumTotalTermFreq / docCount</code> */
|
||||
protected float avgFieldLength(CollectionStatistics collectionStats) {
|
||||
final long sumTotalTermFreq;
|
||||
if (collectionStats.sumTotalTermFreq() == -1) {
|
||||
// frequencies are omitted (tf=1), its # of postings
|
||||
if (collectionStats.sumDocFreq() == -1) {
|
||||
// theoretical case only: remove!
|
||||
return 1f;
|
||||
}
|
||||
sumTotalTermFreq = collectionStats.sumDocFreq();
|
||||
} else {
|
||||
sumTotalTermFreq = collectionStats.sumTotalTermFreq();
|
||||
}
|
||||
final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
|
||||
return (float) (sumTotalTermFreq / (double) docCount);
|
||||
return (float) (collectionStats.sumTotalTermFreq() / (double) collectionStats.docCount());
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -161,7 +149,7 @@ public class BM25Similarity extends Similarity {
|
|||
*/
|
||||
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) {
|
||||
final long df = termStats.docFreq();
|
||||
final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
|
||||
final long docCount = collectionStats.docCount();
|
||||
final float idf = idf(df, docCount);
|
||||
return Explanation.match(idf, "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:",
|
||||
Explanation.match(df, "n, number of documents containing term"),
|
||||
|
|
|
@ -62,7 +62,7 @@ public class ClassicSimilarity extends TFIDFSimilarity {
|
|||
@Override
|
||||
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) {
|
||||
final long df = termStats.docFreq();
|
||||
final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
|
||||
final long docCount = collectionStats.docCount();
|
||||
final float idf = idf(df, docCount);
|
||||
return Explanation.match(idf, "idf, computed as log((docCount+1)/(docFreq+1)) + 1 from:",
|
||||
Explanation.match(df, "docFreq, number of documents containing term"),
|
||||
|
|
|
@ -100,42 +100,16 @@ public abstract class SimilarityBase extends Similarity {
|
|||
/** Fills all member fields defined in {@code BasicStats} in {@code stats}.
|
||||
* Subclasses can override this method to fill additional stats. */
|
||||
protected void fillBasicStats(BasicStats stats, CollectionStatistics collectionStats, TermStatistics termStats) {
|
||||
// #positions(field) must be >= #positions(term)
|
||||
assert collectionStats.sumTotalTermFreq() == -1 || collectionStats.sumTotalTermFreq() >= termStats.totalTermFreq();
|
||||
long numberOfDocuments = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
|
||||
|
||||
long docFreq = termStats.docFreq();
|
||||
long totalTermFreq = termStats.totalTermFreq();
|
||||
|
||||
// frequencies are omitted, all postings have tf=1, so totalTermFreq = docFreq
|
||||
if (totalTermFreq == -1) {
|
||||
totalTermFreq = docFreq;
|
||||
}
|
||||
|
||||
final long numberOfFieldTokens;
|
||||
final double avgFieldLength;
|
||||
|
||||
if (collectionStats.sumTotalTermFreq() == -1) {
|
||||
// frequencies are omitted, so sumTotalTermFreq = # postings
|
||||
if (collectionStats.sumDocFreq() == -1) {
|
||||
// theoretical case only: remove!
|
||||
numberOfFieldTokens = docFreq;
|
||||
avgFieldLength = 1f;
|
||||
} else {
|
||||
numberOfFieldTokens = collectionStats.sumDocFreq();
|
||||
avgFieldLength = (float) (collectionStats.sumDocFreq() / (double)numberOfDocuments);
|
||||
}
|
||||
} else {
|
||||
numberOfFieldTokens = collectionStats.sumTotalTermFreq();
|
||||
avgFieldLength = (float) (collectionStats.sumTotalTermFreq() / (double)numberOfDocuments);
|
||||
}
|
||||
// TODO: validate this for real, somewhere else
|
||||
assert termStats.totalTermFreq() <= collectionStats.sumTotalTermFreq();
|
||||
assert termStats.docFreq() <= collectionStats.sumDocFreq();
|
||||
|
||||
// TODO: add sumDocFreq for field (numberOfFieldPostings)
|
||||
stats.setNumberOfDocuments(numberOfDocuments);
|
||||
stats.setNumberOfFieldTokens(numberOfFieldTokens);
|
||||
stats.setAvgFieldLength(avgFieldLength);
|
||||
stats.setDocFreq(docFreq);
|
||||
stats.setTotalTermFreq(totalTermFreq);
|
||||
stats.setNumberOfDocuments(collectionStats.docCount());
|
||||
stats.setNumberOfFieldTokens(collectionStats.sumTotalTermFreq());
|
||||
stats.setAvgFieldLength(collectionStats.sumTotalTermFreq() / (double) collectionStats.docCount());
|
||||
stats.setDocFreq(termStats.docFreq());
|
||||
stats.setTotalTermFreq(termStats.totalTermFreq());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -448,7 +448,7 @@ public abstract class TFIDFSimilarity extends Similarity {
|
|||
*/
|
||||
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) {
|
||||
final long df = termStats.docFreq();
|
||||
final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
|
||||
final long docCount = collectionStats.docCount();
|
||||
final float idf = idf(df, docCount);
|
||||
return Explanation.match(idf, "idf(docFreq, docCount)",
|
||||
Explanation.match(df, "docFreq, number of documents containing term"),
|
||||
|
|
|
@ -32,14 +32,14 @@
|
|||
* <a name="sims"></a>
|
||||
* <h2>Summary of the Ranking Methods</h2>
|
||||
*
|
||||
* <p>{@link org.apache.lucene.search.similarities.ClassicSimilarity} is the original Lucene
|
||||
* scoring function. It is based on a highly optimized
|
||||
* <a href="http://en.wikipedia.org/wiki/Vector_Space_Model">Vector Space Model</a>. For more
|
||||
* information, see {@link org.apache.lucene.search.similarities.TFIDFSimilarity}.
|
||||
*
|
||||
* <p>{@link org.apache.lucene.search.similarities.BM25Similarity} is an optimized
|
||||
* implementation of the successful Okapi BM25 model.
|
||||
*
|
||||
* <p>{@link org.apache.lucene.search.similarities.ClassicSimilarity} is the original Lucene
|
||||
* scoring function. It is based on the
|
||||
* <a href="http://en.wikipedia.org/wiki/Vector_Space_Model">Vector Space Model</a>. For more
|
||||
* information, see {@link org.apache.lucene.search.similarities.TFIDFSimilarity}.
|
||||
*
|
||||
* <p>{@link org.apache.lucene.search.similarities.SimilarityBase} provides a basic
|
||||
* implementation of the Similarity contract and exposes a highly simplified
|
||||
* interface, which makes it an ideal starting point for new ranking functions.
|
||||
|
|
|
@ -19,8 +19,10 @@ package org.apache.lucene.search.spans;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
|
@ -115,5 +117,10 @@ public final class SpanContainingQuery extends SpanContainQuery {
|
|||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return getCacheHelper(context, Arrays.asList(bigWeight, littleWeight));
|
||||
}
|
||||
}
|
||||
}
|
|
@ -229,6 +229,11 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
|
|||
w.extractTerms(terms);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return getCacheHelper(context, subWeights);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -319,6 +324,11 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
|
|||
public void extractTerms(Set<Term> terms) {
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return context.reader().getCoreCacheHelper();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.search.spans;
|
|||
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
|
@ -191,6 +192,11 @@ public final class SpanNotQuery extends SpanQuery {
|
|||
public void extractTerms(Set<Term> terms) {
|
||||
includeWeight.extractTerms(terms);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return getCacheHelper(context, Arrays.asList(includeWeight, excludeWeight));
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -138,6 +138,11 @@ public final class SpanOrQuery extends SpanQuery {
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return getCacheHelper(context, subWeights);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void extractTermContexts(Map<Term, TermContext> contexts) {
|
||||
for (SpanWeight w : subWeights) {
|
||||
|
|
|
@ -86,6 +86,11 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea
|
|||
matchWeight.extractTerms(terms);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return matchWeight.getCacheHelper(context);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void extractTermContexts(Map<Term, TermContext> contexts) {
|
||||
matchWeight.extractTermContexts(contexts);
|
||||
|
|
|
@ -23,6 +23,7 @@ import java.util.Map;
|
|||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexReaderContext;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
|
@ -91,6 +92,11 @@ public class SpanTermQuery extends SpanQuery {
|
|||
terms.add(term);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return context.reader().getCoreCacheHelper();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void extractTermContexts(Map<Term, TermContext> contexts) {
|
||||
contexts.put(term, termContext);
|
||||
|
@ -135,7 +141,6 @@ public class SpanTermQuery extends SpanQuery {
|
|||
/** Returns an expected cost in simple operations
|
||||
* of processing the occurrences of a term
|
||||
* in a document that contains the term.
|
||||
* <br>This may be inaccurate when {@link TermsEnum#totalTermFreq()} is not available.
|
||||
* @param termsEnum The term is the term at which this TermsEnum is positioned.
|
||||
* <p>
|
||||
* This is a copy of org.apache.lucene.search.PhraseQuery.termPositionsCost().
|
||||
|
@ -146,8 +151,9 @@ public class SpanTermQuery extends SpanQuery {
|
|||
static float termPositionsCost(TermsEnum termsEnum) throws IOException {
|
||||
int docFreq = termsEnum.docFreq();
|
||||
assert docFreq > 0;
|
||||
long totalTermFreq = termsEnum.totalTermFreq(); // -1 when not available
|
||||
float expOccurrencesInMatchingDoc = (totalTermFreq < docFreq) ? 1 : (totalTermFreq / (float) docFreq);
|
||||
long totalTermFreq = termsEnum.totalTermFreq();
|
||||
assert totalTermFreq > 0;
|
||||
float expOccurrencesInMatchingDoc = totalTermFreq / (float) docFreq;
|
||||
return TERM_POSNS_SEEK_OPS_PER_DOC + expOccurrencesInMatchingDoc * TERM_OPS_PER_POS;
|
||||
}
|
||||
|
||||
|
|
|
@ -19,8 +19,10 @@ package org.apache.lucene.search.spans;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
|
@ -116,6 +118,11 @@ public final class SpanWithinQuery extends SpanContainQuery {
|
|||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return getCacheHelper(context, Arrays.asList(littleWeight, bigWeight));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -172,10 +172,11 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase {
|
|||
|
||||
// NOTE: we don't assert hasOffsets/hasPositions/hasPayloads because they are allowed to be different
|
||||
|
||||
boolean bothHaveFreqs = leftTerms.hasFreqs() && rightTerms.hasFreqs();
|
||||
boolean bothHavePositions = leftTerms.hasPositions() && rightTerms.hasPositions();
|
||||
TermsEnum leftTermsEnum = leftTerms.iterator();
|
||||
TermsEnum rightTermsEnum = rightTerms.iterator();
|
||||
assertTermsEnum(leftTermsEnum, rightTermsEnum, true, bothHavePositions);
|
||||
assertTermsEnum(leftTermsEnum, rightTermsEnum, true, bothHaveFreqs, bothHavePositions);
|
||||
|
||||
assertTermsSeeking(leftTerms, rightTerms);
|
||||
|
||||
|
@ -188,7 +189,7 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase {
|
|||
// TODO: test start term too
|
||||
TermsEnum leftIntersection = leftTerms.intersect(automaton, null);
|
||||
TermsEnum rightIntersection = rightTerms.intersect(automaton, null);
|
||||
assertTermsEnum(leftIntersection, rightIntersection, rarely(), bothHavePositions);
|
||||
assertTermsEnum(leftIntersection, rightIntersection, rarely(), bothHaveFreqs, bothHavePositions);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -263,13 +264,9 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase {
|
|||
* checks collection-level statistics on Terms
|
||||
*/
|
||||
public void assertTermsStatistics(Terms leftTerms, Terms rightTerms) throws Exception {
|
||||
if (leftTerms.getDocCount() != -1 && rightTerms.getDocCount() != -1) {
|
||||
assertEquals(leftTerms.getDocCount(), rightTerms.getDocCount());
|
||||
}
|
||||
if (leftTerms.getSumDocFreq() != -1 && rightTerms.getSumDocFreq() != -1) {
|
||||
assertEquals(leftTerms.getSumDocFreq(), rightTerms.getSumDocFreq());
|
||||
}
|
||||
if (leftTerms.getSumTotalTermFreq() != -1 && rightTerms.getSumTotalTermFreq() != -1) {
|
||||
if (leftTerms.hasFreqs() && rightTerms.hasFreqs()) {
|
||||
assertEquals(leftTerms.getSumTotalTermFreq(), rightTerms.getSumTotalTermFreq());
|
||||
}
|
||||
if (leftTerms.size() != -1 && rightTerms.size() != -1) {
|
||||
|
@ -281,7 +278,7 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase {
|
|||
* checks the terms enum sequentially
|
||||
* if deep is false, it does a 'shallow' test that doesn't go down to the docsenums
|
||||
*/
|
||||
public void assertTermsEnum(TermsEnum leftTermsEnum, TermsEnum rightTermsEnum, boolean deep, boolean hasPositions) throws Exception {
|
||||
public void assertTermsEnum(TermsEnum leftTermsEnum, TermsEnum rightTermsEnum, boolean deep, boolean hasFreqs, boolean hasPositions) throws Exception {
|
||||
BytesRef term;
|
||||
PostingsEnum leftPositions = null;
|
||||
PostingsEnum rightPositions = null;
|
||||
|
@ -290,7 +287,7 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase {
|
|||
|
||||
while ((term = leftTermsEnum.next()) != null) {
|
||||
assertEquals(term, rightTermsEnum.next());
|
||||
assertTermStats(leftTermsEnum, rightTermsEnum);
|
||||
assertTermStats(leftTermsEnum, rightTermsEnum, hasFreqs);
|
||||
if (deep) {
|
||||
if (hasPositions) {
|
||||
// with payloads + off
|
||||
|
@ -350,9 +347,9 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase {
|
|||
/**
|
||||
* checks term-level statistics
|
||||
*/
|
||||
public void assertTermStats(TermsEnum leftTermsEnum, TermsEnum rightTermsEnum) throws Exception {
|
||||
public void assertTermStats(TermsEnum leftTermsEnum, TermsEnum rightTermsEnum, boolean bothHaveFreqs) throws Exception {
|
||||
assertEquals(leftTermsEnum.docFreq(), rightTermsEnum.docFreq());
|
||||
if (leftTermsEnum.totalTermFreq() != -1 && rightTermsEnum.totalTermFreq() != -1) {
|
||||
if (bothHaveFreqs) {
|
||||
assertEquals(leftTermsEnum.totalTermFreq(), rightTermsEnum.totalTermFreq());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -148,26 +148,22 @@ public class TestMultiTermsEnum extends LuceneTestCase {
|
|||
|
||||
@Override
|
||||
public long size() throws IOException {
|
||||
// Docs say we can return -1 if we don't know.
|
||||
return -1;
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getSumTotalTermFreq() throws IOException {
|
||||
// Docs say we can return -1 if we don't know.
|
||||
return -1;
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getSumDocFreq() throws IOException {
|
||||
// Docs say we can return -1 if we don't know.
|
||||
return -1;
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getDocCount() throws IOException {
|
||||
// Docs say we can return -1 if we don't know.
|
||||
return -1;
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -445,7 +445,7 @@ public class TestOmitTf extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
/** test that when freqs are omitted, that totalTermFreq and sumTotalTermFreq are -1 */
|
||||
/** test that when freqs are omitted, totalTermFreq equals docFreq and sumTotalTermFreq equals sumDocFreq */
|
||||
public void testStats() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir,
|
||||
|
@ -459,8 +459,8 @@ public class TestOmitTf extends LuceneTestCase {
|
|||
iw.addDocument(doc);
|
||||
IndexReader ir = iw.getReader();
|
||||
iw.close();
|
||||
assertEquals(-1, ir.totalTermFreq(new Term("foo", new BytesRef("bar"))));
|
||||
assertEquals(-1, ir.getSumTotalTermFreq("foo"));
|
||||
assertEquals(ir.docFreq(new Term("foo", new BytesRef("bar"))), ir.totalTermFreq(new Term("foo", new BytesRef("bar"))));
|
||||
assertEquals(ir.getSumDocFreq("foo"), ir.getSumTotalTermFreq("foo"));
|
||||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
|
|
|
@ -21,6 +21,7 @@ import java.io.IOException;
|
|||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
|
@ -262,6 +263,11 @@ final class JustCompileSearch {
|
|||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return null;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -94,6 +94,11 @@ public class TestBooleanScorer extends LuceneTestCase {
|
|||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BulkScorer bulkScorer(LeafReaderContext context) {
|
||||
return new BulkScorer() {
|
||||
|
|
|
@ -29,6 +29,7 @@ import java.util.HashSet;
|
|||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
@ -36,15 +37,16 @@ import java.util.concurrent.atomic.AtomicLong;
|
|||
import java.util.concurrent.atomic.AtomicReference;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.document.NumericDocValuesField;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.FilterDirectoryReader;
|
||||
import org.apache.lucene.index.FilterLeafReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
|
@ -358,6 +360,11 @@ public class TestLRUQueryCache extends LuceneTestCase {
|
|||
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return context.reader().getCoreCacheHelper();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -947,6 +954,11 @@ public class TestLRUQueryCache extends LuceneTestCase {
|
|||
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return context.reader().getCoreCacheHelper();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -1276,6 +1288,78 @@ public class TestLRUQueryCache extends LuceneTestCase {
|
|||
dir.close();
|
||||
}
|
||||
|
||||
// A query that returns null from Weight.getCacheHelper
|
||||
private static class NoCacheQuery extends Query {
|
||||
|
||||
@Override
|
||||
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
|
||||
return new Weight(this) {
|
||||
@Override
|
||||
public void extractTerms(Set<Term> terms) {
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return null;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString(String field) {
|
||||
return "NoCacheQuery";
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
return sameClassAs(obj);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
public void testQueryNotSuitedForCaching() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE);
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
|
||||
w.addDocument(new Document());
|
||||
DirectoryReader reader = w.getReader();
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
searcher.setQueryCachingPolicy(QueryCachingPolicy.ALWAYS_CACHE);
|
||||
|
||||
LRUQueryCache cache = new LRUQueryCache(2, 10000, context -> true);
|
||||
searcher.setQueryCache(cache);
|
||||
|
||||
assertEquals(0, searcher.count(new NoCacheQuery()));
|
||||
assertEquals(0, cache.getCacheCount());
|
||||
|
||||
// BooleanQuery wrapping an uncacheable query should also not be cached
|
||||
BooleanQuery bq = new BooleanQuery.Builder()
|
||||
.add(new NoCacheQuery(), Occur.MUST)
|
||||
.add(new TermQuery(new Term("field", "term")), Occur.MUST).build();
|
||||
assertEquals(0, searcher.count(bq));
|
||||
assertEquals(0, cache.getCacheCount());
|
||||
|
||||
reader.close();
|
||||
w.close();
|
||||
dir.close();
|
||||
|
||||
}
|
||||
|
||||
private static class DummyQuery2 extends Query {
|
||||
|
||||
private final AtomicBoolean scorerCreated;
|
||||
|
@ -1291,6 +1375,12 @@ public class TestLRUQueryCache extends LuceneTestCase {
|
|||
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||
return scorerSupplier(context).get(Long.MAX_VALUE);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return context.reader().getCoreCacheHelper();
|
||||
}
|
||||
|
||||
@Override
|
||||
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
|
||||
final Weight weight = this;
|
||||
|
@ -1351,4 +1441,110 @@ public class TestLRUQueryCache extends LuceneTestCase {
|
|||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
static class DVCacheQuery extends Query {
|
||||
|
||||
final String field;
|
||||
|
||||
AtomicInteger scorerCreatedCount = new AtomicInteger(0);
|
||||
|
||||
DVCacheQuery(String field) {
|
||||
this.field = field;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString(String field) {
|
||||
return "DVCacheQuery";
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
return sameClassAs(obj);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
|
||||
return new ConstantScoreWeight(this, 1) {
|
||||
|
||||
@Override
|
||||
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||
scorerCreatedCount.incrementAndGet();
|
||||
return new ConstantScoreScorer(this, 1, DocIdSetIterator.all(context.reader().maxDoc()));
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return getDocValuesCacheHelper(field, context);
|
||||
}
|
||||
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
public void testDocValuesUpdatesDontBreakCache() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE);
|
||||
//RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
|
||||
IndexWriter w = new IndexWriter(dir, iwc);
|
||||
w.addDocument(new Document());
|
||||
w.commit();
|
||||
DirectoryReader reader = DirectoryReader.open(w);
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
searcher.setQueryCachingPolicy(QueryCachingPolicy.ALWAYS_CACHE);
|
||||
|
||||
LRUQueryCache cache = new LRUQueryCache(1, 1000, context -> true);
|
||||
searcher.setQueryCache(cache);
|
||||
|
||||
DVCacheQuery query = new DVCacheQuery("field");
|
||||
assertEquals(1, searcher.count(query));
|
||||
assertEquals(1, query.scorerCreatedCount.get());
|
||||
assertEquals(1, searcher.count(query));
|
||||
assertEquals(1, query.scorerCreatedCount.get()); // should be cached
|
||||
|
||||
Document doc = new Document();
|
||||
doc.add(new NumericDocValuesField("field", 1));
|
||||
doc.add(newTextField("text", "text", Store.NO));
|
||||
w.addDocument(doc);
|
||||
reader.close();
|
||||
reader = DirectoryReader.open(w);
|
||||
searcher = newSearcher(reader);
|
||||
searcher.setQueryCachingPolicy(QueryCachingPolicy.ALWAYS_CACHE);
|
||||
searcher.setQueryCache(cache);
|
||||
|
||||
assertEquals(2, searcher.count(query));
|
||||
assertEquals(2, query.scorerCreatedCount.get()); // first segment cached
|
||||
|
||||
reader.close();
|
||||
reader = DirectoryReader.open(w);
|
||||
searcher = newSearcher(reader);
|
||||
searcher.setQueryCachingPolicy(QueryCachingPolicy.ALWAYS_CACHE);
|
||||
searcher.setQueryCache(cache);
|
||||
|
||||
assertEquals(2, searcher.count(query));
|
||||
assertEquals(2, query.scorerCreatedCount.get()); // both segments cached
|
||||
|
||||
|
||||
w.updateNumericDocValue(new Term("text", "text"), "field", 2l);
|
||||
reader.close();
|
||||
reader = DirectoryReader.open(w);
|
||||
searcher = newSearcher(reader);
|
||||
searcher.setQueryCachingPolicy(QueryCachingPolicy.ALWAYS_CACHE);
|
||||
searcher.setQueryCache(cache);
|
||||
|
||||
assertEquals(2, searcher.count(query));
|
||||
assertEquals(3, query.scorerCreatedCount.get()); // second segment no longer cached due to DV update
|
||||
|
||||
assertEquals(2, searcher.count(query));
|
||||
assertEquals(4, query.scorerCreatedCount.get()); // still no caching
|
||||
|
||||
reader.close();
|
||||
w.close();
|
||||
dir.close();
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,7 +18,6 @@ package org.apache.lucene.search;
|
|||
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Set;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
|
@ -103,17 +102,7 @@ public class TestNeedsScores extends LuceneTestCase {
|
|||
@Override
|
||||
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
|
||||
final Weight w = in.createWeight(searcher, needsScores, boost);
|
||||
return new Weight(AssertNeedsScores.this) {
|
||||
@Override
|
||||
public void extractTerms(Set<Term> terms) {
|
||||
w.extractTerms(terms);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
|
||||
return w.explain(context, doc);
|
||||
}
|
||||
|
||||
return new FilterWeight(w) {
|
||||
@Override
|
||||
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||
assertEquals("query=" + in, value, needsScores);
|
||||
|
|
|
@ -487,6 +487,11 @@ public class TestQueryRescorer extends LuceneTestCase {
|
|||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
|
||||
return null;
|
||||
|
|
|
@ -155,6 +155,11 @@ public class TestScorerPerf extends LuceneTestCase {
|
|||
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||
return new ConstantScoreScorer(this, score(), new BitSetIterator(docs, docs.approximateCardinality()));
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return null;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -249,6 +249,11 @@ public class TestSortRandom extends LuceneTestCase {
|
|||
|
||||
return new ConstantScoreScorer(this, score(), new BitSetIterator(bits, bits.approximateCardinality()));
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return null;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -124,6 +124,11 @@ public class TestUsageTrackingFilterCachingPolicy extends LuceneTestCase {
|
|||
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||
return new ConstantScoreScorer(this, score(), DocIdSetIterator.all(1));
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return context.reader().getCoreCacheHelper();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -40,6 +40,7 @@ import org.apache.lucene.search.TopDocs;
|
|||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
/**
|
||||
|
@ -183,7 +184,17 @@ public class TestSimilarityBase extends LuceneTestCase {
|
|||
}
|
||||
|
||||
private CollectionStatistics toCollectionStats(BasicStats stats) {
|
||||
return new CollectionStatistics(stats.field, stats.getNumberOfDocuments(), -1, stats.getNumberOfFieldTokens(), -1);
|
||||
long sumTtf = stats.getNumberOfFieldTokens();
|
||||
long sumDf;
|
||||
if (sumTtf == -1) {
|
||||
sumDf = TestUtil.nextLong(random(), stats.getNumberOfDocuments(), 2L * stats.getNumberOfDocuments());
|
||||
} else {
|
||||
sumDf = TestUtil.nextLong(random(), Math.min(stats.getNumberOfDocuments(), sumTtf), sumTtf);
|
||||
}
|
||||
int docCount = Math.toIntExact(Math.min(sumDf, stats.getNumberOfDocuments()));
|
||||
int maxDoc = TestUtil.nextInt(random(), docCount, docCount + 10);
|
||||
|
||||
return new CollectionStatistics(stats.field, maxDoc, docCount, sumTtf, sumDf);
|
||||
}
|
||||
|
||||
private TermStatistics toTermStats(BasicStats stats) {
|
||||
|
|
|
@ -17,8 +17,10 @@
|
|||
package org.apache.lucene.facet;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
|
||||
|
@ -101,6 +103,14 @@ class DrillSidewaysQuery extends Query {
|
|||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
List<Weight> weights = new ArrayList<>();
|
||||
weights.add(baseWeight);
|
||||
weights.addAll(Arrays.asList(drillDowns));
|
||||
return getCacheHelper(context, weights);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
|
||||
Scorer baseScorer = baseWeight.scorer(context);
|
||||
|
|
|
@ -171,6 +171,11 @@ public final class DoubleRange extends Range {
|
|||
};
|
||||
return new ConstantScoreScorer(this, score(), twoPhase);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return null; // TODO delegate to LongValuesSource?
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -163,6 +163,11 @@ public final class LongRange extends Range {
|
|||
};
|
||||
return new ConstantScoreScorer(this, score(), twoPhase);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return null; // TODO delegate to LongValuesSource?
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -16,6 +16,16 @@
|
|||
*/
|
||||
package org.apache.lucene.facet;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
|
@ -58,16 +68,6 @@ import org.apache.lucene.util.InPlaceMergeSorter;
|
|||
import org.apache.lucene.util.InfoStream;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
public class TestDrillSideways extends FacetTestCase {
|
||||
|
||||
protected DrillSideways getNewDrillSideways(IndexSearcher searcher, FacetsConfig config,
|
||||
|
@ -740,6 +740,11 @@ public class TestDrillSideways extends FacetTestCase {
|
|||
});
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return null;
|
||||
}
|
||||
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -20,7 +20,6 @@ import java.io.IOException;
|
|||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
|
@ -29,8 +28,8 @@ import org.apache.lucene.document.DoublePoint;
|
|||
import org.apache.lucene.document.LongPoint;
|
||||
import org.apache.lucene.document.NumericDocValuesField;
|
||||
import org.apache.lucene.facet.DrillDownQuery;
|
||||
import org.apache.lucene.facet.DrillSideways.DrillSidewaysResult;
|
||||
import org.apache.lucene.facet.DrillSideways;
|
||||
import org.apache.lucene.facet.DrillSideways.DrillSidewaysResult;
|
||||
import org.apache.lucene.facet.FacetField;
|
||||
import org.apache.lucene.facet.FacetResult;
|
||||
import org.apache.lucene.facet.FacetTestCase;
|
||||
|
@ -46,10 +45,10 @@ import org.apache.lucene.index.IndexReader;
|
|||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.DoubleValues;
|
||||
import org.apache.lucene.search.DoubleValuesSource;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.FilterWeight;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.LongValuesSource;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
|
@ -717,24 +716,12 @@ public class TestRangeFacetCounts extends FacetTestCase {
|
|||
@Override
|
||||
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
|
||||
final Weight in = this.in.createWeight(searcher, needsScores, boost);
|
||||
return new Weight(in.getQuery()) {
|
||||
|
||||
@Override
|
||||
public void extractTerms(Set<Term> terms) {
|
||||
in.extractTerms(terms);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
|
||||
return in.explain(context, doc);
|
||||
}
|
||||
|
||||
return new FilterWeight(in) {
|
||||
@Override
|
||||
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||
used.set(true);
|
||||
return in.scorer(context);
|
||||
}
|
||||
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -238,13 +238,8 @@ public final class TokenStreamFromTermVector extends TokenStream {
|
|||
// Estimate the number of position slots we need from term stats. We use some estimation factors taken from
|
||||
// Wikipedia that reduce the likelihood of needing to expand the array.
|
||||
int sumTotalTermFreq = (int) vector.getSumTotalTermFreq();
|
||||
if (sumTotalTermFreq == -1) {//unfortunately term vectors seem to not have this stat
|
||||
int size = (int) vector.size();
|
||||
if (size == -1) {//doesn't happen with term vectors, it seems, but pick a default any way
|
||||
size = 128;
|
||||
}
|
||||
sumTotalTermFreq = (int)(size * 2.4);
|
||||
}
|
||||
assert sumTotalTermFreq != -1;
|
||||
|
||||
final int originalPositionEstimate = (int) (sumTotalTermFreq * 1.5);//less than 1 in 10 docs exceed this
|
||||
|
||||
// This estimate is based on maxStartOffset. Err on the side of this being larger than needed.
|
||||
|
|
|
@ -153,7 +153,7 @@ org.apache.hadoop.version = 2.7.4
|
|||
/org.apache.httpcomponents/httpcore = 4.4.6
|
||||
/org.apache.httpcomponents/httpmime = 4.5.3
|
||||
|
||||
/org.apache.ivy/ivy = 2.3.0
|
||||
/org.apache.ivy/ivy = 2.4.0
|
||||
|
||||
org.apache.james.apache.mime4j.version = 0.7.2
|
||||
/org.apache.james/apache-mime4j-core = ${org.apache.james.apache.mime4j.version}
|
||||
|
|
|
@ -20,6 +20,7 @@ import java.io.IOException;
|
|||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.OrdinalMap;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
|
@ -154,6 +155,11 @@ final class GlobalOrdinalsQuery extends Query {
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return getDocValuesCacheHelper(joinField, context);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
final static class OrdinalMapScorer extends BaseGlobalOrdinalScorer {
|
||||
|
|
|
@ -194,6 +194,11 @@ public class ParentChildrenBlockJoinQuery extends Query {
|
|||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return null; // TODO delegate to BitSetProducer?
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
@ -29,6 +29,7 @@ import org.apache.lucene.document.FloatPoint;
|
|||
import org.apache.lucene.document.IntPoint;
|
||||
import org.apache.lucene.document.LongPoint;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.PointValues;
|
||||
|
@ -186,6 +187,11 @@ abstract class PointInSetIncludingScoreQuery extends Query {
|
|||
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return context.reader().getCoreCacheHelper();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -21,6 +21,7 @@ import java.util.Locale;
|
|||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.Term;
|
||||
|
@ -140,6 +141,11 @@ class TermsIncludingScoreQuery extends Query {
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return context.reader().getCoreCacheHelper();
|
||||
}
|
||||
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -562,6 +562,11 @@ public class TestJoinUtil extends LuceneTestCase {
|
|||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return null;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
c5ebf1c253ad4959a29f4acfe696ee48cdd9f473
|
|
@ -0,0 +1 @@
|
|||
5abe4c24bbe992a9ac07ca563d5bd3e8d569e9ed
|
|
@ -17,6 +17,7 @@
|
|||
package org.apache.lucene.queries;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
|
||||
|
@ -121,6 +122,11 @@ public class BoostingQuery extends Query {
|
|||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return getCacheHelper(context, Arrays.asList(matchWeight, contextWeight));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -17,9 +17,11 @@
|
|||
package org.apache.lucene.queries;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
|
@ -207,6 +209,14 @@ public class CustomScoreQuery extends Query implements Cloneable {
|
|||
return new CustomScorer(CustomScoreQuery.this.getCustomScoreProvider(context), this, queryWeight, subQueryScorer, valSrcScorers);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
List<Weight> weights = new ArrayList<>();
|
||||
weights.add(subQueryWeight);
|
||||
weights.addAll(Arrays.asList(valSrcWeights));
|
||||
return getCacheHelper(context, weights);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
|
||||
Explanation explain = doExplain(context, doc);
|
||||
|
|
|
@ -88,6 +88,11 @@ public final class BoostedQuery extends Query {
|
|||
return new BoostedQuery.CustomScorer(context, this, subQueryScorer, boostVal);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Explanation explain(LeafReaderContext readerContext, int doc) throws IOException {
|
||||
Explanation subQueryExpl = qWeight.explain(readerContext,doc);
|
||||
|
|
|
@ -21,6 +21,7 @@ import java.io.IOException;
|
|||
import java.util.Objects;
|
||||
import java.util.function.DoublePredicate;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.ConstantScoreScorer;
|
||||
import org.apache.lucene.search.ConstantScoreWeight;
|
||||
|
@ -80,6 +81,11 @@ public final class FunctionMatchQuery extends Query {
|
|||
};
|
||||
return new ConstantScoreScorer(this, score(), twoPhase);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return null; // TODO delegate to DoubleValuesSource?
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -74,6 +74,11 @@ public class FunctionQuery extends Query {
|
|||
return new AllScorer(context, this, boost);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
|
||||
return ((AllScorer)scorer(context)).explain(doc);
|
||||
|
|
|
@ -21,6 +21,7 @@ import java.util.Map;
|
|||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
|
@ -153,5 +154,10 @@ public class FunctionRangeQuery extends Query {
|
|||
// getRangeScorer takes String args and parses them. Weird.
|
||||
return functionValues.getRangeScorer(context, lowerVal, upperVal, includeLower, includeUpper);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -137,5 +137,10 @@ public final class FunctionScoreQuery extends Query {
|
|||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
|
||||
return null; // TODO delegate to DoubleValuesSource
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -29,8 +29,6 @@ import java.util.Map;
|
|||
/**
|
||||
* <code>SumTotalTermFreqValueSource</code> returns the number of tokens.
|
||||
* (sum of term freqs across all documents, across all terms).
|
||||
* Returns -1 if frequencies were omitted for the field, or if
|
||||
* the codec doesn't support this statistic.
|
||||
* @lucene.internal
|
||||
*/
|
||||
public class SumTotalTermFreqValueSource extends ValueSource {
|
||||
|
@ -61,13 +59,9 @@ public class SumTotalTermFreqValueSource extends ValueSource {
|
|||
Terms terms = readerContext.reader().terms(indexedField);
|
||||
if (terms == null) continue;
|
||||
long v = terms.getSumTotalTermFreq();
|
||||
if (v == -1) {
|
||||
sumTotalTermFreq = -1;
|
||||
break;
|
||||
} else {
|
||||
assert v != -1;
|
||||
sumTotalTermFreq += v;
|
||||
}
|
||||
}
|
||||
final long ttf = sumTotalTermFreq;
|
||||
context.put(this, new LongDocValues(this) {
|
||||
@Override
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue