mirror of https://github.com/apache/lucene.git
LUCENE-7311: Cached term queries do not seek the terms dictionary anymore.
This commit is contained in:
parent
433ab5ea6d
commit
d5779335aa
|
@ -112,6 +112,9 @@ Optimizations
|
||||||
* LUCENE-7371: Point values are now better compressed using run-length
|
* LUCENE-7371: Point values are now better compressed using run-length
|
||||||
encoding. (Adrien Grand)
|
encoding. (Adrien Grand)
|
||||||
|
|
||||||
|
* LUCENE-7311: Cached term queries do not seek the terms dictionary anymore.
|
||||||
|
(Adrien Grand)
|
||||||
|
|
||||||
Other
|
Other
|
||||||
|
|
||||||
* LUCENE-4787: Fixed some highlighting javadocs. (Michael Dodsworth via Adrien
|
* LUCENE-4787: Fixed some highlighting javadocs. (Michael Dodsworth via Adrien
|
||||||
|
|
|
@ -29,6 +29,7 @@ import org.apache.lucene.index.ReaderUtil;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.TermContext;
|
import org.apache.lucene.index.TermContext;
|
||||||
import org.apache.lucene.index.TermState;
|
import org.apache.lucene.index.TermState;
|
||||||
|
import org.apache.lucene.index.Terms;
|
||||||
import org.apache.lucene.index.TermsEnum;
|
import org.apache.lucene.index.TermsEnum;
|
||||||
import org.apache.lucene.search.similarities.Similarity;
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
import org.apache.lucene.search.similarities.Similarity.SimScorer;
|
import org.apache.lucene.search.similarities.Similarity.SimScorer;
|
||||||
|
@ -51,8 +52,10 @@ public class TermQuery extends Query {
|
||||||
public TermWeight(IndexSearcher searcher, boolean needsScores, TermContext termStates)
|
public TermWeight(IndexSearcher searcher, boolean needsScores, TermContext termStates)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
super(TermQuery.this);
|
super(TermQuery.this);
|
||||||
|
if (needsScores && termStates == null) {
|
||||||
|
throw new IllegalStateException("termStates are required when scores are needed");
|
||||||
|
}
|
||||||
this.needsScores = needsScores;
|
this.needsScores = needsScores;
|
||||||
assert termStates != null : "TermContext must not be null";
|
|
||||||
this.termStates = termStates;
|
this.termStates = termStates;
|
||||||
this.similarity = searcher.getSimilarity(needsScores);
|
this.similarity = searcher.getSimilarity(needsScores);
|
||||||
|
|
||||||
|
@ -62,12 +65,10 @@ public class TermQuery extends Query {
|
||||||
collectionStats = searcher.collectionStatistics(term.field());
|
collectionStats = searcher.collectionStatistics(term.field());
|
||||||
termStats = searcher.termStatistics(term, termStates);
|
termStats = searcher.termStatistics(term, termStates);
|
||||||
} else {
|
} else {
|
||||||
// do not bother computing actual stats, scores are not needed
|
// we do not need the actual stats, use fake stats with docFreq=maxDoc and ttf=-1
|
||||||
final int maxDoc = searcher.getIndexReader().maxDoc();
|
final int maxDoc = searcher.getIndexReader().maxDoc();
|
||||||
final int docFreq = termStates.docFreq();
|
|
||||||
final long totalTermFreq = termStates.totalTermFreq();
|
|
||||||
collectionStats = new CollectionStatistics(term.field(), maxDoc, -1, -1, -1);
|
collectionStats = new CollectionStatistics(term.field(), maxDoc, -1, -1, -1);
|
||||||
termStats = new TermStatistics(term.bytes(), docFreq, totalTermFreq);
|
termStats = new TermStatistics(term.bytes(), maxDoc, -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
this.stats = similarity.computeWeight(collectionStats, termStats);
|
this.stats = similarity.computeWeight(collectionStats, termStats);
|
||||||
|
@ -95,7 +96,7 @@ public class TermQuery extends Query {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Scorer scorer(LeafReaderContext context) throws IOException {
|
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||||
assert termStates.topReaderContext == ReaderUtil.getTopLevelContext(context) : "The top-reader used to create Weight (" + termStates.topReaderContext + ") is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);
|
assert termStates == null || termStates.topReaderContext == ReaderUtil.getTopLevelContext(context) : "The top-reader used to create Weight (" + termStates.topReaderContext + ") is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);;
|
||||||
final TermsEnum termsEnum = getTermsEnum(context);
|
final TermsEnum termsEnum = getTermsEnum(context);
|
||||||
if (termsEnum == null) {
|
if (termsEnum == null) {
|
||||||
return null;
|
return null;
|
||||||
|
@ -110,17 +111,30 @@ public class TermQuery extends Query {
|
||||||
* the term does not exist in the given context
|
* the term does not exist in the given context
|
||||||
*/
|
*/
|
||||||
private TermsEnum getTermsEnum(LeafReaderContext context) throws IOException {
|
private TermsEnum getTermsEnum(LeafReaderContext context) throws IOException {
|
||||||
final TermState state = termStates.get(context.ord);
|
if (termStates != null) {
|
||||||
if (state == null) { // term is not present in that reader
|
// TermQuery either used as a Query or the term states have been provided at construction time
|
||||||
assert termNotInReader(context.reader(), term) : "no termstate found but term exists in reader term=" + term;
|
assert termStates.topReaderContext == ReaderUtil.getTopLevelContext(context) : "The top-reader used to create Weight (" + termStates.topReaderContext + ") is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);
|
||||||
return null;
|
final TermState state = termStates.get(context.ord);
|
||||||
|
if (state == null) { // term is not present in that reader
|
||||||
|
assert termNotInReader(context.reader(), term) : "no termstate found but term exists in reader term=" + term;
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
final TermsEnum termsEnum = context.reader().terms(term.field()).iterator();
|
||||||
|
termsEnum.seekExact(term.bytes(), state);
|
||||||
|
return termsEnum;
|
||||||
|
} else {
|
||||||
|
// TermQuery used as a filter, so the term states have not been built up front
|
||||||
|
Terms terms = context.reader().terms(term.field());
|
||||||
|
if (terms == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
final TermsEnum termsEnum = terms.iterator();
|
||||||
|
if (termsEnum.seekExact(term.bytes())) {
|
||||||
|
return termsEnum;
|
||||||
|
} else {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// System.out.println("LD=" + reader.getLiveDocs() + " set?=" +
|
|
||||||
// (reader.getLiveDocs() != null ? reader.getLiveDocs().get(0) : "null"));
|
|
||||||
final TermsEnum termsEnum = context.reader().terms(term.field())
|
|
||||||
.iterator();
|
|
||||||
termsEnum.seekExact(term.bytes(), state);
|
|
||||||
return termsEnum;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean termNotInReader(LeafReader reader, Term term) throws IOException {
|
private boolean termNotInReader(LeafReader reader, Term term) throws IOException {
|
||||||
|
@ -178,9 +192,15 @@ public class TermQuery extends Query {
|
||||||
final TermContext termState;
|
final TermContext termState;
|
||||||
if (perReaderTermState == null
|
if (perReaderTermState == null
|
||||||
|| perReaderTermState.topReaderContext != context) {
|
|| perReaderTermState.topReaderContext != context) {
|
||||||
// make TermQuery single-pass if we don't have a PRTS or if the context
|
if (needsScores) {
|
||||||
// differs!
|
// make TermQuery single-pass if we don't have a PRTS or if the context
|
||||||
termState = TermContext.build(context, term);
|
// differs!
|
||||||
|
termState = TermContext.build(context, term);
|
||||||
|
} else {
|
||||||
|
// do not compute the term state, this will help save seeks in the terms
|
||||||
|
// dict on segments that have a cache entry for this query
|
||||||
|
termState = null;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// PRTS was pre-build for this IS
|
// PRTS was pre-build for this IS
|
||||||
termState = this.perReaderTermState;
|
termState = this.perReaderTermState;
|
||||||
|
|
|
@ -0,0 +1,154 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.lucene.search;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field.Store;
|
||||||
|
import org.apache.lucene.document.StringField;
|
||||||
|
import org.apache.lucene.index.DirectoryReader;
|
||||||
|
import org.apache.lucene.index.Fields;
|
||||||
|
import org.apache.lucene.index.FilterDirectoryReader;
|
||||||
|
import org.apache.lucene.index.FilterLeafReader;
|
||||||
|
import org.apache.lucene.index.LeafReader;
|
||||||
|
import org.apache.lucene.index.MultiReader;
|
||||||
|
import org.apache.lucene.index.NoMergePolicy;
|
||||||
|
import org.apache.lucene.index.RandomIndexWriter;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.index.TermContext;
|
||||||
|
import org.apache.lucene.index.TermState;
|
||||||
|
import org.apache.lucene.index.Terms;
|
||||||
|
import org.apache.lucene.index.TermsEnum;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.IOUtils;
|
||||||
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
|
||||||
|
public class TestTermQuery extends LuceneTestCase {
|
||||||
|
|
||||||
|
public void testEquals() throws IOException {
|
||||||
|
QueryUtils.checkEqual(
|
||||||
|
new TermQuery(new Term("foo", "bar")),
|
||||||
|
new TermQuery(new Term("foo", "bar")));
|
||||||
|
QueryUtils.checkUnequal(
|
||||||
|
new TermQuery(new Term("foo", "bar")),
|
||||||
|
new TermQuery(new Term("foo", "baz")));
|
||||||
|
QueryUtils.checkEqual(
|
||||||
|
new TermQuery(new Term("foo", "bar")),
|
||||||
|
new TermQuery(new Term("foo", "bar"), TermContext.build(new MultiReader().getContext(), new Term("foo", "bar"))));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testCreateWeightDoesNotSeekIfScoresAreNotNeeded() throws IOException {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE));
|
||||||
|
// segment that contains the term
|
||||||
|
Document doc = new Document();
|
||||||
|
doc.add(new StringField("foo", "bar", Store.NO));
|
||||||
|
w.addDocument(doc);
|
||||||
|
w.getReader().close();
|
||||||
|
// segment that does not contain the term
|
||||||
|
doc = new Document();
|
||||||
|
doc.add(new StringField("foo", "baz", Store.NO));
|
||||||
|
w.addDocument(doc);
|
||||||
|
w.getReader().close();
|
||||||
|
// segment that does not contain the field
|
||||||
|
w.addDocument(new Document());
|
||||||
|
|
||||||
|
DirectoryReader reader = w.getReader();
|
||||||
|
FilterDirectoryReader noSeekReader = new NoSeekDirectoryReader(reader);
|
||||||
|
IndexSearcher noSeekSearcher = new IndexSearcher(noSeekReader);
|
||||||
|
Query query = new TermQuery(new Term("foo", "bar"));
|
||||||
|
AssertionError e = expectThrows(AssertionError.class,
|
||||||
|
() -> noSeekSearcher.createNormalizedWeight(query, true));
|
||||||
|
assertEquals("no seek", e.getMessage());
|
||||||
|
|
||||||
|
noSeekSearcher.createNormalizedWeight(query, false); // no exception
|
||||||
|
IndexSearcher searcher = new IndexSearcher(reader);
|
||||||
|
// use a collector rather than searcher.count() which would just read the
|
||||||
|
// doc freq instead of creating a scorer
|
||||||
|
TotalHitCountCollector collector = new TotalHitCountCollector();
|
||||||
|
searcher.search(query, collector);
|
||||||
|
assertEquals(1, collector.getTotalHits());
|
||||||
|
TermQuery queryWithContext = new TermQuery(new Term("foo", "bar"),
|
||||||
|
TermContext.build(reader.getContext(), new Term("foo", "bar")));
|
||||||
|
collector = new TotalHitCountCollector();
|
||||||
|
searcher.search(queryWithContext, collector);
|
||||||
|
assertEquals(1, collector.getTotalHits());
|
||||||
|
|
||||||
|
IOUtils.close(reader, w, dir);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class NoSeekDirectoryReader extends FilterDirectoryReader {
|
||||||
|
|
||||||
|
public NoSeekDirectoryReader(DirectoryReader in) throws IOException {
|
||||||
|
super(in, new SubReaderWrapper() {
|
||||||
|
@Override
|
||||||
|
public LeafReader wrap(LeafReader reader) {
|
||||||
|
return new NoSeekLeafReader(reader);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOException {
|
||||||
|
return new NoSeekDirectoryReader(in);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class NoSeekLeafReader extends FilterLeafReader {
|
||||||
|
|
||||||
|
public NoSeekLeafReader(LeafReader in) {
|
||||||
|
super(in);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Fields fields() throws IOException {
|
||||||
|
return new FilterFields(super.fields()) {
|
||||||
|
@Override
|
||||||
|
public Terms terms(String field) throws IOException {
|
||||||
|
return new FilterTerms(super.terms(field)) {
|
||||||
|
@Override
|
||||||
|
public TermsEnum iterator() throws IOException {
|
||||||
|
return new FilterTermsEnum(super.iterator()) {
|
||||||
|
@Override
|
||||||
|
public SeekStatus seekCeil(BytesRef text) throws IOException {
|
||||||
|
throw new AssertionError("no seek");
|
||||||
|
}
|
||||||
|
@Override
|
||||||
|
public void seekExact(BytesRef term, TermState state) throws IOException {
|
||||||
|
throw new AssertionError("no seek");
|
||||||
|
}
|
||||||
|
@Override
|
||||||
|
public boolean seekExact(BytesRef text) throws IOException {
|
||||||
|
throw new AssertionError("no seek");
|
||||||
|
}
|
||||||
|
@Override
|
||||||
|
public void seekExact(long ord) throws IOException {
|
||||||
|
throw new AssertionError("no seek");
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in New Issue