mirror of https://github.com/apache/lucene.git
LUCENE-7311: Cached term queries do not seek the terms dictionary anymore.
parent 433ab5ea6d
commit d5779335aa
CHANGES.txt
@@ -112,6 +112,9 @@ Optimizations
 * LUCENE-7371: Point values are now better compressed using run-length
   encoding. (Adrien Grand)

+* LUCENE-7311: Cached term queries do not seek the terms dictionary anymore.
+  (Adrien Grand)
+
 Other

 * LUCENE-4787: Fixed some highlighting javadocs. (Michael Dodsworth via Adrien
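The entry above is terse, so here is a minimal sketch, not part of the commit, of the situation it targets: a TermQuery evaluated without scores, for example as a FILTER clause, which is also how cached queries are evaluated. The class, method, field and value names below are invented for illustration; the API calls themselves (BooleanQuery.Builder, Occur.FILTER, IndexSearcher.count, IndexSearcher.createNormalizedWeight) are standard Lucene 6.x entry points, the last one being what the new test at the bottom of this diff uses.

    import java.io.IOException;

    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.BooleanClause.Occur;
    import org.apache.lucene.search.BooleanQuery;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.MatchAllDocsQuery;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.TermQuery;
    import org.apache.lucene.search.Weight;

    class NoScoreTermQueryExample {
      // Counts documents matching a term without needing scores. With this change,
      // the TermQuery weight no longer builds a TermContext (one terms-dictionary
      // seek per segment) up front when scores are not required, so segments that
      // already have a query-cache entry are not seeked at all.
      static long countMatches(DirectoryReader reader) throws IOException {
        IndexSearcher searcher = new IndexSearcher(reader);
        Query term = new TermQuery(new Term("color", "blue")); // hypothetical field/value
        Query filterOnly = new BooleanQuery.Builder()
            .add(new MatchAllDocsQuery(), Occur.MUST)
            .add(term, Occur.FILTER) // FILTER clause: matches contribute no score
            .build();
        return searcher.count(filterOnly);
      }

      // The same path, taken explicitly with needsScores=false, as in TestTermQuery.
      static Weight noScoreWeight(IndexSearcher searcher, Query query) throws IOException {
        return searcher.createNormalizedWeight(query, false);
      }
    }

With scores requested (needsScores=true), the weight still builds a TermContext up front, exactly as before.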
TermQuery.java
@@ -29,6 +29,7 @@ import org.apache.lucene.index.ReaderUtil;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermContext;
 import org.apache.lucene.index.TermState;
+import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.search.similarities.Similarity.SimScorer;
@@ -51,8 +52,10 @@ public class TermQuery extends Query {
     public TermWeight(IndexSearcher searcher, boolean needsScores, TermContext termStates)
         throws IOException {
       super(TermQuery.this);
+      if (needsScores && termStates == null) {
+        throw new IllegalStateException("termStates are required when scores are needed");
+      }
       this.needsScores = needsScores;
-      assert termStates != null : "TermContext must not be null";
       this.termStates = termStates;
       this.similarity = searcher.getSimilarity(needsScores);

@@ -62,12 +65,10 @@ public class TermQuery extends Query {
         collectionStats = searcher.collectionStatistics(term.field());
         termStats = searcher.termStatistics(term, termStates);
       } else {
-        // do not bother computing actual stats, scores are not needed
+        // we do not need the actual stats, use fake stats with docFreq=maxDoc and ttf=-1
         final int maxDoc = searcher.getIndexReader().maxDoc();
-        final int docFreq = termStates.docFreq();
-        final long totalTermFreq = termStates.totalTermFreq();
         collectionStats = new CollectionStatistics(term.field(), maxDoc, -1, -1, -1);
-        termStats = new TermStatistics(term.bytes(), docFreq, totalTermFreq);
+        termStats = new TermStatistics(term.bytes(), maxDoc, -1);
       }

       this.stats = similarity.computeWeight(collectionStats, termStats);
@@ -95,7 +96,7 @@ public class TermQuery extends Query {

     @Override
     public Scorer scorer(LeafReaderContext context) throws IOException {
-      assert termStates.topReaderContext == ReaderUtil.getTopLevelContext(context) : "The top-reader used to create Weight (" + termStates.topReaderContext + ") is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);
+      assert termStates == null || termStates.topReaderContext == ReaderUtil.getTopLevelContext(context) : "The top-reader used to create Weight (" + termStates.topReaderContext + ") is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);;
       final TermsEnum termsEnum = getTermsEnum(context);
       if (termsEnum == null) {
         return null;
@@ -110,17 +111,30 @@ public class TermQuery extends Query {
      * the term does not exist in the given context
      */
     private TermsEnum getTermsEnum(LeafReaderContext context) throws IOException {
-      final TermState state = termStates.get(context.ord);
-      if (state == null) { // term is not present in that reader
-        assert termNotInReader(context.reader(), term) : "no termstate found but term exists in reader term=" + term;
-        return null;
+      if (termStates != null) {
+        // TermQuery either used as a Query or the term states have been provided at construction time
+        assert termStates.topReaderContext == ReaderUtil.getTopLevelContext(context) : "The top-reader used to create Weight (" + termStates.topReaderContext + ") is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);
+        final TermState state = termStates.get(context.ord);
+        if (state == null) { // term is not present in that reader
+          assert termNotInReader(context.reader(), term) : "no termstate found but term exists in reader term=" + term;
+          return null;
+        }
+        final TermsEnum termsEnum = context.reader().terms(term.field()).iterator();
+        termsEnum.seekExact(term.bytes(), state);
+        return termsEnum;
+      } else {
+        // TermQuery used as a filter, so the term states have not been built up front
+        Terms terms = context.reader().terms(term.field());
+        if (terms == null) {
+          return null;
+        }
+        final TermsEnum termsEnum = terms.iterator();
+        if (termsEnum.seekExact(term.bytes())) {
+          return termsEnum;
+        } else {
+          return null;
+        }
       }
-      // System.out.println("LD=" + reader.getLiveDocs() + " set?=" +
-      // (reader.getLiveDocs() != null ? reader.getLiveDocs().get(0) : "null"));
-      final TermsEnum termsEnum = context.reader().terms(term.field())
-          .iterator();
-      termsEnum.seekExact(term.bytes(), state);
-      return termsEnum;
     }

     private boolean termNotInReader(LeafReader reader, Term term) throws IOException {
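For readers less familiar with the TermsEnum API, a minimal sketch of the lazy lookup that the new filter branch performs per segment; the helper class, field and term below are invented, and any LeafReader will do:

    import java.io.IOException;

    import org.apache.lucene.index.LeafReader;
    import org.apache.lucene.index.Terms;
    import org.apache.lucene.index.TermsEnum;
    import org.apache.lucene.util.BytesRef;

    class TermLookupSketch {
      // Mirrors the "filter" branch above: a real dictionary lookup, performed
      // lazily and only on the segments that actually get evaluated.
      static boolean termExists(LeafReader leafReader, String field, BytesRef term) throws IOException {
        Terms terms = leafReader.terms(field); // null if the segment has no such field
        if (terms == null) {
          return false;
        }
        TermsEnum te = terms.iterator();
        return te.seekExact(term);             // the only seek, and only when needed
      }
    }

The scoring branch instead calls the two-argument seekExact(BytesRef, TermState) with a state captured earlier by TermContext.build, so the dictionary walk done at weight creation is not repeated when the scorer is created.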
@@ -178,9 +192,15 @@ public class TermQuery extends Query {
     final TermContext termState;
     if (perReaderTermState == null
         || perReaderTermState.topReaderContext != context) {
-      // make TermQuery single-pass if we don't have a PRTS or if the context
-      // differs!
-      termState = TermContext.build(context, term);
+      if (needsScores) {
+        // make TermQuery single-pass if we don't have a PRTS or if the context
+        // differs!
+        termState = TermContext.build(context, term);
+      } else {
+        // do not compute the term state, this will help save seeks in the terms
+        // dict on segments that have a cache entry for this query
+        termState = null;
+      }
     } else {
       // PRTS was pre-build for this IS
       termState = this.perReaderTermState;
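The other direction also exists: a caller can pre-build the per-reader term states once and hand them to the expert TermQuery constructor, which is the "PRTS was pre-build for this IS" branch above and what the new test's queryWithContext case exercises. A hedged sketch, with an invented field, value and class name:

    import java.io.IOException;

    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.index.TermContext;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.TermQuery;
    import org.apache.lucene.search.TopDocs;

    class PrebuiltTermContextExample {
      static TopDocs searchWithPrebuiltStates(DirectoryReader reader) throws IOException {
        Term term = new Term("color", "blue");                             // hypothetical
        // One pass over the segments now, reused by createWeight later.
        TermContext states = TermContext.build(reader.getContext(), term);
        Query query = new TermQuery(term, states);                         // expert constructor
        return new IndexSearcher(reader).search(query, 10);
      }
    }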
TestTermQuery.java (new file)
@@ -0,0 +1,154 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search;

import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FilterDirectoryReader;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;

public class TestTermQuery extends LuceneTestCase {

  public void testEquals() throws IOException {
    QueryUtils.checkEqual(
        new TermQuery(new Term("foo", "bar")),
        new TermQuery(new Term("foo", "bar")));
    QueryUtils.checkUnequal(
        new TermQuery(new Term("foo", "bar")),
        new TermQuery(new Term("foo", "baz")));
    QueryUtils.checkEqual(
        new TermQuery(new Term("foo", "bar")),
        new TermQuery(new Term("foo", "bar"), TermContext.build(new MultiReader().getContext(), new Term("foo", "bar"))));
  }

  public void testCreateWeightDoesNotSeekIfScoresAreNotNeeded() throws IOException {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE));
    // segment that contains the term
    Document doc = new Document();
    doc.add(new StringField("foo", "bar", Store.NO));
    w.addDocument(doc);
    w.getReader().close();
    // segment that does not contain the term
    doc = new Document();
    doc.add(new StringField("foo", "baz", Store.NO));
    w.addDocument(doc);
    w.getReader().close();
    // segment that does not contain the field
    w.addDocument(new Document());

    DirectoryReader reader = w.getReader();
    FilterDirectoryReader noSeekReader = new NoSeekDirectoryReader(reader);
    IndexSearcher noSeekSearcher = new IndexSearcher(noSeekReader);
    Query query = new TermQuery(new Term("foo", "bar"));
    AssertionError e = expectThrows(AssertionError.class,
        () -> noSeekSearcher.createNormalizedWeight(query, true));
    assertEquals("no seek", e.getMessage());

    noSeekSearcher.createNormalizedWeight(query, false); // no exception
    IndexSearcher searcher = new IndexSearcher(reader);
    // use a collector rather than searcher.count() which would just read the
    // doc freq instead of creating a scorer
    TotalHitCountCollector collector = new TotalHitCountCollector();
    searcher.search(query, collector);
    assertEquals(1, collector.getTotalHits());
    TermQuery queryWithContext = new TermQuery(new Term("foo", "bar"),
        TermContext.build(reader.getContext(), new Term("foo", "bar")));
    collector = new TotalHitCountCollector();
    searcher.search(queryWithContext, collector);
    assertEquals(1, collector.getTotalHits());

    IOUtils.close(reader, w, dir);
  }

  private static class NoSeekDirectoryReader extends FilterDirectoryReader {

    public NoSeekDirectoryReader(DirectoryReader in) throws IOException {
      super(in, new SubReaderWrapper() {
        @Override
        public LeafReader wrap(LeafReader reader) {
          return new NoSeekLeafReader(reader);
        }
      });
    }

    @Override
    protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOException {
      return new NoSeekDirectoryReader(in);
    }

  }

  private static class NoSeekLeafReader extends FilterLeafReader {

    public NoSeekLeafReader(LeafReader in) {
      super(in);
    }

    @Override
    public Fields fields() throws IOException {
      return new FilterFields(super.fields()) {
        @Override
        public Terms terms(String field) throws IOException {
          return new FilterTerms(super.terms(field)) {
            @Override
            public TermsEnum iterator() throws IOException {
              return new FilterTermsEnum(super.iterator()) {
                @Override
                public SeekStatus seekCeil(BytesRef text) throws IOException {
                  throw new AssertionError("no seek");
                }
                @Override
                public void seekExact(BytesRef term, TermState state) throws IOException {
                  throw new AssertionError("no seek");
                }
                @Override
                public boolean seekExact(BytesRef text) throws IOException {
                  throw new AssertionError("no seek");
                }
                @Override
                public void seekExact(long ord) throws IOException {
                  throw new AssertionError("no seek");
                }
              };
            }
          };
        }
      };
    }

  };

}