LUCENE-6429: Removed the TermQuery(Term,int) constructor.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1674084 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Adrien Grand 2015-04-16 14:53:09 +00:00
parent bf299ff52e
commit d30e730809
4 changed files with 43 additions and 27 deletions

View File

@ -200,6 +200,9 @@ Bug Fixes
* LUCENE-6395: Seeking by term ordinal was failing to set the term's
bytes in MemoryIndex (Mike McCandless)
* LUCENE-6429: Removed the TermQuery(Term,int) constructor which could lead to
inconsistent term statistics. (Adrien Grand, Robert Muir)
Optimizations
* LUCENE-6183, LUCENE-5647: Avoid recompressing stored fields

View File

@ -53,6 +53,7 @@ public final class TermContext {
assert context != null && context.isTopLevel;
topReaderContext = context;
docFreq = 0;
totalTermFreq = 0;
final int len;
if (context.leaves() == null) {
len = 1;
@ -107,6 +108,7 @@ public final class TermContext {
*/
public void clear() {
docFreq = 0;
totalTermFreq = 0;
Arrays.fill(states, null);
}
@ -160,12 +162,6 @@ public final class TermContext {
public long totalTermFreq() {
return totalTermFreq;
}
/** expert: only available for queries that want to lie about docfreq
* @lucene.internal */
public void setDocFreq(int docFreq) {
this.docFreq = docFreq;
}
/** Returns true if all terms stored here are real (e.g., not auto-prefix terms).
*

View File

@ -18,6 +18,7 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
import java.util.Objects;
import java.util.Set;
import org.apache.lucene.index.IndexReaderContext;
@ -40,7 +41,6 @@ import org.apache.lucene.util.ToStringUtils;
*/
public class TermQuery extends Query {
private final Term term;
private final int docFreq;
private final TermContext perReaderTermState;
final class TermWeight extends Weight {
@ -138,16 +138,7 @@ public class TermQuery extends Query {
/** Constructs a query for the term <code>t</code>. */
public TermQuery(Term t) {
this(t, -1);
}
/**
* Expert: constructs a TermQuery that will use the provided docFreq instead
* of looking up the docFreq against the searcher.
*/
public TermQuery(Term t, int docFreq) {
term = t;
this.docFreq = docFreq;
term = Objects.requireNonNull(t);
perReaderTermState = null;
}
@ -157,9 +148,8 @@ public class TermQuery extends Query {
*/
public TermQuery(Term t, TermContext states) {
assert states != null;
term = t;
docFreq = states.docFreq();
perReaderTermState = states;
term = Objects.requireNonNull(t);
perReaderTermState = Objects.requireNonNull(states);
}
/** Returns the term of this query. */
@ -181,9 +171,6 @@ public class TermQuery extends Query {
termState = this.perReaderTermState;
}
// we must not ignore the given docFreq - if set use the given value (lie)
if (docFreq != -1) termState.setDocFreq(docFreq);
return new TermWeight(searcher, needsScores, termState);
}

View File

@ -27,12 +27,21 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.search.*;
import org.apache.lucene.search.similarities.TFIDFSimilarity;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostAttribute;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.MaxNonCompetitiveBoostAttribute;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.TFIDFSimilarity;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PriorityQueue;
@ -256,6 +265,27 @@ public class FuzzyLikeThisQuery extends Query
}
}
private Query newTermQuery(IndexReader reader, Term term) throws IOException {
if (ignoreTF) {
return new ConstantScoreQuery(new TermQuery(term));
} else {
// we build an artificial TermContext that will give an overall df and ttf
// equal to 1
TermContext context = new TermContext(reader.getContext());
for (LeafReaderContext leafContext : reader.leaves()) {
Terms terms = leafContext.reader().terms(term.field());
if (terms != null) {
TermsEnum termsEnum = terms.iterator();
if (termsEnum.seekExact(term.bytes())) {
int freq = 1 - context.docFreq(); // we want the total df and ttf to be 1
context.register(termsEnum.termState(), leafContext.ord, freq, freq);
}
}
}
return new TermQuery(term, context);
}
}
@Override
public Query rewrite(IndexReader reader) throws IOException
{
@ -298,7 +328,7 @@ public class FuzzyLikeThisQuery extends Query
{
//optimize where only one selected variant
ScoreTerm st= variants.get(0);
Query tq = ignoreTF ? new ConstantScoreQuery(new TermQuery(st.term)) : new TermQuery(st.term, 1);
Query tq = newTermQuery(reader, st.term);
tq.setBoost(st.score); // set the boost to a mix of IDF and score
bq.add(tq, BooleanClause.Occur.SHOULD);
}
@ -310,7 +340,7 @@ public class FuzzyLikeThisQuery extends Query
{
ScoreTerm st = iterator2.next();
// found a match
Query tq = ignoreTF ? new ConstantScoreQuery(new TermQuery(st.term)) : new TermQuery(st.term, 1);
Query tq = newTermQuery(reader, st.term);
tq.setBoost(st.score); // set the boost using the ScoreTerm's score
termVariants.add(tq, BooleanClause.Occur.SHOULD); // add to query
}