diff --git a/CHANGES.txt b/CHANGES.txt index 2ab739d39f3..9f0e8080124 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -21,7 +21,13 @@ Changes in runtime behavior and WildcardQueries before). Use setLowercaseExpandedTerms(false) to disable that behavior but note that this also affects PrefixQueries and WildcardQueries. (Daniel Naber) - + + 4. Document frequency that is computed when MultiSearcher is used is now + computed correctly and "globally" across subsearchers and indices, while + before it used to be computed locally to each index, which caused + ranking across multiple indices not to be equivalent. + (Chuck Williams, Wolf Siberski via Otis) + New features 1. Added support for stored compressed fields (patch #31149) diff --git a/src/java/org/apache/lucene/search/BooleanQuery.java b/src/java/org/apache/lucene/search/BooleanQuery.java index 190a63e51ca..d49eb8d4fc6 100644 --- a/src/java/org/apache/lucene/search/BooleanQuery.java +++ b/src/java/org/apache/lucene/search/BooleanQuery.java @@ -17,6 +17,8 @@ package org.apache.lucene.search; */ import java.io.IOException; +import java.util.Iterator; +import java.util.Set; import java.util.Vector; import java.util.Arrays; @@ -153,16 +155,16 @@ public class BooleanQuery extends Query { } private class BooleanWeight implements Weight { - protected Searcher searcher; + protected Similarity similarity; protected Vector weights = new Vector(); - public BooleanWeight(Searcher searcher) { - this.searcher = searcher; + public BooleanWeight(Searcher searcher) + throws IOException { + this.similarity = getSimilarity(searcher); for (int i = 0 ; i < clauses.size(); i++) { BooleanClause c = (BooleanClause)clauses.elementAt(i); weights.add(c.getQuery().createWeight(searcher)); } - //System.out.println("Creating " + getClass().getName()); } public Query getQuery() { return BooleanQuery.this; } @@ -213,7 +215,7 @@ public class BooleanQuery extends Query { if (allRequired && noneBoolean) { // ConjunctionScorer is okay ConjunctionScorer 
result = - new ConjunctionScorer(getSimilarity(searcher)); + new ConjunctionScorer(similarity); for (int i = 0 ; i < weights.size(); i++) { Weight w = (Weight)weights.elementAt(i); Scorer subScorer = w.scorer(reader); @@ -225,7 +227,7 @@ public class BooleanQuery extends Query { } // Use good-old BooleanScorer instead. - BooleanScorer result = new BooleanScorer(getSimilarity(searcher)); + BooleanScorer result = new BooleanScorer(similarity); for (int i = 0 ; i < weights.size(); i++) { BooleanClause c = (BooleanClause)clauses.elementAt(i); @@ -269,7 +271,7 @@ public class BooleanQuery extends Query { if (coord == 1) // only one clause matched sumExpl = sumExpl.getDetails()[0]; // eliminate wrapper - float coordFactor = getSimilarity(searcher).coord(coord, maxCoord); + float coordFactor = similarity.coord(coord, maxCoord); if (coordFactor == 1.0f) // coord is no-op return sumExpl; // eliminate wrapper else { @@ -286,13 +288,16 @@ public class BooleanQuery extends Query { private class BooleanWeight2 extends BooleanWeight { /* Merge into BooleanWeight in case the 1.4 BooleanScorer is dropped */ - public BooleanWeight2(Searcher searcher) { super(searcher); } + public BooleanWeight2(Searcher searcher) + throws IOException { + super(searcher); + } /** @return An alternative Scorer that uses and provides skipTo(), * and scores documents in document number order. */ public Scorer scorer(IndexReader reader) throws IOException { - BooleanScorer2 result = new BooleanScorer2(getSimilarity(searcher)); + BooleanScorer2 result = new BooleanScorer2(similarity); for (int i = 0 ; i < weights.size(); i++) { BooleanClause c = (BooleanClause)clauses.elementAt(i); @@ -319,7 +324,7 @@ public class BooleanQuery extends Query { return useScorer14; } - protected Weight createWeight(Searcher searcher) { + protected Weight createWeight(Searcher searcher) throws IOException { return getUseScorer14() ? 
(Weight) new BooleanWeight(searcher) : (Weight) new BooleanWeight2(searcher); } @@ -358,6 +363,18 @@ public class BooleanQuery extends Query { return this; // no clauses rewrote } + // inherit javadoc + public void extractTerms(Set terms) { + for (Iterator i = clauses.iterator(); i.hasNext();) { + BooleanClause clause = (BooleanClause) i.next(); + clause.getQuery().extractTerms(terms); + } + } + + // inherit javadoc + public Query combine(Query[] queries) { + return Query.mergeBooleanQueries(queries); + } public Object clone() { BooleanQuery clone = (BooleanQuery)super.clone(); diff --git a/src/java/org/apache/lucene/search/FilteredQuery.java b/src/java/org/apache/lucene/search/FilteredQuery.java index 1e37edf94c4..ba51e19b43b 100644 --- a/src/java/org/apache/lucene/search/FilteredQuery.java +++ b/src/java/org/apache/lucene/search/FilteredQuery.java @@ -19,6 +19,7 @@ package org.apache.lucene.search; import org.apache.lucene.index.IndexReader; import java.io.IOException; import java.util.BitSet; +import java.util.Set; /** @@ -56,8 +57,9 @@ extends Query { * Returns a Weight that applies the filter to the enclosed query's Weight. * This is accomplished by overriding the Scorer returned by the Weight. 
*/ - protected Weight createWeight (final Searcher searcher) { + protected Weight createWeight (final Searcher searcher) throws IOException { final Weight weight = query.createWeight (searcher); + final Similarity similarity = query.getSimilarity(searcher); return new Weight() { // pass these methods through to enclosed query's weight @@ -74,7 +76,7 @@ extends Query { public Scorer scorer (IndexReader indexReader) throws IOException { final Scorer scorer = weight.scorer (indexReader); final BitSet bitset = filter.bits (indexReader); - return new Scorer (query.getSimilarity (searcher)) { + return new Scorer (similarity) { // pass these methods through to the enclosed scorer public boolean next() throws IOException { return scorer.next(); } @@ -116,6 +118,11 @@ extends Query { return query; } + // inherit javadoc + public void extractTerms(Set terms) { + getQuery().extractTerms(terms); + } + /** Prints a user-readable version of this query. */ public String toString (String s) { return "filtered("+query.toString(s)+")->"+filter; diff --git a/src/java/org/apache/lucene/search/Hits.java b/src/java/org/apache/lucene/search/Hits.java index 17e50cb40dc..54c4363cabb 100644 --- a/src/java/org/apache/lucene/search/Hits.java +++ b/src/java/org/apache/lucene/search/Hits.java @@ -23,7 +23,7 @@ import org.apache.lucene.document.Document; /** A ranked list of documents, used to hold search results. 
*/ public final class Hits { - private Query query; + private Weight weight; private Searcher searcher; private Filter filter = null; private Sort sort = null; @@ -37,14 +37,14 @@ public final class Hits { private int maxDocs = 200; // max to cache Hits(Searcher s, Query q, Filter f) throws IOException { - query = q; + weight = q.weight(s); searcher = s; filter = f; getMoreDocs(50); // retrieve 100 initially } Hits(Searcher s, Query q, Filter f, Sort o) throws IOException { - query = q; + weight = q.weight(s); searcher = s; filter = f; sort = o; @@ -61,7 +61,7 @@ public final class Hits { } int n = min * 2; // double # retrieved - TopDocs topDocs = (sort == null) ? searcher.search(query, filter, n) : searcher.search(query, filter, n, sort); + TopDocs topDocs = (sort == null) ? searcher.search(weight, filter, n) : searcher.search(weight, filter, n, sort); length = topDocs.totalHits; ScoreDoc[] scoreDocs = topDocs.scoreDocs; diff --git a/src/java/org/apache/lucene/search/IndexSearcher.java b/src/java/org/apache/lucene/search/IndexSearcher.java index b547ca64758..966bad378cf 100644 --- a/src/java/org/apache/lucene/search/IndexSearcher.java +++ b/src/java/org/apache/lucene/search/IndexSearcher.java @@ -88,11 +88,17 @@ public class IndexSearcher extends Searcher { // inherit javadoc public TopDocs search(Query query, Filter filter, final int nDocs) throws IOException { + return search(query.weight(this), filter, nDocs); + } + + // inherit javadoc + public TopDocs search(Weight weight, Filter filter, final int nDocs) + throws IOException { if (nDocs <= 0) // null might be returned from hq.top() below. 
throw new IllegalArgumentException("nDocs must be > 0"); - Scorer scorer = query.weight(this).scorer(reader); + Scorer scorer = weight.scorer(reader); if (scorer == null) return new TopDocs(0, new ScoreDoc[0]); @@ -124,7 +130,14 @@ public class IndexSearcher extends Searcher { public TopFieldDocs search(Query query, Filter filter, final int nDocs, Sort sort) throws IOException { - Scorer scorer = query.weight(this).scorer(reader); + return search(query.weight(this), filter, nDocs, sort); + } + + // inherit javadoc + public TopFieldDocs search(Weight weight, Filter filter, final int nDocs, + Sort sort) + throws IOException { + Scorer scorer = weight.scorer(reader); if (scorer == null) return new TopFieldDocs(0, new ScoreDoc[0], sort.fields); @@ -153,6 +166,12 @@ public class IndexSearcher extends Searcher { // inherit javadoc public void search(Query query, Filter filter, final HitCollector results) throws IOException { + search(query.weight(this), filter, results); + } + + // inherit javadoc + public void search(Weight weight, Filter filter, + final HitCollector results) throws IOException { HitCollector collector = results; if (filter != null) { final BitSet bits = filter.bits(reader); @@ -165,7 +184,7 @@ public class IndexSearcher extends Searcher { }; } - Scorer scorer = query.weight(this).scorer(reader); + Scorer scorer = weight.scorer(reader); if (scorer == null) return; scorer.score(collector); @@ -181,7 +200,10 @@ public class IndexSearcher extends Searcher { } public Explanation explain(Query query, int doc) throws IOException { - return query.weight(this).explain(reader, doc); + return explain(query.weight(this), doc); } + public Explanation explain(Weight weight, int doc) throws IOException { + return weight.explain(reader, doc); + } } diff --git a/src/java/org/apache/lucene/search/MultiPhraseQuery.java b/src/java/org/apache/lucene/search/MultiPhraseQuery.java index b21d56d0095..71d74bc6f6e 100644 --- 
a/src/java/org/apache/lucene/search/MultiPhraseQuery.java +++ b/src/java/org/apache/lucene/search/MultiPhraseQuery.java @@ -107,27 +107,30 @@ public class MultiPhraseQuery extends Query { } private class MultiPhraseWeight implements Weight { - private Searcher searcher; + private Similarity similarity; private float value; private float idf; private float queryNorm; private float queryWeight; - public MultiPhraseWeight(Searcher searcher) { - this.searcher = searcher; + public MultiPhraseWeight(Searcher searcher) + throws IOException { + this.similarity = getSimilarity(searcher); + + // compute idf + Iterator i = termArrays.iterator(); + while (i.hasNext()) { + Term[] terms = (Term[])i.next(); + for (int j=0; jterm. Returns an array with these + * document frequencies. Used to minimize number of remote calls. + */ + int[] docFreqs(Term[] terms) throws IOException; + /** Expert: Returns one greater than the largest possible document number. * Called by search code to compute term weights. * @see IndexReader#maxDoc() @@ -78,9 +93,17 @@ public interface Searchable extends java.rmi.Remote { *

Applications should usually call {@link Searcher#search(Query)} or * {@link Searcher#search(Query,Filter)} instead. * @throws BooleanQuery.TooManyClauses + * + * @deprecated use {@link #search(Weight, Filter, int)} instead. */ TopDocs search(Query query, Filter filter, int n) throws IOException; + /** Expert: Low-level search implementation. + * Identical to {@link #search(Query, Filter, int)}, but takes + * a Weight instead of a query. + */ + TopDocs search(Weight weight, Filter filter, int n) throws IOException; + /** Expert: Returns the stored fields of document i. * Called by {@link HitCollector} implementations. * @see IndexReader#document(int) @@ -103,6 +126,12 @@ public interface Searchable extends java.rmi.Remote { */ Explanation explain(Query query, int doc) throws IOException; + /** + * Identical to {@link #explain(Query, int)}, but takes + * a Weight instead of a query. + */ + Explanation explain(Weight weight, int doc) throws IOException; + /** Expert: Low-level search implementation with arbitrary sorting. Finds * the top n hits for query, applying * filter if non-null, and sorting the hits by the criteria in @@ -111,7 +140,16 @@ public interface Searchable extends java.rmi.Remote { *

Applications should usually call {@link * Searcher#search(Query,Filter,Sort)} instead. * @throws BooleanQuery.TooManyClauses + * + * @deprecated use {@link #search(Weight, Filter, int, Sort)} instead. */ TopFieldDocs search(Query query, Filter filter, int n, Sort sort) throws IOException; + + /** Expert: Low-level search implementation. + * Identical to {@link #search(Query, Filter, int, Sort)}, but takes + * a Weight instead of a query. + */ + TopFieldDocs search(Weight weight, Filter filter, int n, Sort sort) + throws IOException; } diff --git a/src/java/org/apache/lucene/search/Searcher.java b/src/java/org/apache/lucene/search/Searcher.java index ac85481dd01..b16bf1e2506 100644 --- a/src/java/org/apache/lucene/search/Searcher.java +++ b/src/java/org/apache/lucene/search/Searcher.java @@ -18,6 +18,8 @@ package org.apache.lucene.search; import java.io.IOException; +import org.apache.lucene.index.Term; + /** An abstract base class for search implementations. * Implements some common utility methods. */ @@ -93,4 +95,15 @@ public abstract class Searcher implements Searchable { public Similarity getSimilarity() { return this.similarity; } + + + // inherit javadoc + public int[] docFreqs(Term[] terms) throws IOException { + int[] result = new int[terms.length]; + for (int i = 0; i < terms.length; i++) { + result[i] = docFreq(terms[i]); + } + return result; + } + } diff --git a/src/java/org/apache/lucene/search/Similarity.java b/src/java/org/apache/lucene/search/Similarity.java index 10751b4b0cc..f3e8af09d34 100644 --- a/src/java/org/apache/lucene/search/Similarity.java +++ b/src/java/org/apache/lucene/search/Similarity.java @@ -17,6 +17,7 @@ package org.apache.lucene.search; */ import java.io.IOException; +import java.io.Serializable; import java.util.Collection; import java.util.Iterator; @@ -84,7 +85,7 @@ import org.apache.lucene.document.Field; // for javadoc * @see IndexWriter#setSimilarity(Similarity) * @see Searcher#setSimilarity(Similarity) */ -public abstract class Similarity { +public abstract class Similarity 
implements Serializable { /** The Similarity implementation used by default. */ private static Similarity defaultImpl = new DefaultSimilarity(); diff --git a/src/java/org/apache/lucene/search/TermQuery.java b/src/java/org/apache/lucene/search/TermQuery.java index 6ff9b2994a7..12cb998c7cf 100644 --- a/src/java/org/apache/lucene/search/TermQuery.java +++ b/src/java/org/apache/lucene/search/TermQuery.java @@ -17,6 +17,8 @@ package org.apache.lucene.search; */ import java.io.IOException; +import java.util.Set; + import org.apache.lucene.index.Term; import org.apache.lucene.index.TermDocs; import org.apache.lucene.index.IndexReader; @@ -28,14 +30,17 @@ public class TermQuery extends Query { private Term term; private class TermWeight implements Weight { + private Similarity similarity; private Searcher searcher; private float value; private float idf; private float queryNorm; private float queryWeight; - public TermWeight(Searcher searcher) { - this.searcher = searcher; + public TermWeight(Searcher searcher) + throws IOException { + this.similarity = getSimilarity(searcher); + idf = similarity.idf(term, searcher); // compute idf } public String toString() { return "weight(" + TermQuery.this + ")"; } @@ -44,7 +49,6 @@ public class TermQuery extends Query { public float getValue() { return value; } public float sumOfSquaredWeights() throws IOException { - idf = getSimilarity(searcher).idf(term, searcher); // compute idf queryWeight = idf * getBoost(); // compute query weight return queryWeight * queryWeight; // square it } @@ -52,16 +56,16 @@ public class TermQuery extends Query { public void normalize(float queryNorm) { this.queryNorm = queryNorm; queryWeight *= queryNorm; // normalize query weight - value = queryWeight * idf; // idf for document + value = queryWeight * idf; // idf for document } public Scorer scorer(IndexReader reader) throws IOException { TermDocs termDocs = reader.termDocs(term); - + if (termDocs == null) return null; - - return new TermScorer(this, 
termDocs, getSimilarity(searcher), + + return new TermScorer(this, termDocs, similarity, reader.norms(term.field())); } @@ -72,7 +76,7 @@ public class TermQuery extends Query { result.setDescription("weight("+getQuery()+" in "+doc+"), product of:"); Explanation idfExpl = - new Explanation(idf, "idf(docFreq=" + searcher.docFreq(term) + ")"); + new Explanation(idf, "idf(docFreq=" + reader.docFreq(term) + ")"); // explain query weight Explanation queryExpl = new Explanation(); @@ -82,16 +86,16 @@ public class TermQuery extends Query { if (getBoost() != 1.0f) queryExpl.addDetail(boostExpl); queryExpl.addDetail(idfExpl); - + Explanation queryNormExpl = new Explanation(queryNorm,"queryNorm"); queryExpl.addDetail(queryNormExpl); - + queryExpl.setValue(boostExpl.getValue() * idfExpl.getValue() * queryNormExpl.getValue()); result.addDetail(queryExpl); - + // explain field weight String field = term.field(); Explanation fieldExpl = new Explanation(); @@ -113,7 +117,7 @@ public class TermQuery extends Query { fieldExpl.setValue(tfExpl.getValue() * idfExpl.getValue() * fieldNormExpl.getValue()); - + result.addDetail(fieldExpl); // combine them @@ -134,10 +138,14 @@ public class TermQuery extends Query { /** Returns the term of this query. */ public Term getTerm() { return term; } - protected Weight createWeight(Searcher searcher) { + protected Weight createWeight(Searcher searcher) throws IOException { return new TermWeight(searcher); } + public void extractTerms(Set terms) { + terms.add(getTerm()); + } + /** Prints a user-readable version of this query. 
*/ public String toString(String field) { StringBuffer buffer = new StringBuffer(); diff --git a/src/java/org/apache/lucene/search/spans/SpanQuery.java b/src/java/org/apache/lucene/search/spans/SpanQuery.java index 511317eb574..527f6bdc781 100644 --- a/src/java/org/apache/lucene/search/spans/SpanQuery.java +++ b/src/java/org/apache/lucene/search/spans/SpanQuery.java @@ -37,7 +37,7 @@ public abstract class SpanQuery extends Query { /** Returns a collection of all terms matched by this query.*/ public abstract Collection getTerms(); - protected Weight createWeight(Searcher searcher) { + protected Weight createWeight(Searcher searcher) throws IOException { return new SpanWeight(this, searcher); } diff --git a/src/java/org/apache/lucene/search/spans/SpanWeight.java b/src/java/org/apache/lucene/search/spans/SpanWeight.java index 2a6aba75dc6..c81190c9066 100644 --- a/src/java/org/apache/lucene/search/spans/SpanWeight.java +++ b/src/java/org/apache/lucene/search/spans/SpanWeight.java @@ -32,7 +32,7 @@ import org.apache.lucene.search.Explanation; import org.apache.lucene.search.Similarity; class SpanWeight implements Weight { - private Searcher searcher; + private Similarity similarity; private float value; private float idf; private float queryNorm; @@ -41,17 +41,19 @@ class SpanWeight implements Weight { private Collection terms; private SpanQuery query; - public SpanWeight(SpanQuery query, Searcher searcher) { - this.searcher = searcher; + public SpanWeight(SpanQuery query, Searcher searcher) + throws IOException { + this.similarity = query.getSimilarity(searcher); this.query = query; this.terms = query.getTerms(); + + idf = this.query.getSimilarity(searcher).idf(terms, searcher); } public Query getQuery() { return query; } public float getValue() { return value; } public float sumOfSquaredWeights() throws IOException { - idf = this.query.getSimilarity(searcher).idf(terms, searcher); queryWeight = idf * query.getBoost(); // compute query weight return queryWeight * 
queryWeight; // square it } @@ -64,7 +66,7 @@ class SpanWeight implements Weight { public Scorer scorer(IndexReader reader) throws IOException { return new SpanScorer(query.getSpans(reader), this, - query.getSimilarity(searcher), + similarity, reader.norms(query.getField())); } @@ -81,7 +83,7 @@ class SpanWeight implements Weight { Term term = (Term)i.next(); docFreqs.append(term.text()); docFreqs.append("="); - docFreqs.append(searcher.docFreq(term)); + docFreqs.append(reader.docFreq(term)); if (i.hasNext()) { docFreqs.append(" ");