- Applied Chuck's and Wolf's patch for bug 31841

http://issues.apache.org/bugzilla/show_bug.cgi?id=31841


git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@162008 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Otis Gospodnetic 2005-04-20 03:28:32 +00:00
parent bc1a7b0f6f
commit e4bd715106
18 changed files with 422 additions and 102 deletions

View File

@ -21,7 +21,13 @@ Changes in runtime behavior
and WildcardQueries before). Use setLowercaseExpandedTerms(false)
to disable that behavior but note that this also affects
PrefixQueries and WildcardQueries. (Daniel Naber)
4. Document frequency that is computed when MultiSearcher is used is now
computed correctly and "globally" across subsearchers and indices, while
before it used to be computed locally to each index, which caused
ranking across multiple indices not to be equivalent.
(Chuck Williams, Wolf Siberski via Otis)
New features
1. Added support for stored compressed fields (patch #31149)

View File

@ -17,6 +17,8 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
import java.util.Iterator;
import java.util.Set;
import java.util.Vector;
import java.util.Arrays;
@ -153,16 +155,16 @@ public class BooleanQuery extends Query {
}
private class BooleanWeight implements Weight {
protected Searcher searcher;
protected Similarity similarity;
protected Vector weights = new Vector();
public BooleanWeight(Searcher searcher) {
this.searcher = searcher;
public BooleanWeight(Searcher searcher)
throws IOException {
this.similarity = getSimilarity(searcher);
for (int i = 0 ; i < clauses.size(); i++) {
BooleanClause c = (BooleanClause)clauses.elementAt(i);
weights.add(c.getQuery().createWeight(searcher));
}
//System.out.println("Creating " + getClass().getName());
}
public Query getQuery() { return BooleanQuery.this; }
@ -213,7 +215,7 @@ public class BooleanQuery extends Query {
if (allRequired && noneBoolean) { // ConjunctionScorer is okay
ConjunctionScorer result =
new ConjunctionScorer(getSimilarity(searcher));
new ConjunctionScorer(similarity);
for (int i = 0 ; i < weights.size(); i++) {
Weight w = (Weight)weights.elementAt(i);
Scorer subScorer = w.scorer(reader);
@ -225,7 +227,7 @@ public class BooleanQuery extends Query {
}
// Use good-old BooleanScorer instead.
BooleanScorer result = new BooleanScorer(getSimilarity(searcher));
BooleanScorer result = new BooleanScorer(similarity);
for (int i = 0 ; i < weights.size(); i++) {
BooleanClause c = (BooleanClause)clauses.elementAt(i);
@ -269,7 +271,7 @@ public class BooleanQuery extends Query {
if (coord == 1) // only one clause matched
sumExpl = sumExpl.getDetails()[0]; // eliminate wrapper
float coordFactor = getSimilarity(searcher).coord(coord, maxCoord);
float coordFactor = similarity.coord(coord, maxCoord);
if (coordFactor == 1.0f) // coord is no-op
return sumExpl; // eliminate wrapper
else {
@ -286,13 +288,16 @@ public class BooleanQuery extends Query {
private class BooleanWeight2 extends BooleanWeight {
/* Merge into BooleanWeight in case the 1.4 BooleanScorer is dropped */
public BooleanWeight2(Searcher searcher) { super(searcher); }
public BooleanWeight2(Searcher searcher)
throws IOException {
super(searcher);
}
/** @return An alternative Scorer that uses and provides skipTo(),
* and scores documents in document number order.
*/
public Scorer scorer(IndexReader reader) throws IOException {
BooleanScorer2 result = new BooleanScorer2(getSimilarity(searcher));
BooleanScorer2 result = new BooleanScorer2(similarity);
for (int i = 0 ; i < weights.size(); i++) {
BooleanClause c = (BooleanClause)clauses.elementAt(i);
@ -319,7 +324,7 @@ public class BooleanQuery extends Query {
return useScorer14;
}
protected Weight createWeight(Searcher searcher) {
protected Weight createWeight(Searcher searcher) throws IOException {
return getUseScorer14() ? (Weight) new BooleanWeight(searcher)
: (Weight) new BooleanWeight2(searcher);
}
@ -358,6 +363,18 @@ public class BooleanQuery extends Query {
return this; // no clauses rewrote
}
// inherit javadoc
// Collects the full term set of the boolean tree by delegating to each
// clause's sub-query.
public void extractTerms(Set terms) {
for (Iterator i = clauses.iterator(); i.hasNext();) {
BooleanClause clause = (BooleanClause) i.next();
clause.getQuery().extractTerms(terms);
}
}
// inherit javadoc
// Merges the per-searcher rewrites of this query into a single query.
public Query combine(Query[] queries) {
return Query.mergeBooleanQueries(queries);
}
public Object clone() {
BooleanQuery clone = (BooleanQuery)super.clone();

View File

@ -19,6 +19,7 @@ package org.apache.lucene.search;
import org.apache.lucene.index.IndexReader;
import java.io.IOException;
import java.util.BitSet;
import java.util.Set;
/**
@ -56,8 +57,9 @@ extends Query {
* Returns a Weight that applies the filter to the enclosed query's Weight.
* This is accomplished by overriding the Scorer returned by the Weight.
*/
protected Weight createWeight (final Searcher searcher) {
protected Weight createWeight (final Searcher searcher) throws IOException {
final Weight weight = query.createWeight (searcher);
final Similarity similarity = query.getSimilarity(searcher);
return new Weight() {
// pass these methods through to enclosed query's weight
@ -74,7 +76,7 @@ extends Query {
public Scorer scorer (IndexReader indexReader) throws IOException {
final Scorer scorer = weight.scorer (indexReader);
final BitSet bitset = filter.bits (indexReader);
return new Scorer (query.getSimilarity (searcher)) {
return new Scorer (similarity) {
// pass these methods through to the enclosed scorer
public boolean next() throws IOException { return scorer.next(); }
@ -116,6 +118,11 @@ extends Query {
return query;
}
// inherit javadoc
// Terms come from the wrapped query only; the filter is not consulted here.
public void extractTerms(Set terms) {
getQuery().extractTerms(terms);
}
/** Prints a user-readable version of this query. */
public String toString (String s) {
return "filtered("+query.toString(s)+")->"+filter;

View File

@ -23,7 +23,7 @@ import org.apache.lucene.document.Document;
/** A ranked list of documents, used to hold search results. */
public final class Hits {
private Query query;
private Weight weight;
private Searcher searcher;
private Filter filter = null;
private Sort sort = null;
@ -37,14 +37,14 @@ public final class Hits {
private int maxDocs = 200; // max to cache
Hits(Searcher s, Query q, Filter f) throws IOException {
query = q;
weight = q.weight(s);
searcher = s;
filter = f;
getMoreDocs(50); // retrieve 100 initially
}
Hits(Searcher s, Query q, Filter f, Sort o) throws IOException {
query = q;
weight = q.weight(s);
searcher = s;
filter = f;
sort = o;
@ -61,7 +61,7 @@ public final class Hits {
}
int n = min * 2; // double # retrieved
TopDocs topDocs = (sort == null) ? searcher.search(query, filter, n) : searcher.search(query, filter, n, sort);
TopDocs topDocs = (sort == null) ? searcher.search(weight, filter, n) : searcher.search(weight, filter, n, sort);
length = topDocs.totalHits;
ScoreDoc[] scoreDocs = topDocs.scoreDocs;

View File

@ -88,11 +88,17 @@ public class IndexSearcher extends Searcher {
// inherit javadoc
public TopDocs search(Query query, Filter filter, final int nDocs)
throws IOException {
return search(query.weight(this), filter, nDocs);
}
// inherit javadoc
public TopDocs search(Weight weight, Filter filter, final int nDocs)
throws IOException {
if (nDocs <= 0) // null might be returned from hq.top() below.
throw new IllegalArgumentException("nDocs must be > 0");
Scorer scorer = query.weight(this).scorer(reader);
Scorer scorer = weight.scorer(reader);
if (scorer == null)
return new TopDocs(0, new ScoreDoc[0]);
@ -124,7 +130,14 @@ public class IndexSearcher extends Searcher {
public TopFieldDocs search(Query query, Filter filter, final int nDocs,
Sort sort)
throws IOException {
Scorer scorer = query.weight(this).scorer(reader);
return search(query.weight(this), filter, nDocs, sort);
}
// inherit javadoc
public TopFieldDocs search(Weight weight, Filter filter, final int nDocs,
Sort sort)
throws IOException {
Scorer scorer = weight.scorer(reader);
if (scorer == null)
return new TopFieldDocs(0, new ScoreDoc[0], sort.fields);
@ -153,6 +166,12 @@ public class IndexSearcher extends Searcher {
// inherit javadoc
public void search(Query query, Filter filter,
final HitCollector results) throws IOException {
search(query.weight(this), filter, results);
}
// inherit javadoc
public void search(Weight weight, Filter filter,
final HitCollector results) throws IOException {
HitCollector collector = results;
if (filter != null) {
final BitSet bits = filter.bits(reader);
@ -165,7 +184,7 @@ public class IndexSearcher extends Searcher {
};
}
Scorer scorer = query.weight(this).scorer(reader);
Scorer scorer = weight.scorer(reader);
if (scorer == null)
return;
scorer.score(collector);
@ -181,7 +200,10 @@ public class IndexSearcher extends Searcher {
}
public Explanation explain(Query query, int doc) throws IOException {
return query.weight(this).explain(reader, doc);
return explain(query.weight(this), doc);
}
public Explanation explain(Weight weight, int doc) throws IOException {
return weight.explain(reader, doc);
}
}

View File

@ -107,27 +107,30 @@ public class MultiPhraseQuery extends Query {
}
private class MultiPhraseWeight implements Weight {
private Searcher searcher;
private Similarity similarity;
private float value;
private float idf;
private float queryNorm;
private float queryWeight;
public MultiPhraseWeight(Searcher searcher) {
this.searcher = searcher;
public MultiPhraseWeight(Searcher searcher)
throws IOException {
this.similarity = getSimilarity(searcher);
// compute idf
Iterator i = termArrays.iterator();
while (i.hasNext()) {
Term[] terms = (Term[])i.next();
for (int j=0; j<terms.length; j++) {
idf += getSimilarity(searcher).idf(terms[j], searcher);
}
}
}
public Query getQuery() { return MultiPhraseQuery.this; }
public float getValue() { return value; }
public float sumOfSquaredWeights() throws IOException {
Iterator i = termArrays.iterator();
while (i.hasNext()) {
Term[] terms = (Term[])i.next();
for (int j=0; j<terms.length; j++)
idf += getSimilarity(searcher).idf(terms[j], searcher);
}
queryWeight = idf * getBoost(); // compute query weight
return queryWeight * queryWeight; // square it
}
@ -159,10 +162,10 @@ public class MultiPhraseQuery extends Query {
}
if (slop == 0)
return new ExactPhraseScorer(this, tps, getPositions(), getSimilarity(searcher),
return new ExactPhraseScorer(this, tps, getPositions(), similarity,
reader.norms(field));
else
return new SloppyPhraseScorer(this, tps, getPositions(), getSimilarity(searcher),
return new SloppyPhraseScorer(this, tps, getPositions(), similarity,
slop, reader.norms(field));
}
@ -239,7 +242,7 @@ public class MultiPhraseQuery extends Query {
}
}
protected Weight createWeight(Searcher searcher) {
protected Weight createWeight(Searcher searcher) throws IOException {
return new MultiPhraseWeight(searcher);
}

View File

@ -17,6 +17,10 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
@ -27,6 +31,93 @@ import org.apache.lucene.index.Term;
* or {@link #search(Query,Filter)} methods.
*/
public class MultiSearcher extends Searcher {
/**
 * Document Frequency cache acting as a Dummy-Searcher.
 * This class is no full-fledged Searcher, but only supports
 * the methods necessary to initialize Weights.
 */
private static class CachedDfSource extends Searcher {
private Map dfMap; // Map from Terms to corresponding doc freqs
private int maxDoc; // document count
// Caches pre-aggregated doc freqs and the combined document count so
// Weights can be built without querying the sub-searchers again.
public CachedDfSource(Map dfMap, int maxDoc) {
this.dfMap = dfMap;
this.maxDoc = maxDoc;
}
// Returns the cached document frequency for the given term.
// NOTE(review): a term missing from the cache surfaces as a
// NullPointerException that is translated into IllegalArgumentException;
// an explicit null check on dfMap.get(term) would state the intent
// more directly.
public int docFreq(Term term) {
int df;
try {
df = ((Integer) dfMap.get(term)).intValue();
} catch (NullPointerException e) {
throw new IllegalArgumentException("df for term " + term.text()
+ " not available");
}
return df;
}
// Batch variant of docFreq(); every term must be present in the cache.
public int[] docFreqs(Term[] terms) throws IOException {
int[] result = new int[terms.length];
for (int i = 0; i < terms.length; i++) {
result[i] = docFreq(terms[i]);
}
return result;
}
// Combined document count across all sub-searchers, as passed in.
public int maxDoc() {
return maxDoc;
}
public Query rewrite(Query query) throws IOException {
// this is a bit of a hack. We know that a query which
// creates a Weight based on this Dummy-Searcher is
// always already rewritten (see prepareWeight()).
// Therefore we just return the unmodified query here
return query;
}
// All remaining Searcher operations are unsupported: this object exists
// solely to feed cached doc freqs into Weight construction.
public void close() throws IOException {
throw new UnsupportedOperationException();
}
public Document doc(int i) throws IOException{
throw new UnsupportedOperationException();
}
public Explanation explain(Query query,int doc) throws IOException{
throw new UnsupportedOperationException();
}
public Explanation explain(Weight weight,int doc) throws IOException {
throw new UnsupportedOperationException();
}
public void search(Query query, Filter filter, HitCollector results) throws IOException {
throw new UnsupportedOperationException();
}
public void search(Weight weight, Filter filter, HitCollector results) throws IOException {
throw new UnsupportedOperationException();
}
public TopDocs search(Query query,Filter filter,int n) throws IOException {
throw new UnsupportedOperationException();
}
public TopDocs search(Weight weight,Filter filter,int n) throws IOException {
throw new UnsupportedOperationException();
}
public TopFieldDocs search(Query query,Filter filter,int n,Sort sort) throws IOException {
throw new UnsupportedOperationException();
}
public TopFieldDocs search(Weight weight,Filter filter,int n,Sort sort) throws IOException {
throw new UnsupportedOperationException();
}
};
private Searchable[] searchables;
private int[] starts;
private int maxDoc = 0;
@ -114,11 +205,18 @@ public class MultiSearcher extends Searcher {
public TopDocs search(Query query, Filter filter, int nDocs)
throws IOException {
Weight weight = prepareWeight(query);
return search(weight, filter, nDocs);
}
public TopDocs search(Weight weight, Filter filter, int nDocs)
throws IOException {
HitQueue hq = new HitQueue(nDocs);
int totalHits = 0;
for (int i = 0; i < searchables.length; i++) { // search each searcher
TopDocs docs = searchables[i].search(query, filter, nDocs);
TopDocs docs = searchables[i].search(weight, filter, nDocs);
totalHits += docs.totalHits; // update totalHits
ScoreDoc[] scoreDocs = docs.scoreDocs;
for (int j = 0; j < scoreDocs.length; j++) { // merge scoreDocs into hq
@ -139,11 +237,17 @@ public class MultiSearcher extends Searcher {
public TopFieldDocs search (Query query, Filter filter, int n, Sort sort)
throws IOException {
Weight weight = prepareWeight(query);
return search(weight, filter, n, sort);
}
public TopFieldDocs search (Weight weight, Filter filter, int n, Sort sort)
throws IOException {
FieldDocSortedHitQueue hq = null;
int totalHits = 0;
for (int i = 0; i < searchables.length; i++) { // search each searcher
TopFieldDocs docs = searchables[i].search (query, filter, n, sort);
TopFieldDocs docs = searchables[i].search (weight, filter, n, sort);
if (hq == null) hq = new FieldDocSortedHitQueue (docs.fields, n);
totalHits += docs.totalHits; // update totalHits
ScoreDoc[] scoreDocs = docs.scoreDocs;
@ -165,12 +269,19 @@ public class MultiSearcher extends Searcher {
// inherit javadoc
public void search(Query query, Filter filter, final HitCollector results)
throws IOException {
Weight weight = prepareWeight(query);
search(weight, filter, results);
}
// inherit javadoc
public void search(Weight weight, Filter filter, final HitCollector results)
throws IOException {
for (int i = 0; i < searchables.length; i++) {
final int start = starts[i];
searchables[i].search(query, filter, new HitCollector() {
searchables[i].search(weight, filter, new HitCollector() {
public void collect(int doc, float score) {
results.collect(doc + start, score);
}
@ -184,12 +295,62 @@ public class MultiSearcher extends Searcher {
for (int i = 0; i < searchables.length; i++) {
queries[i] = searchables[i].rewrite(original);
}
return original.combine(queries);
return queries[0].combine(queries);
}
public Explanation explain(Query query, int doc) throws IOException {
Weight weight = prepareWeight(query);
return explain(weight, doc);
}
public Explanation explain(Weight weight, int doc) throws IOException {
int i = subSearcher(doc); // find searcher index
return searchables[i].explain(query,doc-starts[i]); // dispatch to searcher
return searchables[i].explain(weight,doc-starts[i]); // dispatch to searcher
}
/**
 * Distributed query processing is done in the following steps:
 * 1. rewrite query
 * 2. extract necessary terms
 * 3. collect dfs for these terms from the Searchables
 * 4. create query weight using aggregate dfs.
 * 5. distribute that weight to Searchables
 * 6. merge results
 *
 * Steps 1-4 are done here, 5+6 in the search() methods
 *
 * @return a Weight built against the aggregate document frequencies,
 *         ready to be distributed to the sub-searchers
 */
private Weight prepareWeight(Query original) throws IOException {
// step 1: rewrite the query against this multi-searcher
Query rewrittenQuery = rewrite(original);
// step 2: gather all terms the rewritten query touches
Set terms = new HashSet();
rewrittenQuery.extractTerms(terms);
// step 3: sum each term's doc freq over all sub-searchers so idf is
// computed "globally" rather than per index
Term[] allTermsArray = new Term[terms.size()];
terms.toArray(allTermsArray);
int[] aggregatedDfs = new int[terms.size()];
for (int i = 0; i < searchables.length; i++) {
int[] dfs = searchables[i].docFreqs(allTermsArray);
for(int j=0; j<aggregatedDfs.length; j++){
aggregatedDfs[j] += dfs[j];
}
}
HashMap dfMap = new HashMap();
for(int i=0; i<allTermsArray.length; i++) {
dfMap.put(allTermsArray[i], new Integer(aggregatedDfs[i]));
}
// step 4: build the Weight against a dummy searcher serving the
// aggregated doc freqs and the combined maxDoc
int numDocs = maxDoc();
CachedDfSource cacheSim = new CachedDfSource(dfMap, numDocs);
return rewrittenQuery.weight(cacheSim);
}
}

View File

@ -42,10 +42,7 @@ public class ParallelMultiSearcher extends MultiSearcher {
* TODO: parallelize this one too
*/
public int docFreq(Term term) throws IOException {
int docFreq = 0;
for (int i = 0; i < searchables.length; i++)
docFreq += searchables[i].docFreq(term);
return docFreq;
return super.docFreq(term);
}
/**
@ -53,7 +50,7 @@ public class ParallelMultiSearcher extends MultiSearcher {
* Searchable, waits for each search to complete and merge
* the results back together.
*/
public TopDocs search(Query query, Filter filter, int nDocs)
public TopDocs search(Weight weight, Filter filter, int nDocs)
throws IOException {
HitQueue hq = new HitQueue(nDocs);
int totalHits = 0;
@ -64,7 +61,7 @@ public class ParallelMultiSearcher extends MultiSearcher {
msta[i] =
new MultiSearcherThread(
searchables[i],
query,
weight,
filter,
nDocs,
hq,
@ -101,7 +98,7 @@ public class ParallelMultiSearcher extends MultiSearcher {
* Searchable, waits for each search to complete and merges
* the results back together.
*/
public TopFieldDocs search(Query query, Filter filter, int nDocs, Sort sort)
public TopFieldDocs search(Weight weight, Filter filter, int nDocs, Sort sort)
throws IOException {
// don't specify the fields - we'll wait to do this until we get results
FieldDocSortedHitQueue hq = new FieldDocSortedHitQueue (null, nDocs);
@ -112,7 +109,7 @@ public class ParallelMultiSearcher extends MultiSearcher {
msta[i] =
new MultiSearcherThread(
searchables[i],
query,
weight,
filter,
nDocs,
hq,
@ -181,11 +178,7 @@ public class ParallelMultiSearcher extends MultiSearcher {
* @see org.apache.lucene.search.Searchable#rewrite(org.apache.lucene.search.Query)
*/
public Query rewrite(Query original) throws IOException {
Query[] queries = new Query[searchables.length];
for (int i = 0; i < searchables.length; i++) {
queries[i] = searchables[i].rewrite(original);
}
return original.combine(queries);
return super.rewrite(original);
}
}
@ -196,7 +189,7 @@ public class ParallelMultiSearcher extends MultiSearcher {
class MultiSearcherThread extends Thread {
private Searchable searchable;
private Query query;
private Weight weight;
private Filter filter;
private int nDocs;
private TopDocs docs;
@ -208,7 +201,7 @@ class MultiSearcherThread extends Thread {
public MultiSearcherThread(
Searchable searchable,
Query query,
Weight weight,
Filter filter,
int nDocs,
HitQueue hq,
@ -217,7 +210,7 @@ class MultiSearcherThread extends Thread {
String name) {
super(name);
this.searchable = searchable;
this.query = query;
this.weight = weight;
this.filter = filter;
this.nDocs = nDocs;
this.hq = hq;
@ -227,7 +220,7 @@ class MultiSearcherThread extends Thread {
public MultiSearcherThread(
Searchable searchable,
Query query,
Weight weight,
Filter filter,
int nDocs,
FieldDocSortedHitQueue hq,
@ -237,7 +230,7 @@ class MultiSearcherThread extends Thread {
String name) {
super(name);
this.searchable = searchable;
this.query = query;
this.weight = weight;
this.filter = filter;
this.nDocs = nDocs;
this.hq = hq;
@ -248,8 +241,8 @@ class MultiSearcherThread extends Thread {
public void run() {
try {
docs = (sort == null) ? searchable.search (query, filter, nDocs)
: searchable.search (query, filter, nDocs, sort);
docs = (sort == null) ? searchable.search (weight, filter, nDocs)
: searchable.search (weight, filter, nDocs, sort);
}
// Store the IOException for later use by the caller of this thread
catch (IOException ioe) {

View File

@ -108,27 +108,30 @@ public class PhrasePrefixQuery extends Query {
}
private class PhrasePrefixWeight implements Weight {
private Searcher searcher;
private Similarity similarity;
private float value;
private float idf;
private float queryNorm;
private float queryWeight;
public PhrasePrefixWeight(Searcher searcher) {
this.searcher = searcher;
public PhrasePrefixWeight(Searcher searcher)
throws IOException {
this.similarity = getSimilarity(searcher);
// compute idf
Iterator i = termArrays.iterator();
while (i.hasNext()) {
Term[] terms = (Term[])i.next();
for (int j=0; j<terms.length; j++) {
idf += getSimilarity(searcher).idf(terms[j], searcher);
}
}
}
public Query getQuery() { return PhrasePrefixQuery.this; }
public float getValue() { return value; }
public float sumOfSquaredWeights() throws IOException {
Iterator i = termArrays.iterator();
while (i.hasNext()) {
Term[] terms = (Term[])i.next();
for (int j=0; j<terms.length; j++)
idf += getSimilarity(searcher).idf(terms[j], searcher);
}
queryWeight = idf * getBoost(); // compute query weight
return queryWeight * queryWeight; // square it
}
@ -160,10 +163,10 @@ public class PhrasePrefixQuery extends Query {
}
if (slop == 0)
return new ExactPhraseScorer(this, tps, getPositions(), getSimilarity(searcher),
return new ExactPhraseScorer(this, tps, getPositions(), similarity,
reader.norms(field));
else
return new SloppyPhraseScorer(this, tps, getPositions(), getSimilarity(searcher),
return new SloppyPhraseScorer(this, tps, getPositions(), similarity,
slop, reader.norms(field));
}
@ -226,7 +229,7 @@ public class PhrasePrefixQuery extends Query {
}
}
protected Weight createWeight(Searcher searcher) {
protected Weight createWeight(Searcher searcher) throws IOException {
if (termArrays.size() == 1) { // optimize one-term case
Term[] terms = (Term[])termArrays.get(0);
BooleanQuery boq = new BooleanQuery(true);

View File

@ -17,6 +17,7 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
import java.util.Set;
import java.util.Vector;
import org.apache.lucene.index.Term;
@ -100,14 +101,17 @@ public class PhraseQuery extends Query {
}
private class PhraseWeight implements Weight {
private Searcher searcher;
private Similarity similarity;
private float value;
private float idf;
private float queryNorm;
private float queryWeight;
public PhraseWeight(Searcher searcher) {
this.searcher = searcher;
public PhraseWeight(Searcher searcher)
throws IOException {
this.similarity = getSimilarity(searcher);
idf = similarity.idf(terms, searcher);
}
public String toString() { return "weight(" + PhraseQuery.this + ")"; }
@ -116,7 +120,6 @@ public class PhraseQuery extends Query {
public float getValue() { return value; }
public float sumOfSquaredWeights() throws IOException {
idf = getSimilarity(searcher).idf(terms, searcher);
queryWeight = idf * getBoost(); // compute query weight
return queryWeight * queryWeight; // square it
}
@ -140,11 +143,11 @@ public class PhraseQuery extends Query {
}
if (slop == 0) // optimize exact case
return new ExactPhraseScorer(this, tps, getPositions(), getSimilarity(searcher),
return new ExactPhraseScorer(this, tps, getPositions(), similarity,
reader.norms(field));
else
return
new SloppyPhraseScorer(this, tps, getPositions(), getSimilarity(searcher), slop,
new SloppyPhraseScorer(this, tps, getPositions(), similarity, slop,
reader.norms(field));
}
@ -168,7 +171,7 @@ public class PhraseQuery extends Query {
docFreqs.append(term.text());
docFreqs.append("=");
docFreqs.append(searcher.docFreq(term));
docFreqs.append(reader.docFreq(term));
query.append(term.text());
}
@ -228,7 +231,7 @@ public class PhraseQuery extends Query {
}
}
protected Weight createWeight(Searcher searcher) {
protected Weight createWeight(Searcher searcher) throws IOException {
if (terms.size() == 1) { // optimize one-term case
Term term = (Term)terms.elementAt(0);
Query termQuery = new TermQuery(term);
@ -238,6 +241,12 @@ public class PhraseQuery extends Query {
return new PhraseWeight(searcher);
}
/**
 * Adds this phrase's terms to the given set.
 * @see org.apache.lucene.search.Query#extractTerms(java.util.Set)
 */
public void extractTerms(Set queryTerms) {
queryTerms.addAll(terms);
}
/** Prints a user-readable version of this query. */
public String toString(String f) {

View File

@ -20,6 +20,7 @@ import java.io.IOException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import org.apache.lucene.index.IndexReader;
@ -81,7 +82,7 @@ public abstract class Query implements java.io.Serializable, Cloneable {
*
* <p>Only implemented by primitive queries, which re-write to themselves.
*/
protected Weight createWeight(Searcher searcher) {
protected Weight createWeight(Searcher searcher) throws IOException {
throw new UnsupportedOperationException();
}
@ -107,6 +108,19 @@ public abstract class Query implements java.io.Serializable, Cloneable {
* {@link #createWeight(Searcher)} implementation.
*/
// Default combine: requires every per-searcher rewrite to be identical
// to this query, in which case no merging is needed and this query is
// returned unchanged.
public Query combine(Query[] queries) {
for (int i = 0; i < queries.length; i++) {
if (!this.equals(queries[i])) {
throw new IllegalArgumentException();
}
}
return this;
}
/**
 * Expert: adds all terms occurring in this query to the terms set.
 * The base implementation throws UnsupportedOperationException; query
 * subclasses override it to contribute their terms.
 */
public void extractTerms(Set terms) {
// needs to be implemented by query subclasses
throw new UnsupportedOperationException();
}

View File

@ -44,6 +44,11 @@ public class RemoteSearchable
local.search(query, filter, results);
}
public void search(Weight weight, Filter filter, HitCollector results)
throws IOException {
local.search(weight, filter, results);
}
public void close() throws IOException {
local.close();
}
@ -52,6 +57,11 @@ public class RemoteSearchable
return local.docFreq(term);
}
public int[] docFreqs(Term[] terms) throws IOException {
return local.docFreqs(terms);
}
public int maxDoc() throws IOException {
return local.maxDoc();
}
@ -60,11 +70,20 @@ public class RemoteSearchable
return local.search(query, filter, n);
}
public TopDocs search(Weight weight, Filter filter, int n) throws IOException {
return local.search(weight, filter, n);
}
public TopFieldDocs search (Query query, Filter filter, int n, Sort sort)
throws IOException {
return local.search (query, filter, n, sort);
}
public TopFieldDocs search (Weight weight, Filter filter, int n, Sort sort)
throws IOException {
return local.search (weight, filter, n, sort);
}
public Document doc(int i) throws IOException {
return local.doc(i);
}
@ -77,6 +96,10 @@ public class RemoteSearchable
return local.explain(query, doc);
}
public Explanation explain(Weight weight, int doc) throws IOException {
return local.explain(weight, doc);
}
/** Exports a searcher for the index in args[0] named
* "//localhost/Searchable". */
public static void main(String args[]) throws Exception {

View File

@ -48,10 +48,19 @@ public interface Searchable extends java.rmi.Remote {
* @param filter if non-null, a bitset used to eliminate some documents
* @param results to receive hits
* @throws BooleanQuery.TooManyClauses
*
* @deprecated
*/
void search(Query query, Filter filter, HitCollector results)
throws IOException;
/** Expert: Low-level search implementation.
* Identical to {@link #search(Query, Filter, HitCollector)}, but takes
* a Weight instead of a query.
*/
void search(Weight weight, Filter filter, HitCollector results)
throws IOException;
/** Frees resources associated with this Searcher.
* Be careful not to call this method while you are still using objects
* like {@link Hits}.
@ -64,6 +73,12 @@ public interface Searchable extends java.rmi.Remote {
*/
int docFreq(Term term) throws IOException;
/** Expert: For each term in the terms array, calculates the number of
* documents containing <code>term</code>. Returns an array with these
* document frequencies. Used to minimize number of remote calls.
*/
int[] docFreqs(Term[] terms) throws IOException;
/** Expert: Returns one greater than the largest possible document number.
* Called by search code to compute term weights.
* @see IndexReader#maxDoc()
@ -78,9 +93,17 @@ public interface Searchable extends java.rmi.Remote {
* <p>Applications should usually call {@link Searcher#search(Query)} or
* {@link Searcher#search(Query,Filter)} instead.
* @throws BooleanQuery.TooManyClauses
*
* @deprecated
*/
TopDocs search(Query query, Filter filter, int n) throws IOException;
/** Expert: Low-level search implementation.
* Identical to {@link #search(Query, Filter, int)}, but takes
* a Weight instead of a query.
*/
TopDocs search(Weight weight, Filter filter, int n) throws IOException;
/** Expert: Returns the stored fields of document <code>i</code>.
* Called by {@link HitCollector} implementations.
* @see IndexReader#document(int)
@ -103,6 +126,12 @@ public interface Searchable extends java.rmi.Remote {
*/
Explanation explain(Query query, int doc) throws IOException;
/**
* Identical to {@link #explain(Query, int)}, but takes
* a Weight instead of a query.
*/
Explanation explain(Weight weight, int doc) throws IOException;
/** Expert: Low-level search implementation with arbitrary sorting. Finds
* the top <code>n</code> hits for <code>query</code>, applying
* <code>filter</code> if non-null, and sorting the hits by the criteria in
@ -111,7 +140,16 @@ public interface Searchable extends java.rmi.Remote {
* <p>Applications should usually call {@link
* Searcher#search(Query,Filter,Sort)} instead.
* @throws BooleanQuery.TooManyClauses
*
* @deprecated
*/
TopFieldDocs search(Query query, Filter filter, int n, Sort sort)
throws IOException;
/** Expert: Low-level search implementation.
* Identical to {@link #search(Query, Filter, int, Sort)}, but takes
* a Weight instead of a query.
*/
TopFieldDocs search(Weight weight, Filter filter, int n, Sort sort)
throws IOException;
}

View File

@ -18,6 +18,8 @@ package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.index.Term;
/** An abstract base class for search implementations.
* Implements some common utility methods.
*/
@ -93,4 +95,15 @@ public abstract class Searcher implements Searchable {
public Similarity getSimilarity() {
return this.similarity;
}
// inherit javadoc
// Default implementation answers each term individually via docFreq(Term);
// subclasses may override to batch the lookups (e.g. for remote searchers).
public int[] docFreqs(Term[] terms) throws IOException {
int[] result = new int[terms.length];
for (int i = 0; i < terms.length; i++) {
result[i] = docFreq(terms[i]);
}
return result;
}
}

View File

@ -17,6 +17,7 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
import java.io.Serializable;
import java.util.Collection;
import java.util.Iterator;
@ -84,7 +85,7 @@ import org.apache.lucene.document.Field; // for javadoc
* @see IndexWriter#setSimilarity(Similarity)
* @see Searcher#setSimilarity(Similarity)
*/
public abstract class Similarity {
public abstract class Similarity implements Serializable {
/** The Similarity implementation used by default. */
private static Similarity defaultImpl = new DefaultSimilarity();

View File

@ -17,6 +17,8 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.IndexReader;
@ -28,14 +30,17 @@ public class TermQuery extends Query {
private Term term;
private class TermWeight implements Weight {
private Similarity similarity;
private Searcher searcher;
private float value;
private float idf;
private float queryNorm;
private float queryWeight;
public TermWeight(Searcher searcher) {
this.searcher = searcher;
public TermWeight(Searcher searcher)
throws IOException {
this.similarity = getSimilarity(searcher);
idf = similarity.idf(term, searcher); // compute idf
}
public String toString() { return "weight(" + TermQuery.this + ")"; }
@ -44,7 +49,6 @@ public class TermQuery extends Query {
public float getValue() { return value; }
public float sumOfSquaredWeights() throws IOException {
idf = getSimilarity(searcher).idf(term, searcher); // compute idf
queryWeight = idf * getBoost(); // compute query weight
return queryWeight * queryWeight; // square it
}
@ -52,16 +56,16 @@ public class TermQuery extends Query {
public void normalize(float queryNorm) {
this.queryNorm = queryNorm;
queryWeight *= queryNorm; // normalize query weight
value = queryWeight * idf; // idf for document
value = queryWeight * idf; // idf for document
}
public Scorer scorer(IndexReader reader) throws IOException {
TermDocs termDocs = reader.termDocs(term);
if (termDocs == null)
return null;
return new TermScorer(this, termDocs, getSimilarity(searcher),
return new TermScorer(this, termDocs, similarity,
reader.norms(term.field()));
}
@ -72,7 +76,7 @@ public class TermQuery extends Query {
result.setDescription("weight("+getQuery()+" in "+doc+"), product of:");
Explanation idfExpl =
new Explanation(idf, "idf(docFreq=" + searcher.docFreq(term) + ")");
new Explanation(idf, "idf(docFreq=" + reader.docFreq(term) + ")");
// explain query weight
Explanation queryExpl = new Explanation();
@ -82,16 +86,16 @@ public class TermQuery extends Query {
if (getBoost() != 1.0f)
queryExpl.addDetail(boostExpl);
queryExpl.addDetail(idfExpl);
Explanation queryNormExpl = new Explanation(queryNorm,"queryNorm");
queryExpl.addDetail(queryNormExpl);
queryExpl.setValue(boostExpl.getValue() *
idfExpl.getValue() *
queryNormExpl.getValue());
result.addDetail(queryExpl);
// explain field weight
String field = term.field();
Explanation fieldExpl = new Explanation();
@ -113,7 +117,7 @@ public class TermQuery extends Query {
fieldExpl.setValue(tfExpl.getValue() *
idfExpl.getValue() *
fieldNormExpl.getValue());
result.addDetail(fieldExpl);
// combine them
@ -134,10 +138,14 @@ public class TermQuery extends Query {
/** Returns the term of this query. */
public Term getTerm() { return term; }
protected Weight createWeight(Searcher searcher) {
protected Weight createWeight(Searcher searcher) throws IOException {
return new TermWeight(searcher);
}
/** Adds the single {@code Term} this query matches to the given set. */
public void extractTerms(Set terms) {
terms.add(getTerm());
}
/** Prints a user-readable version of this query. */
public String toString(String field) {
StringBuffer buffer = new StringBuffer();

View File

@ -37,7 +37,7 @@ public abstract class SpanQuery extends Query {
/** Returns a collection of all terms matched by this query.*/
public abstract Collection getTerms();
protected Weight createWeight(Searcher searcher) {
protected Weight createWeight(Searcher searcher) throws IOException {
return new SpanWeight(this, searcher);
}

View File

@ -32,7 +32,7 @@ import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.Similarity;
class SpanWeight implements Weight {
private Searcher searcher;
private Similarity similarity;
private float value;
private float idf;
private float queryNorm;
@ -41,17 +41,19 @@ class SpanWeight implements Weight {
private Collection terms;
private SpanQuery query;
public SpanWeight(SpanQuery query, Searcher searcher) {
this.searcher = searcher;
public SpanWeight(SpanQuery query, Searcher searcher)
throws IOException {
this.similarity = query.getSimilarity(searcher);
this.query = query;
this.terms = query.getTerms();
idf = this.query.getSimilarity(searcher).idf(terms, searcher);
}
public Query getQuery() { return query; }
public float getValue() { return value; }
public float sumOfSquaredWeights() throws IOException {
idf = this.query.getSimilarity(searcher).idf(terms, searcher);
queryWeight = idf * query.getBoost(); // compute query weight
return queryWeight * queryWeight; // square it
}
@ -64,7 +66,7 @@ class SpanWeight implements Weight {
public Scorer scorer(IndexReader reader) throws IOException {
return new SpanScorer(query.getSpans(reader), this,
query.getSimilarity(searcher),
similarity,
reader.norms(query.getField()));
}
@ -81,7 +83,7 @@ class SpanWeight implements Weight {
Term term = (Term)i.next();
docFreqs.append(term.text());
docFreqs.append("=");
docFreqs.append(searcher.docFreq(term));
docFreqs.append(reader.docFreq(term));
if (i.hasNext()) {
docFreqs.append(" ");