Added ParallelMultiSearcher.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150172 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Doug Cutting 2004-01-20 18:37:09 +00:00
parent f8d25df312
commit b1541ce027
5 changed files with 340 additions and 3 deletions

View File

@ -31,6 +31,10 @@ $Id$
others. (A conjunctive BooleanQuery is a BooleanQuery where all
clauses are required.) (cutting)
4. Added new class ParallelMultiSearcher. Combined with
RemoteSearchable this makes it easy to implement distributed
search systems. (Jean-Francois Halleux via cutting)
1.3 final

View File

@ -80,6 +80,10 @@ public class MultiSearcher extends Searcher {
}
starts[searchables.length] = maxDoc;
}
protected int[] getStarts() {
return starts;
}
/** Frees resources associated with this <code>Searcher</code>. */
public void close() throws IOException {

View File

@ -0,0 +1,248 @@
package org.apache.lucene.search;
/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2004 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache Lucene" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* "Apache Lucene", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
import java.io.IOException;
import org.apache.lucene.index.Term;
/** Implements parallel search over a set of <code>Searchables</code>.
*
* <p>Applications usually need only call the inherited {@link #search(Query)}
* or {@link #search(Query,Filter)} methods.
*/
public class ParallelMultiSearcher extends MultiSearcher {
private Searchable[] searchables;
private int[] starts;
/** Creates a searcher which searches <i>searchables</i>. */
public ParallelMultiSearcher(Searchable[] searchables) throws IOException {
super(searchables);
this.searchables=searchables;
this.starts=getStarts();
}
/**
* TODO: parallelize this one too
*/
public int docFreq(Term term) throws IOException {
int docFreq = 0;
for (int i = 0; i < searchables.length; i++)
docFreq += searchables[i].docFreq(term);
return docFreq;
}
/**
* A search implementation which spans a new thread for each
* Searchable, waits for each search to complete and merge
* the results back together.
*/
public TopDocs search(Query query, Filter filter, int nDocs)
throws IOException {
HitQueue hq = new HitQueue(nDocs);
int totalHits = 0;
MultiSearcherThread[] msta =
new MultiSearcherThread[searchables.length];
for (int i = 0; i < searchables.length; i++) { // search each searcher
// Assume not too many searchables and cost of creating a thread is by far inferior to a search
msta[i] =
new MultiSearcherThread(
searchables[i],
query,
filter,
nDocs,
hq,
i,
starts,
"MultiSearcher thread #" + (i + 1));
msta[i].start();
}
for (int i = 0; i < searchables.length; i++) {
try {
msta[i].join();
} catch (InterruptedException ie) {
; // TODO: what should we do with this???
}
IOException ioe = msta[i].getIOException();
if (ioe == null) {
totalHits += msta[i].hits();
} else {
// if one search produced an IOException, rethrow it
throw ioe;
}
}
ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
for (int i = hq.size() - 1; i >= 0; i--) // put docs in array
scoreDocs[i] = (ScoreDoc) hq.pop();
return new TopDocs(totalHits, scoreDocs);
}
/** Lower-level search API.
*
* <p>{@link HitCollector#collect(int,float)} is called for every non-zero
* scoring document.
*
* <p>Applications should only use this if they need <i>all</i> of the
* matching documents. The high-level search API ({@link
* Searcher#search(Query)}) is usually more efficient, as it skips
* non-high-scoring hits.
*
* @param query to match documents
* @param filter if non-null, a bitset used to eliminate some documents
* @param results to receive hits
*
* TODO: parallelize this one too
*/
public void search(Query query, Filter filter, final HitCollector results)
throws IOException {
for (int i = 0; i < searchables.length; i++) {
final int start = starts[i];
searchables[i].search(query, filter, new HitCollector() {
public void collect(int doc, float score) {
results.collect(doc + start, score);
}
});
}
}
/*
* TODO: this one could be parallelized too
* @see org.apache.lucene.search.Searchable#rewrite(org.apache.lucene.search.Query)
*/
public Query rewrite(Query original) throws IOException {
Query[] queries = new Query[searchables.length];
for (int i = 0; i < searchables.length; i++) {
queries[i] = searchables[i].rewrite(original);
}
return original.combine(queries);
}
}
/**
* A thread subclass for searching a single searchable
*/
class MultiSearcherThread extends Thread {
private Searchable searchable;
private Query query;
private Filter filter;
private int nDocs;
private int hits;
private TopDocs docs;
private int i;
private HitQueue hq;
private int[] starts;
private IOException ioe;
public MultiSearcherThread(
Searchable searchable,
Query query,
Filter filter,
int nDocs,
HitQueue hq,
int i,
int[] starts,
String name) {
super(name);
this.searchable = searchable;
this.query = query;
this.filter = filter;
this.nDocs = nDocs;
this.hq = hq;
this.i = i;
this.starts = starts;
}
public void run() {
try {
docs = searchable.search(query, filter, nDocs);
}
// Store the IOException for later use by the caller of this thread
catch (IOException ioe) {
this.ioe = ioe;
}
if (ioe == null) {
ScoreDoc[] scoreDocs = docs.scoreDocs;
for (int j = 0;
j < scoreDocs.length;
j++) { // merge scoreDocs into hq
ScoreDoc scoreDoc = scoreDocs[j];
scoreDoc.doc += starts[i]; // convert doc
//it would be so nice if we had a thread-safe insert
synchronized (hq) {
if (!hq.insert(scoreDoc))
break;
} // no more scores > minScore
}
}
}
public int hits() {
return docs.totalHits;
}
public IOException getIOException() {
return ioe;
}
}

View File

@ -81,6 +81,14 @@ public class TestMultiSearcher extends TestCase
super(name);
}
/**
* Return a new instance of the concrete MultiSearcher class
* used in this test
*/
protected MultiSearcher getMultiSearcherInstance(Searcher[] searchers) throws IOException {
return new MultiSearcher(searchers);
}
public void testEmptyIndex()
throws Exception
{
@ -134,7 +142,7 @@ public class TestMultiSearcher extends TestCase
searchers[0] = new IndexSearcher(indexStoreB);
searchers[1] = new IndexSearcher(indexStoreA);
// creating the multiSearcher
Searcher mSearcher = new MultiSearcher(searchers);
Searcher mSearcher = getMultiSearcherInstance(searchers);
// performing the search
Hits hits = mSearcher.search(query);
@ -171,7 +179,7 @@ public class TestMultiSearcher extends TestCase
searchers2[0] = new IndexSearcher(indexStoreB);
searchers2[1] = new IndexSearcher(indexStoreA);
// creating the mulitSearcher
Searcher mSearcher2 = new MultiSearcher(searchers2);
Searcher mSearcher2 = getMultiSearcherInstance(searchers2);
// performing the same search
Hits hits2 = mSearcher2.search(query);
@ -213,7 +221,7 @@ public class TestMultiSearcher extends TestCase
searchers3[0] = new IndexSearcher(indexStoreB);
searchers3[1] = new IndexSearcher(indexStoreA);
// creating the mulitSearcher
Searcher mSearcher3 = new MultiSearcher(searchers3);
Searcher mSearcher3 = getMultiSearcherInstance(searchers3);
// performing the same search
Hits hits3 = mSearcher3.search(query);

View File

@ -0,0 +1,73 @@
package org.apache.lucene.search;
/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2001 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache Lucene" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* "Apache Lucene", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
import java.io.IOException;
/**
* Unit tests for the ParallelMultiSearcher
*/
public class TestParallelMultiSearcher extends TestMultiSearcher {
public TestParallelMultiSearcher(String name) {
super(name);
}
protected MultiSearcher getMultiSearcherInstance(Searcher[] searchers)
throws IOException {
return new ParallelMultiSearcher(searchers);
}
}