mirror of https://github.com/apache/lucene.git
Added ParallelMultiSearcher.
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150172 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f8d25df312
commit
b1541ce027
|
@ -31,6 +31,10 @@ $Id$
|
||||||
others. (A conjunctive BooleanQuery is a BooleanQuery where all
|
others. (A conjunctive BooleanQuery is a BooleanQuery where all
|
||||||
clauses are required.) (cutting)
|
clauses are required.) (cutting)
|
||||||
|
|
||||||
|
4. Added new class ParallelMultiSearcher. Combined with
|
||||||
|
RemoteSearchable this makes it easy to implement distributed
|
||||||
|
search systems. (Jean-Francois Halleux via cutting)
|
||||||
|
|
||||||
|
|
||||||
1.3 final
|
1.3 final
|
||||||
|
|
||||||
|
|
|
@ -81,6 +81,10 @@ public class MultiSearcher extends Searcher {
|
||||||
starts[searchables.length] = maxDoc;
|
starts[searchables.length] = maxDoc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected int[] getStarts() {
|
||||||
|
return starts;
|
||||||
|
}
|
||||||
|
|
||||||
/** Frees resources associated with this <code>Searcher</code>. */
|
/** Frees resources associated with this <code>Searcher</code>. */
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
for (int i = 0; i < searchables.length; i++)
|
for (int i = 0; i < searchables.length; i++)
|
||||||
|
|
|
@ -0,0 +1,248 @@
|
||||||
|
package org.apache.lucene.search;
|
||||||
|
|
||||||
|
/* ====================================================================
|
||||||
|
* The Apache Software License, Version 1.1
|
||||||
|
*
|
||||||
|
* Copyright (c) 2004 The Apache Software Foundation. All rights
|
||||||
|
* reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in
|
||||||
|
* the documentation and/or other materials provided with the
|
||||||
|
* distribution.
|
||||||
|
*
|
||||||
|
* 3. The end-user documentation included with the redistribution,
|
||||||
|
* if any, must include the following acknowledgment:
|
||||||
|
* "This product includes software developed by the
|
||||||
|
* Apache Software Foundation (http://www.apache.org/)."
|
||||||
|
* Alternately, this acknowledgment may appear in the software itself,
|
||||||
|
* if and wherever such third-party acknowledgments normally appear.
|
||||||
|
*
|
||||||
|
* 4. The names "Apache" and "Apache Software Foundation" and
|
||||||
|
* "Apache Lucene" must not be used to endorse or promote products
|
||||||
|
* derived from this software without prior written permission. For
|
||||||
|
* written permission, please contact apache@apache.org.
|
||||||
|
*
|
||||||
|
* 5. Products derived from this software may not be called "Apache",
|
||||||
|
* "Apache Lucene", nor may "Apache" appear in their name, without
|
||||||
|
* prior written permission of the Apache Software Foundation.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
|
||||||
|
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||||
|
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
|
||||||
|
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||||
|
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||||
|
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
* ====================================================================
|
||||||
|
*
|
||||||
|
* This software consists of voluntary contributions made by many
|
||||||
|
* individuals on behalf of the Apache Software Foundation. For more
|
||||||
|
* information on the Apache Software Foundation, please see
|
||||||
|
* <http://www.apache.org/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
|
||||||
|
/** Implements parallel search over a set of <code>Searchables</code>.
|
||||||
|
*
|
||||||
|
* <p>Applications usually need only call the inherited {@link #search(Query)}
|
||||||
|
* or {@link #search(Query,Filter)} methods.
|
||||||
|
*/
|
||||||
|
public class ParallelMultiSearcher extends MultiSearcher {
|
||||||
|
|
||||||
|
private Searchable[] searchables;
|
||||||
|
private int[] starts;
|
||||||
|
|
||||||
|
/** Creates a searcher which searches <i>searchables</i>. */
|
||||||
|
public ParallelMultiSearcher(Searchable[] searchables) throws IOException {
|
||||||
|
super(searchables);
|
||||||
|
this.searchables=searchables;
|
||||||
|
this.starts=getStarts();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* TODO: parallelize this one too
|
||||||
|
*/
|
||||||
|
public int docFreq(Term term) throws IOException {
|
||||||
|
int docFreq = 0;
|
||||||
|
for (int i = 0; i < searchables.length; i++)
|
||||||
|
docFreq += searchables[i].docFreq(term);
|
||||||
|
return docFreq;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A search implementation which spans a new thread for each
|
||||||
|
* Searchable, waits for each search to complete and merge
|
||||||
|
* the results back together.
|
||||||
|
*/
|
||||||
|
public TopDocs search(Query query, Filter filter, int nDocs)
|
||||||
|
throws IOException {
|
||||||
|
HitQueue hq = new HitQueue(nDocs);
|
||||||
|
int totalHits = 0;
|
||||||
|
MultiSearcherThread[] msta =
|
||||||
|
new MultiSearcherThread[searchables.length];
|
||||||
|
for (int i = 0; i < searchables.length; i++) { // search each searcher
|
||||||
|
// Assume not too many searchables and cost of creating a thread is by far inferior to a search
|
||||||
|
msta[i] =
|
||||||
|
new MultiSearcherThread(
|
||||||
|
searchables[i],
|
||||||
|
query,
|
||||||
|
filter,
|
||||||
|
nDocs,
|
||||||
|
hq,
|
||||||
|
i,
|
||||||
|
starts,
|
||||||
|
"MultiSearcher thread #" + (i + 1));
|
||||||
|
msta[i].start();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < searchables.length; i++) {
|
||||||
|
try {
|
||||||
|
msta[i].join();
|
||||||
|
} catch (InterruptedException ie) {
|
||||||
|
; // TODO: what should we do with this???
|
||||||
|
}
|
||||||
|
IOException ioe = msta[i].getIOException();
|
||||||
|
if (ioe == null) {
|
||||||
|
totalHits += msta[i].hits();
|
||||||
|
} else {
|
||||||
|
// if one search produced an IOException, rethrow it
|
||||||
|
throw ioe;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
|
||||||
|
for (int i = hq.size() - 1; i >= 0; i--) // put docs in array
|
||||||
|
scoreDocs[i] = (ScoreDoc) hq.pop();
|
||||||
|
|
||||||
|
return new TopDocs(totalHits, scoreDocs);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Lower-level search API.
|
||||||
|
*
|
||||||
|
* <p>{@link HitCollector#collect(int,float)} is called for every non-zero
|
||||||
|
* scoring document.
|
||||||
|
*
|
||||||
|
* <p>Applications should only use this if they need <i>all</i> of the
|
||||||
|
* matching documents. The high-level search API ({@link
|
||||||
|
* Searcher#search(Query)}) is usually more efficient, as it skips
|
||||||
|
* non-high-scoring hits.
|
||||||
|
*
|
||||||
|
* @param query to match documents
|
||||||
|
* @param filter if non-null, a bitset used to eliminate some documents
|
||||||
|
* @param results to receive hits
|
||||||
|
*
|
||||||
|
* TODO: parallelize this one too
|
||||||
|
*/
|
||||||
|
public void search(Query query, Filter filter, final HitCollector results)
|
||||||
|
throws IOException {
|
||||||
|
for (int i = 0; i < searchables.length; i++) {
|
||||||
|
|
||||||
|
final int start = starts[i];
|
||||||
|
|
||||||
|
searchables[i].search(query, filter, new HitCollector() {
|
||||||
|
public void collect(int doc, float score) {
|
||||||
|
results.collect(doc + start, score);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* TODO: this one could be parallelized too
|
||||||
|
* @see org.apache.lucene.search.Searchable#rewrite(org.apache.lucene.search.Query)
|
||||||
|
*/
|
||||||
|
public Query rewrite(Query original) throws IOException {
|
||||||
|
Query[] queries = new Query[searchables.length];
|
||||||
|
for (int i = 0; i < searchables.length; i++) {
|
||||||
|
queries[i] = searchables[i].rewrite(original);
|
||||||
|
}
|
||||||
|
return original.combine(queries);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A thread subclass for searching a single searchable
|
||||||
|
*/
|
||||||
|
class MultiSearcherThread extends Thread {
|
||||||
|
|
||||||
|
private Searchable searchable;
|
||||||
|
private Query query;
|
||||||
|
private Filter filter;
|
||||||
|
private int nDocs;
|
||||||
|
private int hits;
|
||||||
|
private TopDocs docs;
|
||||||
|
private int i;
|
||||||
|
private HitQueue hq;
|
||||||
|
private int[] starts;
|
||||||
|
private IOException ioe;
|
||||||
|
|
||||||
|
public MultiSearcherThread(
|
||||||
|
Searchable searchable,
|
||||||
|
Query query,
|
||||||
|
Filter filter,
|
||||||
|
int nDocs,
|
||||||
|
HitQueue hq,
|
||||||
|
int i,
|
||||||
|
int[] starts,
|
||||||
|
String name) {
|
||||||
|
super(name);
|
||||||
|
this.searchable = searchable;
|
||||||
|
this.query = query;
|
||||||
|
this.filter = filter;
|
||||||
|
this.nDocs = nDocs;
|
||||||
|
this.hq = hq;
|
||||||
|
this.i = i;
|
||||||
|
this.starts = starts;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void run() {
|
||||||
|
try {
|
||||||
|
docs = searchable.search(query, filter, nDocs);
|
||||||
|
}
|
||||||
|
// Store the IOException for later use by the caller of this thread
|
||||||
|
catch (IOException ioe) {
|
||||||
|
this.ioe = ioe;
|
||||||
|
}
|
||||||
|
if (ioe == null) {
|
||||||
|
ScoreDoc[] scoreDocs = docs.scoreDocs;
|
||||||
|
for (int j = 0;
|
||||||
|
j < scoreDocs.length;
|
||||||
|
j++) { // merge scoreDocs into hq
|
||||||
|
ScoreDoc scoreDoc = scoreDocs[j];
|
||||||
|
scoreDoc.doc += starts[i]; // convert doc
|
||||||
|
//it would be so nice if we had a thread-safe insert
|
||||||
|
synchronized (hq) {
|
||||||
|
if (!hq.insert(scoreDoc))
|
||||||
|
break;
|
||||||
|
} // no more scores > minScore
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public int hits() {
|
||||||
|
return docs.totalHits;
|
||||||
|
}
|
||||||
|
|
||||||
|
public IOException getIOException() {
|
||||||
|
return ioe;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -81,6 +81,14 @@ public class TestMultiSearcher extends TestCase
|
||||||
super(name);
|
super(name);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return a new instance of the concrete MultiSearcher class
|
||||||
|
* used in this test
|
||||||
|
*/
|
||||||
|
protected MultiSearcher getMultiSearcherInstance(Searcher[] searchers) throws IOException {
|
||||||
|
return new MultiSearcher(searchers);
|
||||||
|
}
|
||||||
|
|
||||||
public void testEmptyIndex()
|
public void testEmptyIndex()
|
||||||
throws Exception
|
throws Exception
|
||||||
{
|
{
|
||||||
|
@ -134,7 +142,7 @@ public class TestMultiSearcher extends TestCase
|
||||||
searchers[0] = new IndexSearcher(indexStoreB);
|
searchers[0] = new IndexSearcher(indexStoreB);
|
||||||
searchers[1] = new IndexSearcher(indexStoreA);
|
searchers[1] = new IndexSearcher(indexStoreA);
|
||||||
// creating the multiSearcher
|
// creating the multiSearcher
|
||||||
Searcher mSearcher = new MultiSearcher(searchers);
|
Searcher mSearcher = getMultiSearcherInstance(searchers);
|
||||||
// performing the search
|
// performing the search
|
||||||
Hits hits = mSearcher.search(query);
|
Hits hits = mSearcher.search(query);
|
||||||
|
|
||||||
|
@ -171,7 +179,7 @@ public class TestMultiSearcher extends TestCase
|
||||||
searchers2[0] = new IndexSearcher(indexStoreB);
|
searchers2[0] = new IndexSearcher(indexStoreB);
|
||||||
searchers2[1] = new IndexSearcher(indexStoreA);
|
searchers2[1] = new IndexSearcher(indexStoreA);
|
||||||
// creating the mulitSearcher
|
// creating the mulitSearcher
|
||||||
Searcher mSearcher2 = new MultiSearcher(searchers2);
|
Searcher mSearcher2 = getMultiSearcherInstance(searchers2);
|
||||||
// performing the same search
|
// performing the same search
|
||||||
Hits hits2 = mSearcher2.search(query);
|
Hits hits2 = mSearcher2.search(query);
|
||||||
|
|
||||||
|
@ -213,7 +221,7 @@ public class TestMultiSearcher extends TestCase
|
||||||
searchers3[0] = new IndexSearcher(indexStoreB);
|
searchers3[0] = new IndexSearcher(indexStoreB);
|
||||||
searchers3[1] = new IndexSearcher(indexStoreA);
|
searchers3[1] = new IndexSearcher(indexStoreA);
|
||||||
// creating the mulitSearcher
|
// creating the mulitSearcher
|
||||||
Searcher mSearcher3 = new MultiSearcher(searchers3);
|
Searcher mSearcher3 = getMultiSearcherInstance(searchers3);
|
||||||
// performing the same search
|
// performing the same search
|
||||||
Hits hits3 = mSearcher3.search(query);
|
Hits hits3 = mSearcher3.search(query);
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,73 @@
|
||||||
|
package org.apache.lucene.search;
|
||||||
|
|
||||||
|
/* ====================================================================
|
||||||
|
* The Apache Software License, Version 1.1
|
||||||
|
*
|
||||||
|
* Copyright (c) 2001 The Apache Software Foundation. All rights
|
||||||
|
* reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in
|
||||||
|
* the documentation and/or other materials provided with the
|
||||||
|
* distribution.
|
||||||
|
*
|
||||||
|
* 3. The end-user documentation included with the redistribution,
|
||||||
|
* if any, must include the following acknowledgment:
|
||||||
|
* "This product includes software developed by the
|
||||||
|
* Apache Software Foundation (http://www.apache.org/)."
|
||||||
|
* Alternately, this acknowledgment may appear in the software itself,
|
||||||
|
* if and wherever such third-party acknowledgments normally appear.
|
||||||
|
*
|
||||||
|
* 4. The names "Apache" and "Apache Software Foundation" and
|
||||||
|
* "Apache Lucene" must not be used to endorse or promote products
|
||||||
|
* derived from this software without prior written permission. For
|
||||||
|
* written permission, please contact apache@apache.org.
|
||||||
|
*
|
||||||
|
* 5. Products derived from this software may not be called "Apache",
|
||||||
|
* "Apache Lucene", nor may "Apache" appear in their name, without
|
||||||
|
* prior written permission of the Apache Software Foundation.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
|
||||||
|
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||||
|
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
|
||||||
|
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||||
|
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||||
|
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
* ====================================================================
|
||||||
|
*
|
||||||
|
* This software consists of voluntary contributions made by many
|
||||||
|
* individuals on behalf of the Apache Software Foundation. For more
|
||||||
|
* information on the Apache Software Foundation, please see
|
||||||
|
* <http://www.apache.org/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Unit tests for the ParallelMultiSearcher
|
||||||
|
*/
|
||||||
|
public class TestParallelMultiSearcher extends TestMultiSearcher {
|
||||||
|
|
||||||
|
public TestParallelMultiSearcher(String name) {
|
||||||
|
super(name);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected MultiSearcher getMultiSearcherInstance(Searcher[] searchers)
|
||||||
|
throws IOException {
|
||||||
|
return new ParallelMultiSearcher(searchers);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in New Issue