From 1fc5c8ee22ed65e020dee0af9ec2cff0e843666a Mon Sep 17 00:00:00 2001 From: Shai Erera Date: Thu, 9 Sep 2010 10:49:52 +0000 Subject: [PATCH] LUCENE-2636: Create ChainingCollector (trunk) git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@995375 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 3 + .../lucene/search/ChainingCollector.java | 100 +++++++++++++++++ .../lucene/search/ChainingCollectorTest.java | 103 ++++++++++++++++++ 3 files changed, 206 insertions(+) create mode 100644 lucene/src/java/org/apache/lucene/search/ChainingCollector.java create mode 100644 lucene/src/test/org/apache/lucene/search/ChainingCollectorTest.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 6ba5a524a2d..51e050a6302 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -213,6 +213,9 @@ New features to gather the hit-count per sub-clause and per document while a search is running. (Simon Willnauer, Mike McCandless) +* LUCENE-2636: Added ChainingCollector which allows chaining several Collectors + together and be passed to IndexSearcher.search methods. (Shai Erera) + Optimizations * LUCENE-2410: ~20% speedup on exact (slop=0) PhraseQuery matching. diff --git a/lucene/src/java/org/apache/lucene/search/ChainingCollector.java b/lucene/src/java/org/apache/lucene/search/ChainingCollector.java new file mode 100644 index 00000000000..dfd3e3bb4fc --- /dev/null +++ b/lucene/src/java/org/apache/lucene/search/ChainingCollector.java @@ -0,0 +1,100 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.Scorer; + +/** + * A {@link Collector} which allows chaining several {@link Collector}s in order + * to process the matching documents emitted to {@link #collect(int)}. This + * collector accepts a list of {@link Collector}s in its constructor, allowing + * for some of them to be null. It optimizes away those + * null collectors, so that they are not acessed during collection + * time. + *

+ * NOTE: if all the collectors passed to the constructor are null, then + * {@link IllegalArgumentException} is thrown - it is useless to run the search + * with 0 collectors. + */ +public class ChainingCollector extends Collector { + + private final Collector[] collectors; + + public ChainingCollector(Collector... collectors) { + // For the user's convenience, we allow null collectors to be passed. + // However, to improve performance, these null collectors are found + // and dropped from the array we save for actual collection time. + int n = 0; + for (Collector c : collectors) { + if (c != null) { + n++; + } + } + + if (n == 0) { + throw new IllegalArgumentException("At least 1 collector must not be null"); + } else if (n == collectors.length) { + // No null collectors, can use the given list as is. + this.collectors = collectors; + } else { + this.collectors = new Collector[n]; + n = 0; + for (Collector c : collectors) { + if (c != null) { + this.collectors[n++] = c; + } + } + } + } + + @Override + public boolean acceptsDocsOutOfOrder() { + for (Collector c : collectors) { + if (!c.acceptsDocsOutOfOrder()) { + return false; + } + } + return true; + } + + @Override + public void collect(int doc) throws IOException { + for (Collector c : collectors) { + c.collect(doc); + } + } + + @Override + public void setNextReader(IndexReader reader, int o) throws IOException { + for (Collector c : collectors) { + c.setNextReader(reader, o); + } + } + + @Override + public void setScorer(Scorer s) throws IOException { + for (Collector c : collectors) { + c.setScorer(s); + } + } + +} diff --git a/lucene/src/test/org/apache/lucene/search/ChainingCollectorTest.java b/lucene/src/test/org/apache/lucene/search/ChainingCollectorTest.java new file mode 100644 index 00000000000..81879c31a98 --- /dev/null +++ b/lucene/src/test/org/apache/lucene/search/ChainingCollectorTest.java @@ -0,0 +1,103 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import static org.junit.Assert.*; + +import java.io.IOException; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.util.LuceneTestCaseJ4; +import org.junit.Test; + +public class ChainingCollectorTest extends LuceneTestCaseJ4 { + + private static class DummyCollector extends Collector { + + boolean acceptsDocsOutOfOrderCalled = false; + boolean collectCalled = false; + boolean setNextReaderCalled = false; + boolean setScorerCalled = false; + + @Override + public boolean acceptsDocsOutOfOrder() { + acceptsDocsOutOfOrderCalled = true; + return true; + } + + @Override + public void collect(int doc) throws IOException { + collectCalled = true; + } + + @Override + public void setNextReader(IndexReader reader, int docBase) throws IOException { + setNextReaderCalled = true; + } + + @Override + public void setScorer(Scorer scorer) throws IOException { + setScorerCalled = true; + } + + } + + @Test + public void testNullCollectors() throws Exception { + // Tests that the collector rejects all null collectors. + try { + new ChainingCollector(null, null); + fail("all collectors null should not be supported"); + } catch (IllegalArgumentException e) { + // expected + } + + // Tests that the collector handles some null collectors well. If it + // doesn't, an NPE would be thrown. + Collector c = new ChainingCollector(new DummyCollector(), null, new DummyCollector()); + assertTrue(c.acceptsDocsOutOfOrder()); + c.collect(1); + c.setNextReader(null, 0); + c.setScorer(null); + } + + @Test + public void testCollector() throws Exception { + // Tests that the collector delegates calls to input collectors properly. + + // Tests that the collector handles some null collectors well. If it + // doesn't, an NPE would be thrown. + DummyCollector[] dcs = new DummyCollector[] { new DummyCollector(), new DummyCollector() }; + Collector c = new ChainingCollector(dcs); + assertTrue(c.acceptsDocsOutOfOrder()); + c.collect(1); + c.setNextReader(null, 0); + c.setScorer(null); + + for (DummyCollector dc : dcs) { + assertTrue(dc.acceptsDocsOutOfOrderCalled); + assertTrue(dc.collectCalled); + assertTrue(dc.setNextReaderCalled); + assertTrue(dc.setScorerCalled); + } + + } + +}