From ad79d914e48c5c2b0683c8331afffe8187ae4fb2 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Mon, 13 Apr 2009 18:33:56 +0000 Subject: [PATCH] LUCENE-1575: switch to new Collector API git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@764551 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 53 ++ .../benchmark/byTask/tasks/ReadTask.java | 19 +- .../byTask/tasks/SearchWithSortTask.java | 32 +- .../lucene/index/TestFieldNormModifier.java | 58 +- .../lucene/misc/TestLengthNormModifier.java | 68 +- .../tier/DistanceFieldComparatorSource.java | 8 +- .../apache/lucene/search/BooleanScorer.java | 81 +- .../apache/lucene/search/BooleanScorer2.java | 29 +- .../org/apache/lucene/search/Collector.java | 160 ++++ .../lucene/search/DisjunctionSumScorer.java | 27 +- .../apache/lucene/search/FieldComparator.java | 110 +-- .../lucene/search/FieldComparatorSource.java | 3 +- .../lucene/search/FieldValueHitQueue.java | 239 +++--- .../apache/lucene/search/HitCollector.java | 12 +- .../lucene/search/HitCollectorWrapper.java | 50 ++ .../apache/lucene/search/IndexSearcher.java | 80 +- .../search/MultiReaderHitCollector.java | 53 -- .../apache/lucene/search/MultiSearcher.java | 54 +- .../lucene/search/ParallelMultiSearcher.java | 73 +- .../search/PositiveScoresOnlyCollector.java | 56 ++ .../lucene/search/QueryWrapperFilter.java | 9 +- .../lucene/search/RemoteSearchable.java | 7 +- .../search/ScoreCachingWrappingScorer.java | 83 ++ src/java/org/apache/lucene/search/Scorer.java | 30 +- .../org/apache/lucene/search/Searchable.java | 32 +- .../org/apache/lucene/search/Searcher.java | 53 +- .../org/apache/lucene/search/SortField.java | 8 +- .../org/apache/lucene/search/TermScorer.java | 34 +- .../lucene/search/TimeLimitedCollector.java | 33 +- .../lucene/search/TimeLimitingCollector.java | 219 ++++++ .../org/apache/lucene/search/TopDocs.java | 12 +- .../lucene/search/TopDocsCollector.java | 138 ++++ .../lucene/search/TopFieldCollector.java | 708 +++++++++++++----- 
.../lucene/search/TopFieldDocCollector.java | 8 +- .../lucene/search/TopScoreDocCollector.java | 117 ++- .../apache/lucene/index/TestIndexReader.java | 21 +- .../org/apache/lucene/index/TestOmitTf.java | 51 +- .../org/apache/lucene/search/CheckHits.java | 38 +- .../lucene/search/JustCompileSearch.java | 580 ++++++++++++++ .../org/apache/lucene/search/QueryUtils.java | 22 +- .../apache/lucene/search/TestDocBoost.java | 14 +- .../search/TestMultiTermConstantScore.java | 12 +- .../TestPositiveScoresOnlyCollector.java | 85 +++ .../TestScoreCachingWrappingScorer.java | 111 +++ .../apache/lucene/search/TestScorerPerf.java | 13 +- .../org/apache/lucene/search/TestSetNorm.java | 14 +- .../apache/lucene/search/TestSimilarity.java | 43 +- .../org/apache/lucene/search/TestSort.java | 93 ++- .../apache/lucene/search/TestTermScorer.java | 51 +- .../search/TestTimeLimitedCollector.java | 14 +- .../search/TestTimeLimitingCollector.java | 337 +++++++++ .../lucene/search/TestTopDocsCollector.java | 198 +++++ .../function/JustCompileSearchSpans.java | 89 +++ .../search/spans/JustCompileSearchSpans.java | 112 +++ 54 files changed, 3854 insertions(+), 800 deletions(-) create mode 100644 src/java/org/apache/lucene/search/Collector.java create mode 100644 src/java/org/apache/lucene/search/HitCollectorWrapper.java delete mode 100644 src/java/org/apache/lucene/search/MultiReaderHitCollector.java create mode 100644 src/java/org/apache/lucene/search/PositiveScoresOnlyCollector.java create mode 100644 src/java/org/apache/lucene/search/ScoreCachingWrappingScorer.java create mode 100644 src/java/org/apache/lucene/search/TimeLimitingCollector.java create mode 100644 src/java/org/apache/lucene/search/TopDocsCollector.java create mode 100644 src/test/org/apache/lucene/search/JustCompileSearch.java create mode 100644 src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java create mode 100644 src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java create mode 100644 
src/test/org/apache/lucene/search/TestTimeLimitingCollector.java create mode 100644 src/test/org/apache/lucene/search/TestTopDocsCollector.java create mode 100644 src/test/org/apache/lucene/search/function/JustCompileSearchSpans.java create mode 100644 src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java diff --git a/CHANGES.txt b/CHANGES.txt index bffefc730cb..e9ee002de2c 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -3,6 +3,32 @@ $Id$ ======================= Trunk (not yet released) ======================= +Changes in backwards compatibility policy + + 1. LUCENE-1575: Searchable.search(Weight, Filter, int, Sort) + currently tracks document scores (including maxScore), and sets + the score in each returned FieldDoc. However, in 3.0 it will stop + tracking document scores. If document scores tracking is still + needed, you can use Searchable.search(Weight, Filter, Collector) + and pass in a TopFieldCollector instance, using the following code + sample: + + + TopFieldCollector tfc = TopFieldCollector.create(sort, numHits, fillFields, + true /* trackDocScores */, + true /* trackMaxScore */); + searcher.search(weight, filter, tfc); + TopDocs results = tfc.topDocs(); + + + Also, the method search(Weight, Filter, Collector) was added to + the Searchable interface and the Searcher abstract class, to + replace the deprecated HitCollector versions. If you either + implement Searchable or extend Searcher, you should change you + code to implement this method. If you already extend + IndexSearcher, no further changes are needed to use Collector. + (Shai Erera via Mike McCandless) + Changes in runtime behavior 1. LUCENE-1424: QueryParser now by default uses constant score query @@ -10,6 +36,19 @@ Changes in runtime behavior already does so for RangeQuery, as well). Call setConstantScoreRewrite(false) to revert to BooleanQuery rewriting method. (Mark Miller via Mike McCandless) + + 2. 
LUCENE-1575: As of 2.9, the core collectors as well as + IndexSearcher's search methods that return top N results, no + longer filter out zero scoring documents. If you rely on this + functionaliy you can use PositiveScoresOnlyCollector like this: + + + TopDocsCollector tdc = new TopScoreDocCollector(10); + Collector c = new PositiveScoresOnlyCollector(tdc); + searcher.search(query, c); + TopDocs hits = tdc.topDocs(); + ... + API Changes @@ -69,6 +108,14 @@ API Changes 12. LUCENE-1500: Added new InvalidTokenOffsetsException to Highlighter methods to denote issues when offsets in TokenStream tokens exceed the length of the provided text. (Mark Harwood) + +13. LUCENE-1575: HitCollector is now deprecated in favor of a new + Collector abstract class. For easy migration, people can use + HitCollectorWrapper which translates (wraps) HitCollector into + Collector. Note that this class is also deprecated and will be + removed when HitCollector is removed. Also TimeLimitedCollector + is deprecated in favor of the new TimeLimitingCollector which + extends Collector. (Shai Erera via Mike McCandless) Bug fixes @@ -258,6 +305,12 @@ Optimizations those segments that did not change, and also speeds up searches that sort by relevance or by field values. (Mark Miller, Mike McCandless) + + 7. LUCENE-1575: The new Collector class decouples collect() from + score computation. Collector.setScorer is called to establish the + current Scorer in-use per segment. Collectors that require the + score should then call Scorer.score() per hit inside + collect(). 
(Shai Erera via Mike McCandless) Documentation diff --git a/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java b/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java index d4345672b2b..723e1b38462 100644 --- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java +++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java @@ -32,6 +32,7 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.TopFieldCollector; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; @@ -62,7 +63,6 @@ public abstract class ReadTask extends PerfTask { public ReadTask(PerfRunData runData) { super(runData); } - public int doLogic() throws Exception { int res = 0; boolean closeReader = false; @@ -102,7 +102,10 @@ public abstract class ReadTask extends PerfTask { final int numHits = numHits(); if (numHits > 0) { if (sort != null) { - hits = searcher.search(q, null, numHits, sort); + TopFieldCollector collector = TopFieldCollector.create(sort, numHits, + true, withScore(), withMaxScore()); + searcher.search(q, collector); + hits = collector.topDocs(); } else { hits = searcher.search(q, numHits); } @@ -180,6 +183,18 @@ public abstract class ReadTask extends PerfTask { */ public abstract boolean withTraverse(); + /** Whether scores should be computed (only useful with + * field sort) */ + public boolean withScore() { + return true; + } + + /** Whether maxScores should be computed (only useful with + * field sort) */ + public boolean withMaxScore() { + return true; + } + /** * Specify the number of hits to traverse. Tasks should override this if they want to restrict the number * of hits that are traversed when {@link #withTraverse()} is true. 
Must be greater than 0. diff --git a/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithSortTask.java b/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithSortTask.java index 26ee127f39a..870d538f114 100644 --- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithSortTask.java +++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithSortTask.java @@ -27,6 +27,8 @@ import org.apache.lucene.search.SortField; */ public class SearchWithSortTask extends ReadTask { + private boolean doScore = true; + private boolean doMaxScore = true; private Sort sort; public SearchWithSortTask(PerfRunData runData) { @@ -34,7 +36,12 @@ public class SearchWithSortTask extends ReadTask { } /** - * SortFields: field:type,field:type + * SortFields: field:type,field:type[,noscore][,nomaxscore] + * + * If noscore is present, then we turn off score tracking + * in {@link org.apache.lucene.search.TopFieldCollector}. + * If nomaxscore is present, then we turn off maxScore tracking + * in {@link org.apache.lucene.search.TopFieldCollector}. 
* * name,byline:int,subject:auto * @@ -43,11 +50,18 @@ public class SearchWithSortTask extends ReadTask { super.setParams(sortField); String[] fields = sortField.split(","); SortField[] sortFields = new SortField[fields.length]; + int upto = 0; for (int i = 0; i < fields.length; i++) { String field = fields[i]; SortField sortField0; if (field.equals("doc")) { sortField0 = SortField.FIELD_DOC; + } else if (field.equals("noscore")) { + doScore = false; + continue; + } else if (field.equals("nomaxscore")) { + doMaxScore = false; + continue; } else { int index = field.lastIndexOf(":"); String fieldName; @@ -62,7 +76,13 @@ public class SearchWithSortTask extends ReadTask { int type = getType(typeString); sortField0 = new SortField(fieldName, type); } - sortFields[i] = sortField0; + sortFields[upto++] = sortField0; + } + + if (upto < sortFields.length) { + SortField[] newSortFields = new SortField[upto]; + System.arraycopy(sortFields, 0, newSortFields, 0, upto); + sortFields = newSortFields; } this.sort = new Sort(sortFields); } @@ -107,6 +127,14 @@ public class SearchWithSortTask extends ReadTask { return false; } + public boolean withScore() { + return doScore; + } + + public boolean withMaxScore() { + return doMaxScore; + } + public Sort getSort() { if (sort == null) { throw new IllegalStateException("No sort field was set"); diff --git a/contrib/miscellaneous/src/test/org/apache/lucene/index/TestFieldNormModifier.java b/contrib/miscellaneous/src/test/org/apache/lucene/index/TestFieldNormModifier.java index 487db91af65..7bc5da71e62 100644 --- a/contrib/miscellaneous/src/test/org/apache/lucene/index/TestFieldNormModifier.java +++ b/contrib/miscellaneous/src/test/org/apache/lucene/index/TestFieldNormModifier.java @@ -22,16 +22,18 @@ import java.util.Arrays; import junit.framework.TestCase; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.MultiReaderHitCollector; -import org.apache.lucene.search.Similarity; -import 
org.apache.lucene.search.DefaultSimilarity; -import org.apache.lucene.search.TermQuery; -import org.apache.lucene.store.RAMDirectory; -import org.apache.lucene.store.Directory; import org.apache.lucene.analysis.SimpleAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexWriter.MaxFieldLength; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.DefaultSimilarity; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.RAMDirectory; /** * Tests changing of field norms with a custom similarity and with fake norms. @@ -52,12 +54,12 @@ public class TestFieldNormModifier extends TestCase { /** inverts the normal notion of lengthNorm */ public static Similarity s = new DefaultSimilarity() { public float lengthNorm(String fieldName, int numTokens) { - return (float)numTokens; + return numTokens; } }; public void setUp() throws Exception { - IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true); + IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true, MaxFieldLength.UNLIMITED); for (int i = 0; i < NUM_DOCS; i++) { Document d = new Document(); @@ -123,14 +125,19 @@ public class TestFieldNormModifier extends TestCase { float lastScore = 0.0f; // default similarity should put docs with shorter length first - searcher.search(new TermQuery(new Term("field", "word")), new MultiReaderHitCollector() { - private int docBase = -1; - public final void collect(int doc, float score) { - scores[doc + docBase] = score; + searcher.search(new TermQuery(new Term("field", "word")), new Collector() { + private int docBase = 0; + private Scorer scorer; + + public final void collect(int doc) throws IOException { + scores[doc + docBase] = scorer.score(); } public void 
setNextReader(IndexReader reader, int docBase) { this.docBase = docBase; } + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + } }); searcher.close(); @@ -147,14 +154,18 @@ public class TestFieldNormModifier extends TestCase { // new norm (with default similarity) should put longer docs first searcher = new IndexSearcher(store); - searcher.search(new TermQuery(new Term("field", "word")), new MultiReaderHitCollector() { - private int docBase = -1; - public final void collect(int doc, float score) { - scores[doc + docBase] = score; + searcher.search(new TermQuery(new Term("field", "word")), new Collector() { + private int docBase = 0; + private Scorer scorer; + public final void collect(int doc) throws IOException { + scores[doc + docBase] = scorer.score(); } public void setNextReader(IndexReader reader, int docBase) { this.docBase = docBase; } + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + } }); searcher.close(); @@ -188,15 +199,18 @@ public class TestFieldNormModifier extends TestCase { float lastScore = 0.0f; // default similarity should return the same score for all documents for this query - searcher.search(new TermQuery(new Term("untokfield", "20061212")), new MultiReaderHitCollector() { - private int docBase = -1; - private int lastMax; - public final void collect(int doc, float score) { - scores[doc + docBase] = score; + searcher.search(new TermQuery(new Term("untokfield", "20061212")), new Collector() { + private int docBase = 0; + private Scorer scorer; + public final void collect(int doc) throws IOException { + scores[doc + docBase] = scorer.score(); } public void setNextReader(IndexReader reader, int docBase) { this.docBase = docBase; } + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + } }); searcher.close(); diff --git a/contrib/miscellaneous/src/test/org/apache/lucene/misc/TestLengthNormModifier.java 
b/contrib/miscellaneous/src/test/org/apache/lucene/misc/TestLengthNormModifier.java index 9e69153406d..95610db7ae8 100644 --- a/contrib/miscellaneous/src/test/org/apache/lucene/misc/TestLengthNormModifier.java +++ b/contrib/miscellaneous/src/test/org/apache/lucene/misc/TestLengthNormModifier.java @@ -17,21 +17,26 @@ package org.apache.lucene.misc; * limitations under the License. */ +import java.io.IOException; + import junit.framework.TestCase; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.MultiReaderHitCollector; -import org.apache.lucene.search.Similarity; -import org.apache.lucene.search.DefaultSimilarity; -import org.apache.lucene.search.TermQuery; -import org.apache.lucene.store.RAMDirectory; -import org.apache.lucene.store.Directory; import org.apache.lucene.analysis.SimpleAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.index.FieldNormModifier; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.IndexWriter.MaxFieldLength; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.DefaultSimilarity; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.RAMDirectory; /** * Tests changing the norms after changing the simularity @@ -52,13 +57,12 @@ public class TestLengthNormModifier extends TestCase { /** inverts the normal notion of lengthNorm */ public static Similarity s = new DefaultSimilarity() { public float lengthNorm(String fieldName, int numTokens) { - return (float)numTokens; + return numTokens; } }; 
public void setUp() throws Exception { - IndexWriter writer = new - IndexWriter(store, new SimpleAnalyzer(), true); + IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true, MaxFieldLength.UNLIMITED); for (int i = 0; i < NUM_DOCS; i++) { Document d = new Document(); @@ -79,9 +83,9 @@ public class TestLengthNormModifier extends TestCase { } public void testMissingField() { - LengthNormModifier lnm = new LengthNormModifier(store, s); + FieldNormModifier fnm = new FieldNormModifier(store, s); try { - lnm.reSetNorms("nobodyherebutuschickens"); + fnm.reSetNorms("nobodyherebutuschickens"); } catch (Exception e) { assertNull("caught something", e); } @@ -100,9 +104,9 @@ public class TestLengthNormModifier extends TestCase { r.close(); - LengthNormModifier lnm = new LengthNormModifier(store, s); + FieldNormModifier fnm = new FieldNormModifier(store, s); try { - lnm.reSetNorms("nonorm"); + fnm.reSetNorms("nonorm"); } catch (Exception e) { assertNull("caught something", e); } @@ -129,14 +133,18 @@ public class TestLengthNormModifier extends TestCase { // default similarity should put docs with shorter length first searcher = new IndexSearcher(store); - searcher.search(new TermQuery(new Term("field", "word")), new MultiReaderHitCollector() { - private int docBase = -1; - public final void collect(int doc, float score) { - scores[doc + docBase] = score; + searcher.search(new TermQuery(new Term("field", "word")), new Collector() { + private int docBase = 0; + private Scorer scorer; + public final void collect(int doc) throws IOException { + scores[doc + docBase] = scorer.score(); } public void setNextReader(IndexReader reader, int docBase) { this.docBase = docBase; } + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + } }); searcher.close(); @@ -151,22 +159,26 @@ public class TestLengthNormModifier extends TestCase { // override the norms to be inverted Similarity s = new DefaultSimilarity() { public float lengthNorm(String 
fieldName, int numTokens) { - return (float)numTokens; + return numTokens; } }; - LengthNormModifier lnm = new LengthNormModifier(store, s); - lnm.reSetNorms("field"); + FieldNormModifier fnm = new FieldNormModifier(store, s); + fnm.reSetNorms("field"); // new norm (with default similarity) should put longer docs first searcher = new IndexSearcher(store); - searcher.search(new TermQuery(new Term("field", "word")), new MultiReaderHitCollector() { - private int docBase = -1; - public final void collect(int doc, float score) { - scores[doc + docBase] = score; + searcher.search(new TermQuery(new Term("field", "word")), new Collector() { + private int docBase = 0; + private Scorer scorer; + public final void collect(int doc) throws IOException { + scores[doc + docBase] = scorer.score(); } public void setNextReader(IndexReader reader, int docBase) { this.docBase = docBase; } + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + } }); searcher.close(); diff --git a/contrib/spatial/src/java/org/apache/lucene/spatial/tier/DistanceFieldComparatorSource.java b/contrib/spatial/src/java/org/apache/lucene/spatial/tier/DistanceFieldComparatorSource.java index 902f88ea67f..8050c46dd27 100644 --- a/contrib/spatial/src/java/org/apache/lucene/spatial/tier/DistanceFieldComparatorSource.java +++ b/contrib/spatial/src/java/org/apache/lucene/spatial/tier/DistanceFieldComparatorSource.java @@ -21,7 +21,6 @@ import java.io.IOException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Filter; -import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.FieldComparatorSource; import org.apache.lucene.search.SortField; @@ -49,8 +48,7 @@ public class DistanceFieldComparatorSource extends FieldComparatorSource { } @Override - public FieldComparator newComparator(String fieldname, - IndexReader[] subReaders, int numHits, int sortPos, boolean reversed) + public FieldComparator 
newComparator(String fieldname, int numHits, int sortPos, boolean reversed) throws IOException { dsdlc = new DistanceScoreDocLookupComparator(distanceFilter, numHits); return dsdlc; @@ -87,7 +85,7 @@ public class DistanceFieldComparatorSource extends FieldComparatorSource { } @Override - public int compareBottom(int doc, float score) { + public int compareBottom(int doc) { final double v2 = distanceFilter.getDistance(doc); if (bottom > v2) { return 1; @@ -98,7 +96,7 @@ public class DistanceFieldComparatorSource extends FieldComparatorSource { } @Override - public void copy(int slot, int doc, float score) { + public void copy(int slot, int doc) { values[slot] = distanceFilter.getDistance(doc); } diff --git a/src/java/org/apache/lucene/search/BooleanScorer.java b/src/java/org/apache/lucene/search/BooleanScorer.java index 066e57b6744..96c0e39f8a4 100644 --- a/src/java/org/apache/lucene/search/BooleanScorer.java +++ b/src/java/org/apache/lucene/search/BooleanScorer.java @@ -80,11 +80,11 @@ final class BooleanScorer extends Scorer { public boolean done; public boolean required = false; public boolean prohibited = false; - public MultiReaderHitCollector collector; + public Collector collector; public SubScorer next; public SubScorer(Scorer scorer, boolean required, boolean prohibited, - MultiReaderHitCollector collector, SubScorer next) + Collector collector, SubScorer next) throws IOException { this.scorer = scorer; this.done = !scorer.next(); @@ -128,18 +128,32 @@ final class BooleanScorer extends Scorer { private int end; private Bucket current; + /** @deprecated use {@link #score(Collector)} instead. */ public void score(HitCollector hc) throws IOException { next(); score(hc, Integer.MAX_VALUE); } + + public void score(Collector collector) throws IOException { + next(); + score(collector, Integer.MAX_VALUE); + } + /** @deprecated use {@link #score(Collector, int)} instead. 
*/ protected boolean score(HitCollector hc, int max) throws IOException { + return score(new HitCollectorWrapper(hc), max); + } + + protected boolean score(Collector collector, int max) throws IOException { if (coordFactors == null) computeCoordFactors(); boolean more; Bucket tmp; + BucketScorer bs = new BucketScorer(); + // The internal loop will set the score and doc before calling collect. + collector.setScorer(bs); do { bucketTable.first = null; @@ -158,7 +172,9 @@ final class BooleanScorer extends Scorer { } if (current.coord >= minNrShouldMatch) { - hc.collect(current.doc, current.score * coordFactors[current.coord]); + bs.score = current.score * coordFactors[current.coord]; + bs.doc = current.doc; + collector.collect(current.doc); } } @@ -210,8 +226,9 @@ final class BooleanScorer extends Scorer { end += BucketTable.SIZE; for (SubScorer sub = scorers; sub != null; sub = sub.next) { Scorer scorer = sub.scorer; + sub.collector.setScorer(scorer); while (!sub.done && scorer.doc() < end) { - sub.collector.collect(scorer.doc(), scorer.score()); + sub.collector.collect(scorer.doc()); sub.done = !scorer.next(); } if (!sub.done) { @@ -237,6 +254,42 @@ final class BooleanScorer extends Scorer { Bucket next; // next valid bucket } + // An internal class which is used in score(Collector, int) for setting the + // current score. This is required since Collector exposes a setScorer method + // and implementations that need the score will call scorer.score(). + // Therefore the only methods that are implemented are score() and doc(). 
+ private static final class BucketScorer extends Scorer { + + float score; + int doc; + + public BucketScorer() { + super(null); + } + + + public Explanation explain(int doc) throws IOException { + return null; + } + + public float score() throws IOException { + return score; + } + + public int doc() { + return doc; + } + + public boolean next() throws IOException { + return false; + } + + public boolean skipTo(int target) throws IOException { + return false; + } + + } + /** A simple hash table of document scores within a range. */ static final class BucketTable { public static final int SIZE = 1 << 11; @@ -249,19 +302,25 @@ final class BooleanScorer extends Scorer { public final int size() { return SIZE; } - public MultiReaderHitCollector newCollector(int mask) { - return new Collector(mask, this); + public Collector newCollector(int mask) { + return new BolleanScorerCollector(mask, this); } } - static final class Collector extends MultiReaderHitCollector { + private static final class BolleanScorerCollector extends Collector { private BucketTable bucketTable; private int mask; - public Collector(int mask, BucketTable bucketTable) { + private Scorer scorer; + + public BolleanScorerCollector(int mask, BucketTable bucketTable) { this.mask = mask; this.bucketTable = bucketTable; } - public final void collect(final int doc, final float score) { + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + } + + public final void collect(final int doc) throws IOException { final BucketTable table = bucketTable; final int i = doc & BucketTable.MASK; Bucket bucket = table.buckets[i]; @@ -270,14 +329,14 @@ final class BooleanScorer extends Scorer { if (bucket.doc != doc) { // invalid bucket bucket.doc = doc; // set doc - bucket.score = score; // initialize score + bucket.score = scorer.score(); // initialize score bucket.bits = mask; // initialize mask bucket.coord = 1; // initialize coord bucket.next = table.first; // push onto valid list 
table.first = bucket; } else { // valid bucket - bucket.score += score; // increment score + bucket.score += scorer.score(); // increment score bucket.bits |= mask; // add bits in mask bucket.coord++; // increment coord } diff --git a/src/java/org/apache/lucene/search/BooleanScorer2.java b/src/java/org/apache/lucene/search/BooleanScorer2.java index 1572e186cb9..98cf6ea7950 100644 --- a/src/java/org/apache/lucene/search/BooleanScorer2.java +++ b/src/java/org/apache/lucene/search/BooleanScorer2.java @@ -300,8 +300,17 @@ class BooleanScorer2 extends Scorer { * @param hc The collector to which all matching documents are passed through * {@link HitCollector#collect(int, float)}. *
When this method is used the {@link #explain(int)} method should not be used. + * @deprecated use {@link #score(Collector)} instead. */ public void score(HitCollector hc) throws IOException { + score(new HitCollectorWrapper(hc)); + } + + /** Scores and collects all matching documents. + * @param collector The collector to which all matching documents are passed through. + *
When this method is used the {@link #explain(int)} method should not be used. + */ + public void score(Collector collector) throws IOException { if (allowDocsOutOfOrder && requiredScorers.size() == 0 && prohibitedScorers.size() < 32) { // fall back to BooleanScorer, scores documents somewhat out of order @@ -314,13 +323,14 @@ class BooleanScorer2 extends Scorer { while (si.hasNext()) { bs.add((Scorer) si.next(), false /* required */, true /* prohibited */); } - bs.score(hc); + bs.score(collector); } else { if (countingSumScorer == null) { initCountingSumScorer(); } + collector.setScorer(this); while (countingSumScorer.next()) { - hc.collect(countingSumScorer.doc(), score()); + collector.collect(countingSumScorer.doc()); } } } @@ -332,12 +342,25 @@ class BooleanScorer2 extends Scorer { * {@link HitCollector#collect(int, float)}. * @param max Do not score documents past this. * @return true if more matching documents may remain. + * @deprecated use {@link #score(Collector, int)} instead. */ protected boolean score(HitCollector hc, int max) throws IOException { + return score(new HitCollectorWrapper(hc), max); + } + + /** Expert: Collects matching documents in a range. + *
Note that {@link #next()} must be called once before this method is + * called for the first time. + * @param collector The collector to which all matching documents are passed through. + * @param max Do not score documents past this. + * @return true if more matching documents may remain. + */ + protected boolean score(Collector collector, int max) throws IOException { // null pointer exception when next() was not called before: int docNr = countingSumScorer.doc(); + collector.setScorer(this); while (docNr < max) { - hc.collect(docNr, score()); + collector.collect(docNr); if (! countingSumScorer.next()) { return false; } diff --git a/src/java/org/apache/lucene/search/Collector.java b/src/java/org/apache/lucene/search/Collector.java new file mode 100644 index 00000000000..c6089f7cd2a --- /dev/null +++ b/src/java/org/apache/lucene/search/Collector.java @@ -0,0 +1,160 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.index.IndexReader; + +/** + *

Expert: Collectors are primarily meant to be used to + * gather raw results from a search, and implement sorting + * or custom result filtering, collation, etc.

+ * + *

As of 2.9, this class replaces the deprecated + * HitCollector, and offers an API for efficient collection + * of hits across sequential {@link IndexReader}s. {@link + * IndexSearcher} advances the collector through each of the + * sub readers, in an arbitrary order. This results in a + * higher performance means of collection.

+ * + *

Lucene's core collectors are derived from Collector. + * Likely your application can use one of these classes, or + * subclass {@link TopDocsCollector}, instead of + * implementing Collector directly: + * + *

+ * + *

Collector decouples the score from the collected doc: + * the score computation is skipped entirely if it's not + * needed. Collectors that do need the score should + * implement the {@link #setScorer} method, to hold onto the + * passed {@link Scorer} instance, and call {@link + * Scorer#score()} within the collect method to compute the + * current hit's score. If your collector may request the + * score for a single hit multiple times, you should use + * {@link ScoreCachingWrappingScorer}.

+ * + *

NOTE: The doc that is passed to the collect + * method is relative to the current reader. If your + * collector needs to resolve this to the docID space of the + * Multi*Reader, you must re-base it by recording the + * docBase from the most recent setNextReader call. Here's + * a simple example showing how to collect docIDs into a + * BitSet:

+ * + *
+ * Searcher searcher = new IndexSearcher(indexReader);
+ * final BitSet bits = new BitSet(indexReader.maxDoc());
+ * searcher.search(query, new Collector() {
+ *   private int docBase;
+ * 
+ *   // ignore scorer
+ *   public void setScorer(Scorer scorer) {
+ *   }
+ * 
+ *   public void collect(int doc) {
+ *     bits.set(doc + docBase);
+ *   }
+ * 
+ *   public void setNextReader(IndexReader reader, int docBase) {
+ *     this.docBase = docBase;
+ *   }
+ * });
+ * 
+ * + *

Not all collectors will need to rebase the docID. For + * example, a collector that simply counts the total number + * of hits would skip it.

+ * + *

NOTE: Prior to 2.9, Lucene silently filtered + * out hits with score <= 0. As of 2.9, the core Collectors + * no longer do that. It's very unusual to have such hits + * (a negative query boost, or function query returning + * negative custom scores, could cause it to happen). If + * you need that behavior, use {@link + * PositiveScoresOnlyCollector}.

+ * + *

NOTE: This API is experimental and might change + * in incompatible ways in the next release.

+ */ +public abstract class Collector { + + /** + * Called before successive calls to {@link #collect(int)}. Implementations + * that need the score of the current document (passed-in to + * {@link #collect(int)}), should save the passed-in Scorer and call + * scorer.score() when needed. + */ + public abstract void setScorer(Scorer scorer) throws IOException; + + /** + * Called once for every document matching a query, with the unbased document + * number. + * + *

+ * Note: This is called in an inner search loop. For good search performance, + * implementations of this method should not call {@link Searcher#doc(int)} or + * {@link org.apache.lucene.index.IndexReader#document(int)} on every hit. + * Doing so can slow searches by an order of magnitude or more. + */ + public abstract void collect(int doc) throws IOException; + + /** + * Called before collecting from each IndexReader. All doc ids in + * {@link #collect(int)} will correspond to reader. + * + * Add docBase to the current IndexReaders internal document id to re-base ids + * in {@link #collect(int)}. + * + * @param reader + * next IndexReader + * @param docBase + */ + public abstract void setNextReader(IndexReader reader, int docBase) throws IOException; + +} diff --git a/src/java/org/apache/lucene/search/DisjunctionSumScorer.java b/src/java/org/apache/lucene/search/DisjunctionSumScorer.java index 0f741cd4e5c..faba7c17da9 100644 --- a/src/java/org/apache/lucene/search/DisjunctionSumScorer.java +++ b/src/java/org/apache/lucene/search/DisjunctionSumScorer.java @@ -112,10 +112,20 @@ class DisjunctionSumScorer extends Scorer { * @param hc The collector to which all matching documents are passed through * {@link HitCollector#collect(int, float)}. *
When this method is used the {@link #explain(int)} method should not be used. + * @deprecated use {@link #score(Collector)} instead. */ public void score(HitCollector hc) throws IOException { + score(new HitCollectorWrapper(hc)); + } + + /** Scores and collects all matching documents. + * @param collector The collector to which all matching documents are passed through. + *
When this method is used the {@link #explain(int)} method should not be used. + */ + public void score(Collector collector) throws IOException { + collector.setScorer(this); while (next()) { - hc.collect(currentDoc, currentScore); + collector.collect(currentDoc); } } @@ -126,10 +136,23 @@ class DisjunctionSumScorer extends Scorer { * {@link HitCollector#collect(int, float)}. * @param max Do not score documents past this. * @return true if more matching documents may remain. + * @deprecated use {@link #score(Collector, int)} instead. */ protected boolean score(HitCollector hc, int max) throws IOException { + return score(new HitCollectorWrapper(hc), max); + } + + /** Expert: Collects matching documents in a range. Hook for optimization. + * Note that {@link #next()} must be called once before this method is called + * for the first time. + * @param collector The collector to which all matching documents are passed through. + * @param max Do not score documents past this. + * @return true if more matching documents may remain. 
+ */ + protected boolean score(Collector collector, int max) throws IOException { + collector.setScorer(this); while (currentDoc < max) { - hc.collect(currentDoc, currentScore); + collector.collect(currentDoc); if (!next()) { return false; } diff --git a/src/java/org/apache/lucene/search/FieldComparator.java b/src/java/org/apache/lucene/search/FieldComparator.java index 9d3ed99d09a..5991c031f7a 100644 --- a/src/java/org/apache/lucene/search/FieldComparator.java +++ b/src/java/org/apache/lucene/search/FieldComparator.java @@ -62,11 +62,11 @@ public abstract class FieldComparator { return values[slot1] - values[slot2]; } - public int compareBottom(int doc, float score) { + public int compareBottom(int doc) { return bottom - currentReaderValues[doc]; } - public void copy(int slot, int doc, float score) { + public void copy(int slot, int doc) { values[slot] = currentReaderValues[doc]; } @@ -87,7 +87,7 @@ public abstract class FieldComparator { public Comparable value(int slot) { return new Byte(values[slot]); } - }; + } /** Sorts by ascending docID */ public static final class DocComparator extends FieldComparator { @@ -104,12 +104,12 @@ public abstract class FieldComparator { return docIDs[slot1] - docIDs[slot2]; } - public int compareBottom(int doc, float score) { + public int compareBottom(int doc) { // No overflow risk because docIDs are non-negative return bottom - (docBase + doc); } - public void copy(int slot, int doc, float score) { + public void copy(int slot, int doc) { docIDs[slot] = docBase + doc; } @@ -131,7 +131,7 @@ public abstract class FieldComparator { public Comparable value(int slot) { return new Integer(docIDs[slot]); } - }; + } /** Parses field's values as double (using {@link * ExtendedFieldCache#getDoubles} and sorts by ascending value */ @@ -160,7 +160,7 @@ public abstract class FieldComparator { } } - public int compareBottom(int doc, float score) { + public int compareBottom(int doc) { final double v2 = currentReaderValues[doc]; if (bottom > 
v2) { return 1; @@ -171,7 +171,7 @@ public abstract class FieldComparator { } } - public void copy(int slot, int doc, float score) { + public void copy(int slot, int doc) { values[slot] = currentReaderValues[doc]; } @@ -192,7 +192,7 @@ public abstract class FieldComparator { public Comparable value(int slot) { return new Double(values[slot]); } - }; + } /** Parses field's values as float (using {@link * FieldCache#getFloats} and sorts by ascending value */ @@ -223,7 +223,7 @@ public abstract class FieldComparator { } } - public int compareBottom(int doc, float score) { + public int compareBottom(int doc) { // TODO: are there sneaky non-branch ways to compute // sign of float? final float v2 = currentReaderValues[doc]; @@ -236,7 +236,7 @@ public abstract class FieldComparator { } } - public void copy(int slot, int doc, float score) { + public void copy(int slot, int doc) { values[slot] = currentReaderValues[doc]; } @@ -256,7 +256,7 @@ public abstract class FieldComparator { public Comparable value(int slot) { return new Float(values[slot]); } - }; + } /** Parses field's values as int (using {@link * FieldCache#getInts} and sorts by ascending value */ @@ -289,7 +289,7 @@ public abstract class FieldComparator { } } - public int compareBottom(int doc, float score) { + public int compareBottom(int doc) { // TODO: there are sneaky non-branch ways to compute // -1/+1/0 sign // Cannot return bottom - values[slot2] because that @@ -304,7 +304,7 @@ public abstract class FieldComparator { } } - public void copy(int slot, int doc, float score) { + public void copy(int slot, int doc) { values[slot] = currentReaderValues[doc]; } @@ -324,7 +324,7 @@ public abstract class FieldComparator { public Comparable value(int slot) { return new Integer(values[slot]); } - }; + } /** Parses field's values as long (using {@link * ExtendedFieldCache#getLongs} and sorts by ascending value */ @@ -355,7 +355,7 @@ public abstract class FieldComparator { } } - public int compareBottom(int doc, 
float score) { + public int compareBottom(int doc) { // TODO: there are sneaky non-branch ways to compute // -1/+1/0 sign final long v2 = currentReaderValues[doc]; @@ -368,7 +368,7 @@ public abstract class FieldComparator { } } - public void copy(int slot, int doc, float score) { + public void copy(int slot, int doc) { values[slot] = currentReaderValues[doc]; } @@ -389,7 +389,7 @@ public abstract class FieldComparator { public Comparable value(int slot) { return new Long(values[slot]); } - }; + } /** Sorts by descending relevance. NOTE: if you are * sorting only by descending relevance and then @@ -400,7 +400,8 @@ public abstract class FieldComparator { public static final class RelevanceComparator extends FieldComparator { private final float[] scores; private float bottom; - + private Scorer scorer; + RelevanceComparator(int numHits) { scores = new float[numHits]; } @@ -408,27 +409,16 @@ public abstract class FieldComparator { public int compare(int slot1, int slot2) { final float score1 = scores[slot1]; final float score2 = scores[slot2]; - if (score1 > score2) { - return -1; - } else if (score1 < score2) { - return 1; - } else { - return 0; - } + return score1 > score2 ? -1 : (score1 < score2 ? 1 : 0); } - public int compareBottom(int doc, float score) { - if (bottom > score) { - return -1; - } else if (bottom < score) { - return 1; - } else { - return 0; - } + public int compareBottom(int doc) throws IOException { + float score = scorer.score(); + return bottom > score ? -1 : (bottom < score ? 
1 : 0); } - public void copy(int slot, int doc, float score) { - scores[slot] = score; + public void copy(int slot, int doc) throws IOException { + scores[slot] = scorer.score(); } public void setNextReader(IndexReader reader, int docBase, int numSlotsFull) { @@ -438,6 +428,12 @@ public abstract class FieldComparator { this.bottom = scores[bottom]; } + public void setScorer(Scorer scorer) { + // wrap with a ScoreCachingWrappingScorer so that successive calls to + // score() will not incur score computation over and over again. + this.scorer = new ScoreCachingWrappingScorer(scorer); + } + public int sortType() { return SortField.SCORE; } @@ -445,7 +441,7 @@ public abstract class FieldComparator { public Comparable value(int slot) { return new Float(scores[slot]); } - }; + } /** Parses field's values as short (using {@link * FieldCache#getShorts} and sorts by ascending value */ @@ -466,11 +462,11 @@ public abstract class FieldComparator { return values[slot1] - values[slot2]; } - public int compareBottom(int doc, float score) { + public int compareBottom(int doc) { return bottom - currentReaderValues[doc]; } - public void copy(int slot, int doc, float score) { + public void copy(int slot, int doc) { values[slot] = currentReaderValues[doc]; } @@ -491,7 +487,7 @@ public abstract class FieldComparator { public Comparable value(int slot) { return new Short(values[slot]); } - }; + } /** Sorts by a field's value using the Collator for a * given Locale.*/ @@ -523,7 +519,7 @@ public abstract class FieldComparator { return collator.compare(val1, val2); } - public int compareBottom(int doc, float score) { + public int compareBottom(int doc) { final String val2 = currentReaderValues[doc]; if (bottom == null) { if (val2 == null) { @@ -536,7 +532,7 @@ public abstract class FieldComparator { return collator.compare(bottom, val2); } - public void copy(int slot, int doc, float score) { + public void copy(int slot, int doc) { values[slot] = currentReaderValues[doc]; } @@ -556,7 
+552,7 @@ public abstract class FieldComparator { public Comparable value(int slot) { return values[slot]; } - }; + } // NOTE: there were a number of other interesting String // comparators explored, but this one seemed to perform @@ -608,7 +604,7 @@ public abstract class FieldComparator { return val1.compareTo(val2); } - public int compareBottom(int doc, float score) { + public int compareBottom(int doc) { assert bottomSlot != -1; int order = this.order[doc]; final int cmp = bottomOrd - order; @@ -659,7 +655,7 @@ public abstract class FieldComparator { ords[slot] = index; } - public void copy(int slot, int doc, float score) { + public void copy(int slot, int doc) { final int ord = order[doc]; ords[slot] = ord; assert ord >= 0; @@ -709,7 +705,7 @@ public abstract class FieldComparator { public String getField() { return field; } - }; + } /** Sorts by field's natural String sort order. All * comparisons are done using String.compareTo, which is @@ -742,7 +738,7 @@ public abstract class FieldComparator { return val1.compareTo(val2); } - public int compareBottom(int doc, float score) { + public int compareBottom(int doc) { final String val2 = currentReaderValues[doc]; if (bottom == null) { if (val2 == null) { @@ -755,7 +751,7 @@ public abstract class FieldComparator { return bottom.compareTo(val2); } - public void copy(int slot, int doc, float score) { + public void copy(int slot, int doc) { values[slot] = currentReaderValues[doc]; } @@ -775,11 +771,11 @@ public abstract class FieldComparator { public Comparable value(int slot) { return values[slot]; } - }; + } final protected static int binarySearch(String[] a, String key) { return binarySearch(a, key, 0, a.length-1); - }; + } final protected static int binarySearch(String[] a, String key, int low, int high) { @@ -801,7 +797,7 @@ public abstract class FieldComparator { return mid; } return -(low + 1); - }; + } /** * Compare hit at slot1 with hit at slot2. 
Return @@ -827,22 +823,20 @@ public abstract class FieldComparator { * only invoked after setBottom has been called. * * @param doc that was hit - * @param score of the hit * @return any N < 0 if the doc's value is sorted after * the bottom entry (not competitive), any N > 0 if the * doc's value is sorted before the bottom entry and 0 if * they are equal. */ - public abstract int compareBottom(int doc, float score); + public abstract int compareBottom(int doc) throws IOException; /** * Copy hit (doc,score) to hit slot. * * @param slot which slot to copy the hit to * @param doc docID relative to current reader - * @param score hit score */ - public abstract void copy(int slot, int doc, float score); + public abstract void copy(int slot, int doc) throws IOException; /** * Set a new Reader. All doc correspond to the current Reader. @@ -854,6 +848,12 @@ public abstract class FieldComparator { */ public abstract void setNextReader(IndexReader reader, int docBase, int numSlotsFull) throws IOException; + /** Sets the Scorer to use in case a document's score is needed. */ + public void setScorer(Scorer scorer) { + // Empty implementation since most comparators don't need the score. This + // can be overridden by those that need it. + } + /** * @return SortField.TYPE */ diff --git a/src/java/org/apache/lucene/search/FieldComparatorSource.java b/src/java/org/apache/lucene/search/FieldComparatorSource.java index d1df034bc89..9a5613ccc0f 100644 --- a/src/java/org/apache/lucene/search/FieldComparatorSource.java +++ b/src/java/org/apache/lucene/search/FieldComparatorSource.java @@ -18,7 +18,6 @@ package org.apache.lucene.search; */ import java.io.IOException; -import org.apache.lucene.index.IndexReader; /** * Provides a {@link FieldComparator} for custom field sorting. @@ -38,6 +37,6 @@ public abstract class FieldComparatorSource { * @throws IOException * If an error occurs reading the index. 
*/ - public abstract FieldComparator newComparator(String fieldname, IndexReader[] subReaders, int numHits, int sortPos, boolean reversed) + public abstract FieldComparator newComparator(String fieldname, int numHits, int sortPos, boolean reversed) throws IOException; } diff --git a/src/java/org/apache/lucene/search/FieldValueHitQueue.java b/src/java/org/apache/lucene/search/FieldValueHitQueue.java index 60960f9af56..90652b29b86 100644 --- a/src/java/org/apache/lucene/search/FieldValueHitQueue.java +++ b/src/java/org/apache/lucene/search/FieldValueHitQueue.java @@ -17,13 +17,13 @@ package org.apache.lucene.search; * limitations under the License. */ +import java.io.IOException; + import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermEnum; import org.apache.lucene.util.PriorityQueue; -import java.io.IOException;; - /** * Expert: A hit queue for sorting by hits by terms in more than one field. * Uses FieldCache.DEFAULT for maintaining @@ -32,12 +32,12 @@ import java.io.IOException;; * NOTE: This API is experimental and might change in * incompatible ways in the next release. * - * @since lucene 2.9 + * @since 2.9 * @version $Id: * @see Searcher#search(Query,Filter,int,Sort) * @see FieldCache */ -public class FieldValueHitQueue extends PriorityQueue { +public abstract class FieldValueHitQueue extends PriorityQueue { final static class Entry { int slot; @@ -56,136 +56,185 @@ public class FieldValueHitQueue extends PriorityQueue { } /** - * Creates a hit queue sorted by the given list of fields. - * @param fields SortField array we are sorting by in - * priority order (highest priority first); cannot be null or empty - * @param size The number of hits to retain. Must be - * greater than zero. 
- * @param subReaders Array of IndexReaders we will search, - * in order that they will be searched - * @throws IOException + * An implementation of {@link FieldValueHitQueue} which is optimized in case + * there is just one comparator. */ - public FieldValueHitQueue(SortField[] fields, int size, IndexReader[] subReaders) throws IOException { - numComparators = fields.length; - comparators = new FieldComparator[numComparators]; - reverseMul = new int[numComparators]; + private static final class OneComparatorFieldValueHitQueue extends FieldValueHitQueue { - if (fields.length == 0) { - throw new IllegalArgumentException("Sort must contain at least one field"); - } - - this.fields = fields; - for (int i=0; ia is less relevant than b. + * @param a ScoreDoc + * @param b ScoreDoc + * @return true if document a should be sorted after document b. + */ + protected boolean lessThan(final Object a, final Object b) { + final Entry hitA = (Entry) a; + final Entry hitB = (Entry) b; - initialize(size); - } - - /** Stores a comparator corresponding to each field being sorted by */ - private final FieldComparator[] comparators; - private final FieldComparator comparator1; - private final int numComparators; - private final int[] reverseMul; - private final int reverseMul1; + assert hitA != hitB; + assert hitA.slot != hitB.slot; - FieldComparator[] getComparators() { - return comparators; - } - - int[] getReverseMul() { - return reverseMul; - } - - /** Stores the sort criteria being used. */ - private final SortField[] fields; - - /** - * Returns whether a is less relevant than b. - * @param a ScoreDoc - * @param b ScoreDoc - * @return true if document a should be sorted after document b. 
- */ - protected boolean lessThan (final Object a, final Object b) { - final Entry hitA = (Entry) a; - final Entry hitB = (Entry) b; - - assert hitA != hitB; - assert hitA.slot != hitB.slot; - - if (numComparators == 1) { - // Common case - final int c = reverseMul1 * comparator1.compare(hitA.slot, hitB.slot); + final int c = oneReverseMul * comparator.compare(hitA.slot, hitB.slot); if (c != 0) { return c > 0; } - } else { - // run comparators - for (int i=0; i hitB.docID; + } + + } + + /** + * An implementation of {@link FieldValueHitQueue} which is optimized in case + * there is more than one comparator. + */ + private static final class MultiComparatorsFieldValueHitQueue extends FieldValueHitQueue { + + public MultiComparatorsFieldValueHitQueue(SortField[] fields, int size) + throws IOException { + super(fields); + + int numComparators = comparators.length; + for (int i = 0; i < numComparators; ++i) { + SortField field = fields[i]; + + // AUTO is resolved before we are called + assert field.getType() != SortField.AUTO; + + reverseMul[i] = field.reverse ? -1 : 1; + comparators[i] = field.getComparator(size, i, field.reverse); + } + + initialize(size); + } + + protected boolean lessThan(Object a, Object b) { + final Entry hitA = (Entry) a; + final Entry hitB = (Entry) b; + + assert hitA != hitB; + assert hitA.slot != hitB.slot; + + int numComparators = comparators.length; + for (int i = 0; i < numComparators; ++i) { final int c = reverseMul[i] * comparators[i].compare(hitA.slot, hitB.slot); if (c != 0) { // Short circuit return c > 0; } } + + // avoid random sort order that could lead to duplicates (bug #31241): + return hitA.docID > hitB.docID; } - - // avoid random sort order that could lead to duplicates (bug #31241): - return hitA.docID > hitB.docID; + + } + + // prevent instantiation and extension. + private FieldValueHitQueue(SortField[] fields) { + // When we get here, fields.length is guaranteed to be > 0, therefore no + // need to check it again. 
+ + // All these are required by this class's API - need to return arrays. + // Therefore even in the case of a single comparator, create an array + // anyway. + this.fields = fields; + int numComparators = fields.length; + comparators = new FieldComparator[numComparators]; + reverseMul = new int[numComparators]; } - /** - * Given a FieldDoc object, stores the values used - * to sort the given document. These values are not the raw - * values out of the index, but the internal representation - * of them. This is so the given search hit can be collated - * by a MultiSearcher with other search hits. - * @param doc The FieldDoc to store sort values into. - * @return The same FieldDoc passed in. + * Creates a hit queue sorted by the given list of fields. + * + * @param fields + * SortField array we are sorting by in priority order (highest + * priority first); cannot be null or empty + * @param size + * The number of hits to retain. Must be greater than zero. + * @throws IOException + */ + public static FieldValueHitQueue create(SortField[] fields, int size) throws IOException { + + if (fields.length == 0) { + throw new IllegalArgumentException("Sort must contain at least one field"); + } + + if (fields.length == 1) { + return new OneComparatorFieldValueHitQueue(fields, size); + } else { + return new MultiComparatorsFieldValueHitQueue(fields, size); + } + } + + FieldComparator[] getComparators() { return comparators; } + + int[] getReverseMul() { return reverseMul; } + + /** Stores the sort criteria being used. */ + protected final SortField[] fields; + protected final FieldComparator[] comparators; + protected final int[] reverseMul; + + protected abstract boolean lessThan (final Object a, final Object b); + + /** + * Given a FieldDoc object, stores the values used to sort the given document. + * These values are not the raw values out of the index, but the internal + * representation of them. 
This is so the given search hit can be collated by + * a MultiSearcher with other search hits. + * + * @param doc + * The FieldDoc to store sort values into. + * @return The same FieldDoc passed in. * @see Searchable#search(Weight,Filter,int,Sort) */ - FieldDoc fillFields (final Entry entry) { + FieldDoc fillFields(final Entry entry) { final int n = comparators.length; final Comparable[] fields = new Comparable[n]; - for (int i=0; i 1.0f) doc.score /= maxscore; // normalize scores - return new FieldDoc(entry.docID, - entry.score, - fields); + return new FieldDoc(entry.docID, entry.score, fields); } - /** Returns the SortFields being used by this hit queue. */ SortField[] getFields() { return fields; } - /** - * Attempts to detect the given field type for an IndexReader. - */ + /** Attempts to detect the given field type for an IndexReader. */ static int detectFieldType(IndexReader reader, String fieldKey) throws IOException { - String field = ((String)fieldKey).intern(); - TermEnum enumerator = reader.terms (new Term (field)); + String field = fieldKey.intern(); + TermEnum enumerator = reader.terms(new Term(field)); try { Term term = enumerator.term(); if (term == null) { - throw new RuntimeException ("no terms in field " + field + " - cannot determine sort type"); + throw new RuntimeException("no terms in field " + field + " - cannot determine sort type"); } int ret = 0; if (term.field() == field) { @@ -219,7 +268,7 @@ public class FieldValueHitQueue extends PriorityQueue { } } } else { - throw new RuntimeException ("field \"" + field + "\" does not appear to be indexed"); + throw new RuntimeException("field \"" + field + "\" does not appear to be indexed"); } return ret; } finally { diff --git a/src/java/org/apache/lucene/search/HitCollector.java b/src/java/org/apache/lucene/search/HitCollector.java index bfa9ab05465..481e7598bf1 100644 --- a/src/java/org/apache/lucene/search/HitCollector.java +++ b/src/java/org/apache/lucene/search/HitCollector.java @@ -17,13 
+17,15 @@ package org.apache.lucene.search; * limitations under the License. */ -/** Lower-level search API. - *
HitCollectors are primarily meant to be used to implement queries, - * sorting and filtering. See {@link - * MultiReaderHitCollector} for a lower level and - * higher performance (on a multi-segment index) API. +/** + * Lower-level search API.
+ * HitCollectors are primarily meant to be used to implement queries, sorting + * and filtering. See {@link Collector} for a lower level and higher performance + * (on a multi-segment index) API. + * * @see Searcher#search(Query,HitCollector) * @version $Id$ + * @deprecated Please use {@link Collector} instead. */ public abstract class HitCollector { /** Called once for every document matching a query, with the document diff --git a/src/java/org/apache/lucene/search/HitCollectorWrapper.java b/src/java/org/apache/lucene/search/HitCollectorWrapper.java new file mode 100644 index 00000000000..d7d9f21d936 --- /dev/null +++ b/src/java/org/apache/lucene/search/HitCollectorWrapper.java @@ -0,0 +1,50 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.index.IndexReader; + +/** + * Wrapper for ({@link HitCollector}) implementations, which + * simply re-bases the incoming docID before calling {@link + * HitCollector#collect}. + * @deprecated this class will be removed when {@link HitCollector} is removed. 
+ */ +class HitCollectorWrapper extends Collector { + private HitCollector collector; + private int base = 0; + private Scorer scorer = null; + + public HitCollectorWrapper(HitCollector collector) { + this.collector = collector; + } + + public void setNextReader(IndexReader reader, int docBase) { + base = docBase; + } + + public void collect(int doc) throws IOException { + collector.collect(doc + base, scorer.score()); + } + + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + } +} diff --git a/src/java/org/apache/lucene/search/IndexSearcher.java b/src/java/org/apache/lucene/search/IndexSearcher.java index 0e1e3c2a29b..d307b02c376 100644 --- a/src/java/org/apache/lucene/search/IndexSearcher.java +++ b/src/java/org/apache/lucene/search/IndexSearcher.java @@ -188,12 +188,16 @@ public class IndexSearcher extends Searcher { throws IOException { return search(weight, filter, nDocs, sort, true); } - - /** - * Just like {@link #search(Weight, Filter, int, Sort)}, - * but you choose whether or not the fields in the - * returned {@link FieldDoc} instances should be set by - * specifying fillFields. + + /** + * Just like {@link #search(Weight, Filter, int, Sort)}, but you choose + * whether or not the fields in the returned {@link FieldDoc} instances should + * be set by specifying fillFields.
+ * NOTE: currently, this method tracks document scores and sets them in + * the returned {@link FieldDoc}, however in 3.0 it will move to not track + * document scores. If document scores tracking is still needed, you can use + * {@link #search(Weight, Filter, Collector)} and pass in a + * {@link TopFieldCollector} instance. */ public TopFieldDocs search(Weight weight, Filter filter, final int nDocs, Sort sort, boolean fillFields) @@ -222,29 +226,32 @@ public class IndexSearcher extends Searcher { if (legacy) { // Search the single top-level reader - TopScoreDocCollector collector = new TopFieldDocCollector(reader, sort, nDocs); - collector.setNextReader(reader, 0); - doSearch(reader, weight, filter, collector); - return (TopFieldDocs) collector.topDocs(); - } else { - // Search each sub-reader - TopFieldCollector collector = new TopFieldCollector(sort, nDocs, sortedSubReaders, fillFields); - search(weight, filter, collector); + TopDocCollector collector = new TopFieldDocCollector(reader, sort, nDocs); + HitCollectorWrapper hcw = new HitCollectorWrapper(collector); + hcw.setNextReader(reader, 0); + doSearch(reader, weight, filter, hcw); return (TopFieldDocs) collector.topDocs(); } + // Search each sub-reader + // TODO: by default we should create a TopFieldCollector which does not + // track document scores and maxScore. Currently the default is set to true, + // however it will change in 3.0. + TopFieldCollector collector = TopFieldCollector.create(sort, nDocs, fillFields, true, true); + search(weight, filter, collector); + return (TopFieldDocs) collector.topDocs(); } // inherit javadoc + /** @deprecated use {@link #search(Weight, Filter, Collector)} instead. 
*/ public void search(Weight weight, Filter filter, HitCollector results) throws IOException { - - final MultiReaderHitCollector collector; - if (results instanceof MultiReaderHitCollector) { - collector = (MultiReaderHitCollector) results; - } else { - collector = new MultiReaderCollectorWrapper(results); - } - + search(weight, filter, new HitCollectorWrapper(results)); + } + + // inherit javadoc + public void search(Weight weight, Filter filter, Collector collector) + throws IOException { + for (int i = 0; i < sortedSubReaders.length; i++) { // search each subreader collector.setNextReader(sortedSubReaders[i], sortedStarts[i]); doSearch(sortedSubReaders[i], weight, filter, collector); @@ -252,14 +259,14 @@ public class IndexSearcher extends Searcher { } private void doSearch(IndexReader reader, Weight weight, Filter filter, - final HitCollector results) throws IOException { + final Collector collector) throws IOException { Scorer scorer = weight.scorer(reader); if (scorer == null) return; if (filter == null) { - scorer.score(results); + scorer.score(collector); return; } @@ -267,6 +274,7 @@ public class IndexSearcher extends Searcher { boolean more = filterDocIdIterator.next() && scorer.skipTo(filterDocIdIterator.doc()); + collector.setScorer(scorer); while (more) { int filterDocId = filterDocIdIterator.doc(); if (filterDocId > scorer.doc() && !scorer.skipTo(filterDocId)) { @@ -274,7 +282,7 @@ public class IndexSearcher extends Searcher { } else { int scorerDocId = scorer.doc(); if (scorerDocId == filterDocId) { // permitted by filter - results.collect(scorerDocId, scorer.score()); + collector.collect(scorerDocId); more = filterDocIdIterator.next(); } else { more = filterDocIdIterator.skipTo(scorerDocId); @@ -295,26 +303,4 @@ public class IndexSearcher extends Searcher { public Explanation explain(Weight weight, int doc) throws IOException { return weight.explain(reader, doc); } - - /** - * Wrapper for non expert ({@link HitCollector}) - * implementations, which 
simply re-bases the incoming - * docID before calling {@link HitCollector#collect}. - */ - static class MultiReaderCollectorWrapper extends MultiReaderHitCollector { - private HitCollector collector; - private int base = -1; - - public MultiReaderCollectorWrapper(HitCollector collector) { - this.collector = collector; - } - - public void collect(int doc, float score) { - collector.collect(doc + base, score); - } - - public void setNextReader(IndexReader reader, int docBase) { - base = docBase; - } - } } diff --git a/src/java/org/apache/lucene/search/MultiReaderHitCollector.java b/src/java/org/apache/lucene/search/MultiReaderHitCollector.java deleted file mode 100644 index 1b5c86e6c08..00000000000 --- a/src/java/org/apache/lucene/search/MultiReaderHitCollector.java +++ /dev/null @@ -1,53 +0,0 @@ -package org.apache.lucene.search; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -import org.apache.lucene.index.IndexReader; - -/** - * Expert: A HitCollector that can be used to collect hits - * across sequential IndexReaders. For a Multi*Reader, this - * collector advances through each of the sub readers, in an - * arbitrary order. 
This results in a higher performance - * means of collection. - * - * NOTE: The doc that is passed to the collect method - * is relative to the current reader. You must re-base the - * doc, by recording the docBase from the last setNextReader - * call, to map it to the docID space of the - * Multi*Reader. - * - * NOTE: This API is experimental and might change in - * incompatible ways in the next release. - */ -public abstract class MultiReaderHitCollector extends HitCollector { - /** - * Called before collecting from each IndexReader. All doc - * ids in {@link #collect(int, float)} will correspond to reader. - * - * Add docBase to the current IndexReaders internal document id to - * re-base ids in {@link #collect(int, float)}. - * - * @param reader next IndexReader - * @param docBase - * @throws IOException - */ - public abstract void setNextReader(IndexReader reader, int docBase) throws IOException; -} diff --git a/src/java/org/apache/lucene/search/MultiSearcher.java b/src/java/org/apache/lucene/search/MultiSearcher.java index 111b47e786f..3edf542df80 100644 --- a/src/java/org/apache/lucene/search/MultiSearcher.java +++ b/src/java/org/apache/lucene/search/MultiSearcher.java @@ -97,9 +97,14 @@ public class MultiSearcher extends Searcher { throw new UnsupportedOperationException(); } + /** @deprecated use {@link #search(Weight, Filter, Collector)} instead. */ public void search(Weight weight, Filter filter, HitCollector results) { throw new UnsupportedOperationException(); } + + public void search(Weight weight, Filter filter, Collector collector) { + throw new UnsupportedOperationException(); + } public TopDocs search(Weight weight,Filter filter,int n) { throw new UnsupportedOperationException(); @@ -251,40 +256,31 @@ public class MultiSearcher extends Searcher { return new TopFieldDocs (totalHits, scoreDocs, hq.getFields(), maxScore); } - // inherit javadoc + /** @deprecated use {@link #search(Weight, Filter, Collector)} instead. 
*/ public void search(Weight weight, Filter filter, final HitCollector results) throws IOException { + search(weight, filter, new HitCollectorWrapper(results)); + } + + // inherit javadoc + public void search(Weight weight, Filter filter, final Collector collector) + throws IOException { for (int i = 0; i < searchables.length; i++) { - + final int start = starts[i]; - - final MultiReaderHitCollector hc; - if (results instanceof MultiReaderHitCollector) { - // results can shift - final MultiReaderHitCollector resultsMulti = (MultiReaderHitCollector) results; - hc = new MultiReaderHitCollector() { - public void collect(int doc, float score) { - resultsMulti.collect(doc, score); - } - - public void setNextReader(IndexReader reader, int docBase) throws IOException { - resultsMulti.setNextReader(reader, start+docBase); - } - }; - } else { - // We must shift the docIDs - hc = new MultiReaderHitCollector() { - private int docBase; - public void collect(int doc, float score) { - results.collect(doc + docBase + start, score); - } - - public void setNextReader(IndexReader reader, int docBase) { - this.docBase = docBase; - } - }; - } + + final Collector hc = new Collector() { + public void setScorer(Scorer scorer) throws IOException { + collector.setScorer(scorer); + } + public void collect(int doc) throws IOException { + collector.collect(doc); + } + public void setNextReader(IndexReader reader, int docBase) throws IOException { + collector.setNextReader(reader, start + docBase); + } + }; searchables[i].search(weight, filter, hc); } diff --git a/src/java/org/apache/lucene/search/ParallelMultiSearcher.java b/src/java/org/apache/lucene/search/ParallelMultiSearcher.java index af7df6ccf72..1dc8d0e8fd6 100644 --- a/src/java/org/apache/lucene/search/ParallelMultiSearcher.java +++ b/src/java/org/apache/lucene/search/ParallelMultiSearcher.java @@ -170,44 +170,51 @@ public class ParallelMultiSearcher extends MultiSearcher { * @param results to receive hits * * @todo parallelize this 
one too + * @deprecated use {@link #search(Weight, Filter, Collector)} instead. */ public void search(Weight weight, Filter filter, final HitCollector results) throws IOException { - for (int i = 0; i < searchables.length; i++) { - - final int start = starts[i]; - - final MultiReaderHitCollector hc; - if (results instanceof MultiReaderHitCollector) { - // results can shift - final MultiReaderHitCollector resultsMulti = (MultiReaderHitCollector) results; - hc = new MultiReaderHitCollector() { - public void collect(int doc, float score) { - resultsMulti.collect(doc, score); - } - - public void setNextReader(IndexReader reader, int docBase) throws IOException { - resultsMulti.setNextReader(reader, start+docBase); - } - }; - } else { - // We must shift the docIDs - hc = new MultiReaderHitCollector() { - private int docBase; - public void collect(int doc, float score) { - results.collect(doc + docBase + start, score); - } - - public void setNextReader(IndexReader reader, int docBase) { - this.docBase = docBase; - } - }; - } - - searchables[i].search(weight, filter, hc); - } + search(weight, filter, new HitCollectorWrapper(results)); } + /** Lower-level search API. + * + *

{@link Collector#collect(int)} is called for every matching document. + * + *

Applications should only use this if they need all of the + * matching documents. The high-level search API ({@link + * Searcher#search(Query)}) is usually more efficient, as it skips + * non-high-scoring hits. + * + * @param weight to match documents + * @param filter if non-null, a bitset used to eliminate some documents + * @param collector to receive hits + * + * @todo parallelize this one too + */ + public void search(Weight weight, Filter filter, final Collector collector) + throws IOException { + for (int i = 0; i < searchables.length; i++) { + + final int start = starts[i]; + + final Collector hc = new Collector() { + public void setScorer(Scorer scorer) throws IOException { + collector.setScorer(scorer); + } + public void collect(int doc) throws IOException { + collector.collect(doc); + } + + public void setNextReader(IndexReader reader, int docBase) throws IOException { + collector.setNextReader(reader, start + docBase); + } + }; + + searchables[i].search(weight, filter, hc); + } + } + /* * TODO: this one could be parallelized too * @see org.apache.lucene.search.Searchable#rewrite(org.apache.lucene.search.Query) diff --git a/src/java/org/apache/lucene/search/PositiveScoresOnlyCollector.java b/src/java/org/apache/lucene/search/PositiveScoresOnlyCollector.java new file mode 100644 index 00000000000..b5aab42cb53 --- /dev/null +++ b/src/java/org/apache/lucene/search/PositiveScoresOnlyCollector.java @@ -0,0 +1,56 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.index.IndexReader; + +/** + * A {@link Collector} implementation which wraps another + * {@link Collector} and makes sure only documents with + * scores > 0 are collected. + */ + +public class PositiveScoresOnlyCollector extends Collector { + + final private Collector c; + private Scorer scorer; + + public PositiveScoresOnlyCollector(Collector c) { + this.c = c; + } + + public void collect(int doc) throws IOException { + if (scorer.score() > 0) { + c.collect(doc); + } + } + + public void setNextReader(IndexReader reader, int docBase) throws IOException { + c.setNextReader(reader, docBase); + } + + public void setScorer(Scorer scorer) throws IOException { + // Set a ScoreCachingWrappingScorer in case the wrapped Collector will call + // score() also. 
+ this.scorer = new ScoreCachingWrappingScorer(scorer); + c.setScorer(this.scorer); + } + +} diff --git a/src/java/org/apache/lucene/search/QueryWrapperFilter.java b/src/java/org/apache/lucene/search/QueryWrapperFilter.java index 3bc03cf6f9d..75959e7d7a2 100644 --- a/src/java/org/apache/lucene/search/QueryWrapperFilter.java +++ b/src/java/org/apache/lucene/search/QueryWrapperFilter.java @@ -50,9 +50,12 @@ public class QueryWrapperFilter extends Filter { public BitSet bits(IndexReader reader) throws IOException { final BitSet bits = new BitSet(reader.maxDoc()); - new IndexSearcher(reader).search(query, new MultiReaderHitCollector() { - private int base = -1; - public final void collect(int doc, float score) { + new IndexSearcher(reader).search(query, new Collector() { + private int base = 0; + public void setScorer(Scorer scorer) throws IOException { + // score is not needed by this collector + } + public final void collect(int doc) { bits.set(doc + base); // set bit for hit } public void setNextReader(IndexReader reader, int docBase) { diff --git a/src/java/org/apache/lucene/search/RemoteSearchable.java b/src/java/org/apache/lucene/search/RemoteSearchable.java index 4cb6290310b..ba0b1ab513a 100644 --- a/src/java/org/apache/lucene/search/RemoteSearchable.java +++ b/src/java/org/apache/lucene/search/RemoteSearchable.java @@ -45,12 +45,17 @@ public class RemoteSearchable this.local = local; } - + /** @deprecated use {@link #search(Weight, Filter, Collector)} instead. 
*/ public void search(Weight weight, Filter filter, HitCollector results) throws IOException { local.search(weight, filter, results); } + public void search(Weight weight, Filter filter, Collector results) + throws IOException { + local.search(weight, filter, results); + } + public void close() throws IOException { local.close(); } diff --git a/src/java/org/apache/lucene/search/ScoreCachingWrappingScorer.java b/src/java/org/apache/lucene/search/ScoreCachingWrappingScorer.java new file mode 100644 index 00000000000..6dcd9c8b5d3 --- /dev/null +++ b/src/java/org/apache/lucene/search/ScoreCachingWrappingScorer.java @@ -0,0 +1,83 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +/** + * A {@link Scorer} which wraps another scorer and caches the score of the + * current document. Successive calls to {@link #score()} will return the same + * result and will not invoke the wrapped Scorer's score() method, unless the + * current document has changed.
+ * This class might be useful due to the changes done to the {@link Collector} + * interface, in which the score is not computed for a document by default, only + * if the collector requests it. Some collectors may need to use the score in + * several places, however all they have in hand is a {@link Scorer} object, and + * might end up computing the score of a document more than once. + */ +public class ScoreCachingWrappingScorer extends Scorer { + + private Scorer scorer; + private int curDoc = -1; + private float curScore; + + /** Creates a new instance by wrapping the given scorer. */ + public ScoreCachingWrappingScorer(Scorer scorer) { + super(scorer.getSimilarity()); + this.scorer = scorer; + } + + protected boolean score(Collector collector, int max) throws IOException { + return scorer.score(collector, max); + } + + public Similarity getSimilarity() { + return scorer.getSimilarity(); + } + + public Explanation explain(int doc) throws IOException { + return scorer.explain(doc); + } + + public float score() throws IOException { + int doc = scorer.doc(); + if (doc != curDoc) { + curScore = scorer.score(); + curDoc = doc; + } + + return curScore; + } + + public int doc() { + return scorer.doc(); + } + + public boolean next() throws IOException { + return scorer.next(); + } + + public void score(Collector collector) throws IOException { + scorer.score(collector); + } + + public boolean skipTo(int target) throws IOException { + return scorer.skipTo(target); + } + +} diff --git a/src/java/org/apache/lucene/search/Scorer.java b/src/java/org/apache/lucene/search/Scorer.java index 21cd5986746..aaa61c3b4e2 100644 --- a/src/java/org/apache/lucene/search/Scorer.java +++ b/src/java/org/apache/lucene/search/Scorer.java @@ -52,10 +52,20 @@ public abstract class Scorer extends DocIdSetIterator { * @param hc The collector to which all matching documents are passed through * {@link HitCollector#collect(int, float)}. *
When this method is used the {@link #explain(int)} method should not be used. + * @deprecated use {@link #score(Collector)} instead. */ public void score(HitCollector hc) throws IOException { + score(new HitCollectorWrapper(hc)); + } + + /** Scores and collects all matching documents. + * @param collector The collector to which all matching documents are passed. + *
When this method is used the {@link #explain(int)} method should not be used. + */ + public void score(Collector collector) throws IOException { + collector.setScorer(this); while (next()) { - hc.collect(doc(), score()); + collector.collect(doc()); } } @@ -66,10 +76,23 @@ public abstract class Scorer extends DocIdSetIterator { * {@link HitCollector#collect(int, float)}. * @param max Do not score documents past this. * @return true if more matching documents may remain. + * @deprecated use {@link #score(Collector, int)} instead. */ protected boolean score(HitCollector hc, int max) throws IOException { + return score(new HitCollectorWrapper(hc), max); + } + + /** Expert: Collects matching documents in a range. Hook for optimization. + * Note that {@link #next()} must be called once before this method is called + * for the first time. + * @param collector The collector to which all matching documents are passed. + * @param max Do not score documents past this. + * @return true if more matching documents may remain. + */ + protected boolean score(Collector collector, int max) throws IOException { + collector.setScorer(this); while (doc() < max) { - hc.collect(doc(), score()); + collector.collect(doc()); if (!next()) return false; } @@ -78,7 +101,8 @@ public abstract class Scorer extends DocIdSetIterator { /** Returns the score of the current document matching the query. * Initially invalid, until {@link #next()} or {@link #skipTo(int)} - * is called the first time. + * is called the first time, or when called from within + * {@link Collector#collect}. 
*/ public abstract float score() throws IOException; diff --git a/src/java/org/apache/lucene/search/Searchable.java b/src/java/org/apache/lucene/search/Searchable.java index 5dc27f8b6b2..36f14a791b3 100644 --- a/src/java/org/apache/lucene/search/Searchable.java +++ b/src/java/org/apache/lucene/search/Searchable.java @@ -19,7 +19,7 @@ package org.apache.lucene.search; import org.apache.lucene.document.Document; import org.apache.lucene.document.FieldSelector; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader; // for javadoc import org.apache.lucene.index.Term; import org.apache.lucene.index.CorruptIndexException; @@ -51,10 +51,32 @@ public interface Searchable extends java.rmi.Remote { * @param filter if non-null, used to permit documents to be collected. * @param results to receive hits * @throws BooleanQuery.TooManyClauses + * @deprecated use {@link #search(Weight, Filter, Collector)} instead. */ void search(Weight weight, Filter filter, HitCollector results) throws IOException; + /** + * Lower-level search API. + * + *

+ * {@link Collector#collect(int)} is called for every document.
+ * Collector-based access to remote indexes is discouraged. + * + *

+ * Applications should only use this if they need all of the matching + * documents. The high-level search API ({@link Searcher#search(Query)}) is + * usually more efficient, as it skips non-high-scoring hits. + * + * @param weight + * to match documents + * @param filter + * if non-null, used to permit documents to be collected. + * @param collector + * to receive hits + * @throws BooleanQuery.TooManyClauses + */ + void search(Weight weight, Filter filter, Collector collector) throws IOException; /** Frees resources associated with this Searcher. * Be careful not to call this method while you are still using objects @@ -140,6 +162,7 @@ public interface Searchable extends java.rmi.Remote { */ Explanation explain(Weight weight, int doc) throws IOException; + // TODO: change the javadoc in 3.0 to remove the last NOTE section. /** Expert: Low-level search implementation with arbitrary sorting. Finds * the top n hits for query, applying * filter if non-null, and sorting the hits by the criteria in @@ -147,6 +170,13 @@ public interface Searchable extends java.rmi.Remote { * *

Applications should usually call {@link * Searcher#search(Query,Filter,Sort)} instead. + * + * NOTE: currently, this method tracks document scores and sets them in + * the returned {@link FieldDoc}, however in 3.0 it will move to not track + * document scores. If document scores tracking is still needed, you can use + * {@link #search(Weight, Filter, Collector)} and pass in a + * {@link TopFieldCollector} instance. + * * @throws BooleanQuery.TooManyClauses */ TopFieldDocs search(Weight weight, Filter filter, int n, Sort sort) diff --git a/src/java/org/apache/lucene/search/Searcher.java b/src/java/org/apache/lucene/search/Searcher.java index b6b327dc4ba..c0fa26d8f20 100644 --- a/src/java/org/apache/lucene/search/Searcher.java +++ b/src/java/org/apache/lucene/search/Searcher.java @@ -76,9 +76,13 @@ public abstract class Searcher implements Searchable { * the top n hits for query, applying * filter if non-null, and sorting the hits by the criteria in * sort. + * + * NOTE: currently, this method tracks document scores and sets them in + * the returned {@link FieldDoc}, however in 3.0 it will move to not track + * document scores. If document scores tracking is still needed, you can use + * {@link #search(Weight, Filter, Collector)} and pass in a + * {@link TopFieldCollector} instance. * - *

Applications should usually call {@link - * Searcher#search(Query,Filter,Sort)} instead. * @throws BooleanQuery.TooManyClauses */ public TopFieldDocs search(Query query, Filter filter, int n, @@ -99,12 +103,31 @@ public abstract class Searcher implements Searchable { * In other words, the score will not necessarily be a float whose value is * between 0 and 1. * @throws BooleanQuery.TooManyClauses + * @deprecated use {@link #search(Query, Collector)} instead. */ public void search(Query query, HitCollector results) throws IOException { search(query, (Filter)null, results); } + /** Lower-level search API. + * + *

{@link Collector#collect(int)} is called for every matching document. + * + *

Applications should only use this if they need all of the + * matching documents. The high-level search API ({@link + * Searcher#search(Query)}) is usually more efficient, as it skips + * non-high-scoring hits. + *

Note: The score passed to this method is a raw score. + * In other words, the score will not necessarily be a float whose value is + * between 0 and 1. + * @throws BooleanQuery.TooManyClauses + */ + public void search(Query query, Collector results) + throws IOException { + search(query, (Filter)null, results); + } + /** Lower-level search API. * *

{@link HitCollector#collect(int,float)} is called for every matching @@ -120,11 +143,33 @@ public abstract class Searcher implements Searchable { * @param filter if non-null, used to permit documents to be collected. * @param results to receive hits * @throws BooleanQuery.TooManyClauses + * @deprecated use {@link #search(Query, Filter, Collector)} instead. */ public void search(Query query, Filter filter, HitCollector results) throws IOException { search(createWeight(query), filter, results); } + + /** Lower-level search API. + * + *

{@link Collector#collect(int)} is called for every matching + * document. + *
Collector-based access to remote indexes is discouraged. + * + *

Applications should only use this if they need all of the + * matching documents. The high-level search API ({@link + * Searcher#search(Query, Filter, int)}) is usually more efficient, as it skips + * non-high-scoring hits. + * + * @param query to match documents + * @param filter if non-null, used to permit documents to be collected. + * @param results to receive hits + * @throws BooleanQuery.TooManyClauses + */ + public void search(Query query, Filter filter, Collector results) + throws IOException { + search(createWeight(query), filter, results); + } /** Finds the top n * hits for query, applying filter if non-null. @@ -197,7 +242,11 @@ public abstract class Searcher implements Searchable { /* The following abstract methods were added as a workaround for GCJ bug #15411. * http://gcc.gnu.org/bugzilla/show_bug.cgi?id=15411 */ + /** + * @deprecated use {@link #search(Weight, Filter, Collector)} instead. + */ abstract public void search(Weight weight, Filter filter, HitCollector results) throws IOException; + abstract public void search(Weight weight, Filter filter, Collector results) throws IOException; abstract public void close() throws IOException; abstract public int docFreq(Term term) throws IOException; abstract public int maxDoc() throws IOException; diff --git a/src/java/org/apache/lucene/search/SortField.java b/src/java/org/apache/lucene/search/SortField.java index 10603bf6472..9d65f78e3c9 100644 --- a/src/java/org/apache/lucene/search/SortField.java +++ b/src/java/org/apache/lucene/search/SortField.java @@ -21,8 +21,6 @@ import java.io.IOException; import java.io.Serializable; import java.util.Locale; -import org.apache.lucene.index.IndexReader; - /** * Stores information about how to sort documents by terms in an individual * field. Fields must be indexed in order to sort by them. @@ -434,8 +432,6 @@ implements Serializable { /** Returns the {@link FieldComparator} to use for sorting. 
- * @param subReaders array of {@link IndexReader} search - * will step through * @param numHits number of top hits the queue will store * @param sortPos position of this SortField within {@link * Sort}. The comparator is primary if sortPos==0, @@ -444,7 +440,7 @@ implements Serializable { * @param reversed True if the SortField is reversed * @return {@link FieldComparator} to use when sorting */ - protected FieldComparator getComparator(final IndexReader[] subReaders, final int numHits, final int sortPos, final boolean reversed) throws IOException { + protected FieldComparator getComparator(final int numHits, final int sortPos, final boolean reversed) throws IOException { if (locale != null) { // TODO: it'd be nice to allow FieldCache.getStringIndex @@ -480,7 +476,7 @@ implements Serializable { case SortField.CUSTOM: assert factory == null && comparatorSource != null; - return comparatorSource.newComparator(field, subReaders, numHits, sortPos, reversed); + return comparatorSource.newComparator(field, numHits, sortPos, reversed); case SortField.STRING: return new FieldComparator.StringOrdValComparator(numHits, field, sortPos, reversed); diff --git a/src/java/org/apache/lucene/search/TermScorer.java b/src/java/org/apache/lucene/search/TermScorer.java index 8c41e1bd69d..1acc13bb4c8 100644 --- a/src/java/org/apache/lucene/search/TermScorer.java +++ b/src/java/org/apache/lucene/search/TermScorer.java @@ -24,6 +24,9 @@ import org.apache.lucene.index.TermDocs; /** Expert: A Scorer for documents matching a Term. */ final class TermScorer extends Scorer { + + private static final float[] SIM_NORM_DECODER = Similarity.getNormDecoder(); + private Weight weight; private TermDocs termDocs; private byte[] norms; @@ -56,25 +59,26 @@ final class TermScorer extends Scorer { scoreCache[i] = getSimilarity().tf(i) * weightValue; } + /** @deprecated use {@link #score(Collector)} instead. 
*/ public void score(HitCollector hc) throws IOException { - next(); - score(hc, Integer.MAX_VALUE); + score(new HitCollectorWrapper(hc)); } + public void score(Collector c) throws IOException { + next(); + score(c, Integer.MAX_VALUE); + } + + /** @deprecated use {@link #score(Collector, int)} instead. */ protected boolean score(HitCollector c, int end) throws IOException { - Similarity similarity = getSimilarity(); // cache sim in local - float[] normDecoder = Similarity.getNormDecoder(); + return score(new HitCollectorWrapper(c), end); + } + + protected boolean score(Collector c, int end) throws IOException { + c.setScorer(this); while (doc < end) { // for docs in window - int f = freqs[pointer]; - float score = // compute tf(f)*weight - f < SCORE_CACHE_SIZE // check cache - ? scoreCache[f] // cache hit - : similarity.tf(f)*weightValue; // cache miss - - score *= normDecoder[norms[doc] & 0xFF]; // normalize for field - - c.collect(doc, score); // collect score - + c.collect(doc); // collect score + if (++pointer >= pointerMax) { pointerMax = termDocs.read(docs, freqs); // refill buffers if (pointerMax != 0) { @@ -123,7 +127,7 @@ final class TermScorer extends Scorer { ? 
scoreCache[f] // cache hit : getSimilarity().tf(f)*weightValue; // cache miss - return raw * Similarity.decodeNorm(norms[doc]); // normalize for field + return raw * SIM_NORM_DECODER[norms[doc] & 0xFF]; // normalize for field } /** Skips to the first match beyond the current whose document number is diff --git a/src/java/org/apache/lucene/search/TimeLimitedCollector.java b/src/java/org/apache/lucene/search/TimeLimitedCollector.java index e8caf43281e..2b2b719ae30 100755 --- a/src/java/org/apache/lucene/search/TimeLimitedCollector.java +++ b/src/java/org/apache/lucene/search/TimeLimitedCollector.java @@ -1,7 +1,5 @@ package org.apache.lucene.search; -import org.apache.lucene.index.IndexReader; - /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -17,18 +15,20 @@ import org.apache.lucene.index.IndexReader; * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - */ - -import java.io.IOException; +*/ /** - *

The TimeLimitedCollector is used to timeout search requests that - * take longer than the maximum allowed search time limit. After this - * time is exceeded, the search thread is stopped by throwing a - * TimeExceeded Exception.

+ *

+ * The TimeLimitedCollector is used to timeout search requests that take longer + * than the maximum allowed search time limit. After this time is exceeded, the + * search thread is stopped by throwing a TimeExceeded Exception. + *

* + * @deprecated this class will be removed in 3.0. Use + * {@link TimeLimitingCollector} instead, which extends the new + * {@link Collector}. */ -public class TimeLimitedCollector extends MultiReaderHitCollector { +public class TimeLimitedCollector extends HitCollector { /** * Default timer resolution. @@ -136,19 +136,15 @@ public class TimeLimitedCollector extends MultiReaderHitCollector { private final long t0; private final long timeout; - private final MultiReaderHitCollector hc; + private final HitCollector hc; /** * Create a TimeLimitedCollector wrapper over another HitCollector with a specified timeout. * @param hc the wrapped HitCollector * @param timeAllowed max time allowed for collecting hits after which {@link TimeExceededException} is thrown */ - public TimeLimitedCollector( final HitCollector hc, final long timeAllowed ) { - if (hc instanceof MultiReaderHitCollector) { - this.hc = (MultiReaderHitCollector) hc; - } else { - this.hc = new IndexSearcher.MultiReaderCollectorWrapper(hc); - } + public TimeLimitedCollector(final HitCollector hc, final long timeAllowed) { + this.hc = hc; t0 = TIMER_THREAD.getMilliseconds(); this.timeout = t0 + timeAllowed; } @@ -219,7 +215,4 @@ public class TimeLimitedCollector extends MultiReaderHitCollector { this.greedy = greedy; } - public void setNextReader(IndexReader reader, int base) throws IOException { - hc.setNextReader(reader, base); - } } diff --git a/src/java/org/apache/lucene/search/TimeLimitingCollector.java b/src/java/org/apache/lucene/search/TimeLimitingCollector.java new file mode 100644 index 00000000000..b369183da9c --- /dev/null +++ b/src/java/org/apache/lucene/search/TimeLimitingCollector.java @@ -0,0 +1,219 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.index.IndexReader; + +/** + * The {@link TimeLimitingCollector} is used to timeout search requests that + * take longer than the maximum allowed search time limit. After this time is + * exceeded, the search thread is stopped by throwing a + * {@link TimeExceededException}. + */ +public class TimeLimitingCollector extends Collector { + + /** + * Default timer resolution. + * @see #setResolution(long) + */ + public static final int DEFAULT_RESOLUTION = 20; + + /** + * Default for {@link #isGreedy()}. + * @see #isGreedy() + */ + public boolean DEFAULT_GREEDY = false; + + private static long resolution = DEFAULT_RESOLUTION; + + private boolean greedy = DEFAULT_GREEDY ; + + private static final class TimerThread extends Thread { + + // NOTE: we can avoid explicit synchronization here for several reasons: + // * updates to volatile long variables are atomic + // * only single thread modifies this value + // * use of volatile keyword ensures that it does not reside in + // a register, but in main memory (so that changes are visible to + // other threads). + // * visibility of changes does not need to be instantanous, we can + // afford losing a tick or two. + // + // See section 17 of the Java Language Specification for details. 
+ private volatile long time = 0; + + /** + * TimerThread provides a pseudo-clock service to all searching + * threads, so that they can count elapsed time with less overhead + * than repeatedly calling System.currentTimeMillis. A single + * thread should be created to be used for all searches. + */ + private TimerThread() { + super("TimeLimitedCollector timer thread"); + this.setDaemon( true ); + } + + public void run() { + while (true) { + // TODO: Use System.nanoTime() when Lucene moves to Java SE 5. + time += resolution; + try { + Thread.sleep( resolution ); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + throw new RuntimeException(ie); + } + } + } + + /** + * Get the timer value in milliseconds. + */ + public long getMilliseconds() { + return time; + } + } + + /** Thrown when elapsed search time exceeds allowed search time. */ + public static class TimeExceededException extends RuntimeException { + private long timeAllowed; + private long timeElapsed; + private int lastDocCollected; + private TimeExceededException(long timeAllowed, long timeElapsed, int lastDocCollected) { + super("Elapsed time: " + timeElapsed + "Exceeded allowed search time: " + timeAllowed + " ms."); + this.timeAllowed = timeAllowed; + this.timeElapsed = timeElapsed; + this.lastDocCollected = lastDocCollected; + } + /** Returns allowed time (milliseconds). */ + public long getTimeAllowed() { + return timeAllowed; + } + /** Returns elapsed time (milliseconds). */ + public long getTimeElapsed() { + return timeElapsed; + } + /** Returns last doc that was collected when the search time exceeded. */ + public int getLastDocCollected() { + return lastDocCollected; + } + } + + // Declare and initialize a single static timer thread to be used by + // all TimeLimitedCollector instances. The JVM assures that + // this only happens once. 
+ private final static TimerThread TIMER_THREAD = new TimerThread(); + + static { + TIMER_THREAD.start(); + } + + private final long t0; + private final long timeout; + private final Collector collector; + + /** + * Create a TimeLimitedCollector wrapper over another {@link Collector} with a specified timeout. + * @param collector the wrapped {@link Collector} + * @param timeAllowed max time allowed for collecting hits after which {@link TimeExceededException} is thrown + */ + public TimeLimitingCollector(final Collector collector, final long timeAllowed ) { + this.collector = collector; + t0 = TIMER_THREAD.getMilliseconds(); + this.timeout = t0 + timeAllowed; + } + + /** + * Return the timer resolution. + * @see #setResolution(long) + */ + public static long getResolution() { + return resolution; + } + + /** + * Set the timer resolution. + * The default timer resolution is 20 milliseconds. + * This means that a search required to take no longer than + * 800 milliseconds may be stopped after 780 to 820 milliseconds. + *
Note that: + * + */ + public static void setResolution(long newResolution) { + resolution = Math.max(newResolution,5); // 5 milliseconds is about the minimum reasonable time for a Object.wait(long) call. + } + + /** + * Checks if this time limited collector is greedy in collecting the last hit. + * A non greedy collector, upon a timeout, would throw a {@link TimeExceededException} + * without allowing the wrapped collector to collect current doc. A greedy one would + * first allow the wrapped hit collector to collect current doc and only then + * throw a {@link TimeExceededException}. + * @see #setGreedy(boolean) + */ + public boolean isGreedy() { + return greedy; + } + + /** + * Sets whether this time limited collector is greedy. + * @param greedy true to make this time limited greedy + * @see #isGreedy() + */ + public void setGreedy(boolean greedy) { + this.greedy = greedy; + } + + /** + * Calls {@link Collector#collect(int)} on the decorated {@link Collector} + * unless the allowed time has passed, in which case it throws an exception. + * + * @throws TimeExceededException + * if the time allowed has exceeded. 
+ */ + public void collect(final int doc) throws IOException { + long time = TIMER_THREAD.getMilliseconds(); + if (timeout < time) { + if (greedy) { + //System.out.println(this+" greedy: before failing, collecting doc: "+doc+" "+(time-t0)); + collector.collect(doc); + } + //System.out.println(this+" failing on: "+doc+" "+(time-t0)); + throw new TimeExceededException( timeout-t0, time-t0, doc ); + } + //System.out.println(this+" collecting: "+doc+" "+(time-t0)); + collector.collect(doc); + } + + public void setNextReader(IndexReader reader, int base) throws IOException { + collector.setNextReader(reader, base); + } + + public void setScorer(Scorer scorer) throws IOException { + collector.setScorer(scorer); + } + +} diff --git a/src/java/org/apache/lucene/search/TopDocs.java b/src/java/org/apache/lucene/search/TopDocs.java index 8c7dc564535..d1c1f03a47f 100644 --- a/src/java/org/apache/lucene/search/TopDocs.java +++ b/src/java/org/apache/lucene/search/TopDocs.java @@ -29,7 +29,10 @@ public class TopDocs implements java.io.Serializable { /** Expert: Stores the maximum score value encountered, needed for normalizing. */ private float maxScore; - /** Expert: Returns the maximum score value encountered. */ + /** + * Expert: Returns the maximum score value encountered. Note that in case + * scores are not tracked, this returns {@link Float#NaN}. + */ public float getMaxScore() { return maxScore; } @@ -38,7 +41,12 @@ public class TopDocs implements java.io.Serializable { public void setMaxScore(float maxScore) { this.maxScore=maxScore; } - + + /** Expert: Constructs a TopDocs with a default maxScore=Float.NaN. 
*/ + TopDocs(int totalHits, ScoreDoc[] scoreDocs) { + this(totalHits, scoreDocs, Float.NaN); + } + /** Expert: Constructs a TopDocs.*/ public TopDocs(int totalHits, ScoreDoc[] scoreDocs, float maxScore) { this.totalHits = totalHits; diff --git a/src/java/org/apache/lucene/search/TopDocsCollector.java b/src/java/org/apache/lucene/search/TopDocsCollector.java new file mode 100644 index 00000000000..f53dfd8f20c --- /dev/null +++ b/src/java/org/apache/lucene/search/TopDocsCollector.java @@ -0,0 +1,138 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.PriorityQueue; + +/** + * A base class for all collectors that return a {@link TopDocs} output. This + * collector allows easy extension by providing a single constructor which + * accepts a {@link PriorityQueue} as well as protected members for that + * priority queue and a counter of the number of total hits.
+ * Extending classes can override {@link #topDocs(int, int)} and + * {@link #getTotalHits()} in order to provide their own implementation. + */ +public abstract class TopDocsCollector extends Collector { + + // This is used in case topDocs() is called with illegal parameters, or there + // simply aren't (enough) results. + protected static final TopDocs EMPTY_TOPDOCS = new TopDocs(0, new ScoreDoc[0], Float.NaN); + + /** + * The priority queue which holds the top documents. Note that different + * implementations of PriorityQueue give different meaning to 'top documents'. + * HitQueue for example aggregates the top scoring documents, while other PQ + * implementations may hold documents sorted by other criteria. + */ + protected PriorityQueue pq; + + /** The total number of documents that the collector encountered. */ + protected int totalHits; + + protected TopDocsCollector(PriorityQueue pq) { + this.pq = pq; + } + + /** + * Populates the results array with the ScoreDoc instaces. This can be + * overridden in case a different ScoreDoc type should be returned. + */ + protected void populateResults(ScoreDoc[] results, int howMany) { + for (int i = howMany - 1; i >= 0; i--) { + results[i] = (ScoreDoc) pq.pop(); + } + } + + /** + * Returns a {@link TopDocs} instance containing the given results. If + * results is null it means there are no results to return, + * either because there were 0 calls to collect() or because the arguments to + * topDocs were invalid. + */ + protected TopDocs newTopDocs(ScoreDoc[] results, int start) { + return results == null ? EMPTY_TOPDOCS : new TopDocs(totalHits, results); + } + + /** The total number of documents that matched this query. */ + public int getTotalHits() { + return totalHits; + } + + /** Returns the top docs that were collected by this collector. */ + public final TopDocs topDocs() { + return topDocs(0, pq.size()); + } + + /** + * Returns the documents in the rage [start .. 
pq.size()) that were collected + * by this collector. Note that if start >= pq.size(), an empty TopDocs is + * returned.
+ * This method is convenient to call if the application allways asks for the + * last results, starting from the last 'page'.
+ * NOTE: you cannot call this method more than once for each search + * execution. If you need to call it more than once, passing each time a + * different start, you should call {@link #topDocs()} and work + * with the returned {@link TopDocs} object, which will contain all the + * results this search execution collected. + */ + public final TopDocs topDocs(int start) { + return topDocs(start, pq.size()); + } + + /** + * Returns the documents in the rage [start .. start+howMany) that were + * collected by this collector. Note that if start >= pq.size(), an empty + * TopDocs is returned, and if pq.size() - start < howMany, then only the + * available documents in [start .. pq.size()) are returned.
+ * This method is useful to call in case pagination of search results is + * allowed by the search application, as well as it attempts to optimize the + * memory used by allocating only as much as requested by howMany.
+ * NOTE: you cannot call this method more than once for each search + * execution. If you need to call it more than once, passing each time a + * different range, you should call {@link #topDocs()} and work with the + * returned {@link TopDocs} object, which will contain all the results this + * search execution collected. + */ + public TopDocs topDocs(int start, int howMany) { + + int pqsize = pq.size(); + + // Don't bother to throw an exception, just return an empty TopDocs in case + // the parameters are invalid or out of range. + if (start < 0 || start >= pqsize || howMany <= 0) { + return newTopDocs(null, start); + } + + // We know that start < pqsize, so just fix howMany. + howMany = Math.min(pqsize - start, howMany); + ScoreDoc[] results = new ScoreDoc[howMany]; + + // pq's pop() returns the 'least' element in the queue, therefore need + // to discard the first ones, until we reach the requested range. + // Note that this loop will usually not be executed, since the common usage + // should be that the caller asks for the last howMany results. However it's + // needed here for completeness. + for (int i = pqsize - start - howMany; i > 0; i--) { pq.pop(); } + + // Get the requested results from pq. + populateResults(results, howMany); + + return newTopDocs(results, start); + } + +} diff --git a/src/java/org/apache/lucene/search/TopFieldCollector.java b/src/java/org/apache/lucene/search/TopFieldCollector.java index 30082fb3496..e8c40cf98ef 100644 --- a/src/java/org/apache/lucene/search/TopFieldCollector.java +++ b/src/java/org/apache/lucene/search/TopFieldCollector.java @@ -21,201 +21,569 @@ import java.io.IOException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.FieldValueHitQueue.Entry; +import org.apache.lucene.util.PriorityQueue; /** - * A {@link HitCollector} that sorts by {@link SortField} using + * A {@link Collector} that sorts by {@link SortField} using * {@link FieldComparator}s. 
- * - * NOTE: This API is experimental and might change in - * incompatible ways in the next release. + * + *

NOTE: This API is experimental and might change in + * incompatible ways in the next release.

*/ -public final class TopFieldCollector extends MultiReaderHitCollector { +public abstract class TopFieldCollector extends TopDocsCollector { + + // TODO: one optimization we could do is to pre-fill + // the queue with sentinel value that guaranteed to + // always compare lower than a real hit; this would + // save having to check queueFull on each insert - private final FieldValueHitQueue queue; + /* + * Implements a TopFieldCollector over one SortField criteria, without + * tracking document scores and maxScore. + */ + private static class OneComparatorNonScoringCollector extends + TopFieldCollector { - private final FieldComparator[] comparators; - private FieldComparator comparator1; - private final int numComparators; - private int[] reverseMul; - private int reverseMul1 = 0; - - private final int numHits; - private int totalHits; - private FieldValueHitQueue.Entry bottom = null; - - /** Stores the maximum score value encountered, needed for normalizing. */ - private float maxScore = Float.NEGATIVE_INFINITY; - - private boolean queueFull; - - private boolean fillFields; - - public TopFieldCollector(Sort sort, int numHits, IndexReader[] subReaders, boolean fillFields) - throws IOException { - - if (sort.fields.length == 0) { - throw new IllegalArgumentException("Sort must contain at least one field"); + final FieldComparator comparator; + final int reverseMul; + + public OneComparatorNonScoringCollector(FieldValueHitQueue queue, + int numHits, boolean fillFields) throws IOException { + super(queue, numHits, fillFields); + comparator = queue.getComparators()[0]; + reverseMul = queue.getReverseMul()[0]; + } + + private final void updateBottom(int doc) { + // bottom.score is already set to Float.NaN in add(). 
+ bottom.docID = docBase + doc; + pq.adjustTop(); + bottom = (FieldValueHitQueue.Entry) pq.top(); } - queue = new FieldValueHitQueue(sort.fields, numHits, subReaders); - comparators = queue.getComparators(); - reverseMul = queue.getReverseMul(); - numComparators = comparators.length; - - if (numComparators == 1) { - comparator1 = comparators[0]; - reverseMul1 = reverseMul[0]; - } else { - comparator1 = null; - reverseMul1 = 0; + public void collect(int doc) throws IOException { + ++totalHits; + if (queueFull) { + // Fastmatch: return if this hit is not competitive + final int cmp = reverseMul * comparator.compareBottom(doc); + if (cmp < 0 || (cmp == 0 && doc + docBase > bottom.docID)) { + return; + } + + // This hit is competitive - replace bottom element in queue & adjustTop + comparator.copy(bottom.slot, doc); + updateBottom(doc); + comparator.setBottom(bottom.slot); + } else { + // Startup transient: queue hasn't gathered numHits yet + final int slot = totalHits - 1; + // Copy hit into queue + comparator.copy(slot, doc); + add(slot, doc, Float.NaN); + if (queueFull) { + comparator.setBottom(bottom.slot); + } + } } + + public void setNextReader(IndexReader reader, int docBase) throws IOException { + final int numSlotsFull = queueFull ? numHits : totalHits; + this.docBase = docBase; + comparator.setNextReader(reader, docBase, numSlotsFull); + } + + public void setScorer(Scorer scorer) throws IOException { + comparator.setScorer(scorer); + } + + } + + /* + * Implements a TopFieldCollector over one SortField criteria, while tracking + * document scores but no maxScore. 
+ */ + private static class OneComparatorScoringNoMaxScoreCollector extends + OneComparatorNonScoringCollector { + + private Scorer scorer; + + public OneComparatorScoringNoMaxScoreCollector(FieldValueHitQueue queue, + int numHits, boolean fillFields) throws IOException { + super(queue, numHits, fillFields); + } + + private final void updateBottom(int doc, float score) { + bottom.docID = docBase + doc; + bottom.score = score; + pq.adjustTop(); + bottom = (FieldValueHitQueue.Entry) pq.top(); + } + + public void collect(int doc) throws IOException { + ++totalHits; + if (queueFull) { + // Fastmatch: return if this hit is not competitive + final int cmp = reverseMul * comparator.compareBottom(doc); + if (cmp < 0 || (cmp == 0 && doc + docBase > bottom.docID)) { + return; + } + + // Compute the score only if the hit is competitive. + final float score = scorer.score(); + + // This hit is competitive - replace bottom element in queue & adjustTop + comparator.copy(bottom.slot, doc); + updateBottom(doc, score); + comparator.setBottom(bottom.slot); + } else { + // Compute the score only if the hit is competitive. + final float score = scorer.score(); + + // Startup transient: queue hasn't gathered numHits yet + final int slot = totalHits - 1; + // Copy hit into queue + comparator.copy(slot, doc); + add(slot, doc, score); + if (queueFull) { + comparator.setBottom(bottom.slot); + } + } + } + + public void setNextReader(IndexReader reader, int docBase) throws IOException { + final int numSlotsFull = queueFull ? numHits : totalHits; + this.docBase = docBase; + comparator.setNextReader(reader, docBase, numSlotsFull); + } + + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + comparator.setScorer(scorer); + } + + } + + /* + * Implements a TopFieldCollector over one SortField criteria, with tracking + * document scores and maxScore. 
+ */ + private final static class OneComparatorScoringMaxScoreCollector extends + OneComparatorNonScoringCollector { + + private Scorer scorer; + + public OneComparatorScoringMaxScoreCollector(FieldValueHitQueue queue, + int numHits, boolean fillFields) throws IOException { + super(queue, numHits, fillFields); + // Must set maxScore to NEG_INF, or otherwise Math.max always returns NaN. + maxScore = Float.NEGATIVE_INFINITY; + } + + private final void updateBottom(int doc, float score) { + bottom.docID = docBase + doc; + bottom.score = score; + pq.adjustTop(); + bottom = (FieldValueHitQueue.Entry) pq.top(); + } + + public void collect(int doc) throws IOException { + final float score = scorer.score(); + if (score > maxScore) { + maxScore = score; + } + ++totalHits; + if (queueFull) { + // Fastmatch: return if this hit is not competitive + final int cmp = reverseMul * comparator.compareBottom(doc); + if (cmp < 0 || (cmp == 0 && doc + docBase > bottom.docID)) { + return; + } + + // This hit is competitive - replace bottom element in queue & adjustTop + comparator.copy(bottom.slot, doc); + updateBottom(doc, score); + comparator.setBottom(bottom.slot); + } else { + // Startup transient: queue hasn't gathered numHits yet + final int slot = totalHits - 1; + // Copy hit into queue + comparator.copy(slot, doc); + add(slot, doc, score); + if (queueFull) { + comparator.setBottom(bottom.slot); + } + } + + } + + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + super.setScorer(scorer); + } + } + + /* + * Implements a TopFieldCollector over multiple SortField criteria, without + * tracking document scores and maxScore. 
+ */ + private static class MultiComparatorNonScoringCollector extends TopFieldCollector { + + final FieldComparator[] comparators; + final int[] reverseMul; + + public MultiComparatorNonScoringCollector(FieldValueHitQueue queue, + int numHits, boolean fillFields) throws IOException { + super(queue, numHits, fillFields); + comparators = queue.getComparators(); + reverseMul = queue.getReverseMul(); + } + + private final void updateBottom(int doc) { + // bottom.score is already set to Float.NaN in add(). + bottom.docID = docBase + doc; + pq.adjustTop(); + bottom = (FieldValueHitQueue.Entry) pq.top(); + } + + public void collect(int doc) throws IOException { + ++totalHits; + if (queueFull) { + // Fastmatch: return if this hit is not competitive + for (int i = 0;; i++) { + final int c = reverseMul[i] * comparators[i].compareBottom(doc); + if (c < 0) { + // Definitely not competitive + return; + } else if (c > 0) { + // Definitely competitive + break; + } else if (i == comparators.length - 1) { + // This is the equals case. + if (doc + docBase > bottom.docID) { + // Definitely not competitive + return; + } + break; + } + } + + // This hit is competitive - replace bottom element in queue & adjustTop + for (int i = 0; i < comparators.length; i++) { + comparators[i].copy(bottom.slot, doc); + } + + updateBottom(doc); + + for (int i = 0; i < comparators.length; i++) { + comparators[i].setBottom(bottom.slot); + } + } else { + // Startup transient: queue hasn't gathered numHits yet + final int slot = totalHits - 1; + // Copy hit into queue + for (int i = 0; i < comparators.length; i++) { + comparators[i].copy(slot, doc); + } + add(slot, doc, Float.NaN); + if (queueFull) { + for (int i = 0; i < comparators.length; i++) { + comparators[i].setBottom(bottom.slot); + } + } + } + } + + public void setNextReader(IndexReader reader, int docBase) throws IOException { + final int numSlotsFull = queueFull ? 
numHits : totalHits; + this.docBase = docBase; + for (int i = 0; i < comparators.length; i++) { + comparators[i].setNextReader(reader, docBase, numSlotsFull); + } + } + + public void setScorer(Scorer scorer) throws IOException { + // set the scorer on all comparators + for (int i = 0; i < comparators.length; i++) { + comparators[i].setScorer(scorer); + } + } + } + + /* + * Implements a TopFieldCollector over multiple SortField criteria, with + * tracking document scores and maxScore. + */ + private final static class MultiComparatorScoringMaxScoreCollector extends MultiComparatorNonScoringCollector { + + private Scorer scorer; + + public MultiComparatorScoringMaxScoreCollector(FieldValueHitQueue queue, + int numHits, boolean fillFields) throws IOException { + super(queue, numHits, fillFields); + // Must set maxScore to NEG_INF, or otherwise Math.max always returns NaN. + maxScore = Float.NEGATIVE_INFINITY; + } + + private final void updateBottom(int doc, float score) { + bottom.docID = docBase + doc; + bottom.score = score; + pq.adjustTop(); + bottom = (FieldValueHitQueue.Entry) pq.top(); + } + + public void collect(int doc) throws IOException { + final float score = scorer.score(); + if (score > maxScore) { + maxScore = score; + } + ++totalHits; + if (queueFull) { + // Fastmatch: return if this hit is not competitive + for (int i = 0;; i++) { + final int c = reverseMul[i] * comparators[i].compareBottom(doc); + if (c < 0) { + // Definitely not competitive + return; + } else if (c > 0) { + // Definitely competitive + break; + } else if (i == comparators.length - 1) { + // This is the equals case. 
+ if (doc + docBase > bottom.docID) { + // Definitely not competitive + return; + } + break; + } + } + + // This hit is competitive - replace bottom element in queue & adjustTop + for (int i = 0; i < comparators.length; i++) { + comparators[i].copy(bottom.slot, doc); + } + + updateBottom(doc, score); + + for (int i = 0; i < comparators.length; i++) { + comparators[i].setBottom(bottom.slot); + } + } else { + // Startup transient: queue hasn't gathered numHits yet + final int slot = totalHits - 1; + // Copy hit into queue + for (int i = 0; i < comparators.length; i++) { + comparators[i].copy(slot, doc); + } + add(slot, doc, score); + if (queueFull) { + for (int i = 0; i < comparators.length; i++) { + comparators[i].setBottom(bottom.slot); + } + } + } + } + + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + super.setScorer(scorer); + } + } + + /* + * Implements a TopFieldCollector over multiple SortField criteria, with + * tracking document scores and maxScore. + */ + private final static class MultiComparatorScoringNoMaxScoreCollector extends MultiComparatorNonScoringCollector { + + private Scorer scorer; + + public MultiComparatorScoringNoMaxScoreCollector(FieldValueHitQueue queue, + int numHits, boolean fillFields) throws IOException { + super(queue, numHits, fillFields); + } + + private final void updateBottom(int doc, float score) { + bottom.docID = docBase + doc; + bottom.score = score; + pq.adjustTop(); + bottom = (FieldValueHitQueue.Entry) pq.top(); + } + + public void collect(int doc) throws IOException { + ++totalHits; + if (queueFull) { + // Fastmatch: return if this hit is not competitive + for (int i = 0;; i++) { + final int c = reverseMul[i] * comparators[i].compareBottom(doc); + if (c < 0) { + // Definitely not competitive + return; + } else if (c > 0) { + // Definitely competitive + break; + } else if (i == comparators.length - 1) { + // This is the equals case. 
+ if (doc + docBase > bottom.docID) { + // Definitely not competitive + return; + } + break; + } + } + + // This hit is competitive - replace bottom element in queue & adjustTop + for (int i = 0; i < comparators.length; i++) { + comparators[i].copy(bottom.slot, doc); + } + + // Compute score only if it is competitive. + final float score = scorer.score(); + updateBottom(doc, score); + + for (int i = 0; i < comparators.length; i++) { + comparators[i].setBottom(bottom.slot); + } + } else { + // Startup transient: queue hasn't gathered numHits yet + final int slot = totalHits - 1; + // Copy hit into queue + for (int i = 0; i < comparators.length; i++) { + comparators[i].copy(slot, doc); + } + + // Compute score only if it competitive. + final float score = scorer.score(); + add(slot, doc, score); + if (queueFull) { + for (int i = 0; i < comparators.length; i++) { + comparators[i].setBottom(bottom.slot); + } + } + } + } + + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + super.setScorer(scorer); + } + } + + private static final ScoreDoc[] EMPTY_SCOREDOCS = new ScoreDoc[0]; + + private final boolean fillFields; + + /* + * Stores the maximum score value encountered, needed for normalizing. If + * document scores are not tracked, this value is initialized to NaN. + */ + float maxScore = Float.NaN; + + final int numHits; + FieldValueHitQueue.Entry bottom = null; + boolean queueFull; + int docBase; + + // Declaring the constructor private prevents extending this class by anyone + // else. Note that the class cannot be final since it's extended by the + // internal versions. If someone will define a constructor with any other + // visibility, then anyone will be able to extend the class, which is not what + // we want. 
+ private TopFieldCollector(PriorityQueue pq, int numHits, boolean fillFields) { + super(pq); this.numHits = numHits; this.fillFields = fillFields; } - int currentDocBase; - - // javadoc inherited - public void setNextReader(IndexReader reader, int docBase) throws IOException { - final int numSlotsFull; - if (queueFull) - numSlotsFull = numHits; - else - numSlotsFull = totalHits; - - currentDocBase = docBase; - - for (int i = 0; i < numComparators; i++) { - comparators[i].setNextReader(reader, docBase, numSlotsFull); + /** + * Creates a new {@link TopFieldCollector} from the given arguments. + * + * @param sort + * the sort criteria (SortFields). + * @param numHits + * the number of results to collect. + * @param fillFields + * specifies whether the actual field values should be returned on + * the results (FieldDoc). + * @param trackDocScores + * specifies whether document scores should be tracked and set on the + * results. Note that if set to false, then the results' scores will + * be set to Float.NaN. Setting this to true affects performance, as + * it incurs the score computation on each competitive result. + * Therefore if document scores are not required by the application, + * it is recommended to set it to false. + * @param trackMaxScore + * specifies whether the query's maxScore should be tracked and set + * on the resulting {@link TopDocs}. Note that if set to false, + * {@link TopDocs#getMaxScore()} returns Float.NaN. Setting this to + * true affects performance as it incurs the score computation on + * each result. Also, setting this true automatically sets + * trackDocScores to true as well. + * @return a {@link TopFieldCollector} instance which will sort the results by + * the sort criteria. 
+ * @throws IOException + */ + public static TopFieldCollector create(Sort sort, int numHits, + boolean fillFields, boolean trackDocScores, boolean trackMaxScore) + throws IOException { + if (sort.fields.length == 0) { + throw new IllegalArgumentException("Sort must contain at least one field"); } - } - - private final void updateBottom(int doc, float score) { - bottom.docID = currentDocBase + doc; - bottom.score = score; - queue.adjustTop(); - bottom = (FieldValueHitQueue.Entry) queue.top(); - } - - private final void add(int slot, int doc, float score) { - queue.put(new FieldValueHitQueue.Entry(slot, currentDocBase+doc, score)); - bottom = (FieldValueHitQueue.Entry) queue.top(); - queueFull = totalHits == numHits; - } - - // javadoc inherited - public void collect(int doc, float score) { - if (score > 0.0f) { - - maxScore = Math.max(maxScore, score); - totalHits++; - - // TODO: one optimization we could do is to pre-fill - // the queue with sentinel value that guaranteed to - // always compare lower than a real hit; this would - // save having to check queueFull on each insert - - if (queueFull) { - - if (numComparators == 1) { - // Common case - - // Fastmatch: return if this hit is not competitive - final int cmp = reverseMul1 * comparator1.compareBottom(doc, score); - if (cmp < 0) { - // Definitely not competitive - return; - } else if (cmp == 0 && doc + currentDocBase > bottom.docID) { - // Definitely not competitive - return; - } - - // This hit is competitive -- replace bottom - // element in queue & adjustTop - comparator1.copy(bottom.slot, doc, score); - - updateBottom(doc, score); - - comparator1.setBottom(bottom.slot); - - } else { - - // Fastmatch: return if this hit is not competitive - for(int i=0;;i++) { - final int c = reverseMul[i] * comparators[i].compareBottom(doc, score); - if (c < 0) { - // Definitely not competitive - return; - } else if (c > 0) { - // Definitely competitive - break; - } else if (i == numComparators-1) { - // This is the 
equals case. - if (doc + currentDocBase > bottom.docID) { - // Definitely not competitive - return; - } else { - break; - } - } - } - - // This hit is competitive -- replace bottom - // element in queue & adjustTop - for (int i = 0; i < numComparators; i++) { - comparators[i].copy(bottom.slot, doc, score); - } - - updateBottom(doc, score); - - for(int i=0;i= 0; i--) { - scoreDocs[i] = queue.fillFields((FieldValueHitQueue.Entry) queue.pop()); + results[i] = queue.fillFields((FieldValueHitQueue.Entry) queue.pop()); } } else { - Entry entry = (FieldValueHitQueue.Entry) queue.pop(); for (int i = queue.size() - 1; i >= 0; i--) { - scoreDocs[i] = new FieldDoc(entry.docID, - entry.score); + Entry entry = (FieldValueHitQueue.Entry) queue.pop(); + results[i] = new FieldDoc(entry.docID, entry.score); } } - - return new TopFieldDocs(totalHits, scoreDocs, queue.getFields(), maxScore); } + + protected TopDocs newTopDocs(ScoreDoc[] results, int start) { + if (results == null) { + results = EMPTY_SCOREDOCS; + // Set maxScore to NaN, in case this is a maxScore tracking collector. + maxScore = Float.NaN; + } + + // If this is a maxScoring tracking collector and there were no results, + return new TopFieldDocs(totalHits, results, ((FieldValueHitQueue) pq).getFields(), maxScore); + } + } diff --git a/src/java/org/apache/lucene/search/TopFieldDocCollector.java b/src/java/org/apache/lucene/search/TopFieldDocCollector.java index 10c6c22878b..a689ab3c421 100644 --- a/src/java/org/apache/lucene/search/TopFieldDocCollector.java +++ b/src/java/org/apache/lucene/search/TopFieldDocCollector.java @@ -30,8 +30,8 @@ import org.apache.lucene.index.IndexReader; * documents are collected. * * @deprecated Please use {@link TopFieldCollector} instead. 
- **/ -public class TopFieldDocCollector extends TopScoreDocCollector { + */ +public class TopFieldDocCollector extends TopDocCollector { private FieldDoc reusableFD; @@ -50,7 +50,7 @@ public class TopFieldDocCollector extends TopScoreDocCollector { if (score > 0.0f) { totalHits++; if (reusableFD == null) - reusableFD = new FieldDoc(doc + docBase, score); + reusableFD = new FieldDoc(doc, score); else { // Whereas TopScoreDocCollector can skip this if the // score is not competitive, we cannot because the @@ -58,7 +58,7 @@ public class TopFieldDocCollector extends TopScoreDocCollector { // aren't in general congruent with "higher score // wins" reusableFD.score = score; - reusableFD.doc = doc + docBase; + reusableFD.doc = doc; } reusableFD = (FieldDoc) hq.insertWithOverflow(reusableFD); } diff --git a/src/java/org/apache/lucene/search/TopScoreDocCollector.java b/src/java/org/apache/lucene/search/TopScoreDocCollector.java index f28c4dbcc1a..f8a993cd898 100644 --- a/src/java/org/apache/lucene/search/TopScoreDocCollector.java +++ b/src/java/org/apache/lucene/search/TopScoreDocCollector.java @@ -17,85 +17,74 @@ package org.apache.lucene.search; * limitations under the License. */ -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.util.PriorityQueue; +import java.io.IOException; -/** A {@link MultiReaderHitCollector} implementation that - * collects the top-scoring documents, returning them as a - * {@link TopDocs}. This is used by {@link IndexSearcher} - * to implement {@link TopDocs}-based search. - * - *

This may be extended, overriding the {@link - * MultiReaderHitCollector#collect} method to, e.g., - * conditionally invoke super() in order to - * filter which documents are collected, but sure you - * either take docBase into account, or also override - * {@link MultiReaderHitCollector#setNextReader} method. */ -public class TopScoreDocCollector extends MultiReaderHitCollector { +import org.apache.lucene.index.IndexReader; + +/** + * A {@link Collector} implementation that collects the + * top-scoring hits, returning them as a {@link + * TopDocs}. This is used by {@link IndexSearcher} to + * implement {@link TopDocs}-based search. Hits are sorted + * by score descending and then (when the scores are tied) + * docID ascending. + */ +public final class TopScoreDocCollector extends TopDocsCollector { private ScoreDoc reusableSD; - - /** The total number of hits the collector encountered. */ - protected int totalHits; - - /** The priority queue which holds the top-scoring documents. */ - protected PriorityQueue hq; - - protected int docBase = 0; + private int docBase = 0; + private Scorer scorer; /** Construct to collect a given number of hits. * @param numHits the maximum number of hits to collect */ public TopScoreDocCollector(int numHits) { - this(new HitQueue(numHits)); + super(new HitQueue(numHits)); } - /** Constructor to collect the top-scoring documents by using the given PQ. - * @param hq the PQ to use by this instance. 
- */ - protected TopScoreDocCollector(PriorityQueue hq) { - this.hq = hq; - } - - // javadoc inherited - public void collect(int doc, float score) { - if (score > 0.0f) { - totalHits++; - if (reusableSD == null) { - reusableSD = new ScoreDoc(doc + docBase, score); - } else if (score >= reusableSD.score) { - // reusableSD holds the last "rejected" entry, so, if - // this new score is not better than that, there's no - // need to try inserting it - reusableSD.doc = doc + docBase; - reusableSD.score = score; - } else { - return; - } - reusableSD = (ScoreDoc) hq.insertWithOverflow(reusableSD); + protected TopDocs newTopDocs(ScoreDoc[] results, int start) { + if (results == null) { + return EMPTY_TOPDOCS; } - } - - /** The total number of documents that matched this query. */ - public int getTotalHits() { - return totalHits; - } - - /** The top-scoring hits. */ - public TopDocs topDocs() { - ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()]; - for (int i = hq.size()-1; i >= 0; i--) { // put docs in array - scoreDocs[i] = (ScoreDoc) hq.pop(); - } - - float maxScore = (totalHits==0) - ? Float.NEGATIVE_INFINITY - : scoreDocs[0].score; - return new TopDocs(totalHits, scoreDocs, maxScore); + // We need to compute maxScore in order to set it in TopDocs. If start == 0, + // it means the largest element is already in results, use its score as + // maxScore. Otherwise pop everything else, until the largest element is + // extracted and use its score as maxScore. 
+ float maxScore = Float.NaN; + if (start == 0) { + maxScore = results[0].score; + } else { + for (int i = pq.size(); i > 1; i--) { pq.pop(); } + maxScore = ((ScoreDoc) pq.pop()).score; + } + + return new TopDocs(totalHits, results, maxScore); } + // javadoc inherited + public void collect(int doc) throws IOException { + float score = scorer.score(); + totalHits++; + if (reusableSD == null) { + reusableSD = new ScoreDoc(doc + docBase, score); + } else if (score >= reusableSD.score) { + // reusableSD holds the last "rejected" entry, so, if + // this new score is not better than that, there's no + // need to try inserting it + reusableSD.doc = doc + docBase; + reusableSD.score = score; + } else { + return; + } + reusableSD = (ScoreDoc) pq.insertWithOverflow(reusableSD); + } + public void setNextReader(IndexReader reader, int base) { docBase = base; } + + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + } } diff --git a/src/test/org/apache/lucene/index/TestIndexReader.java b/src/test/org/apache/lucene/index/TestIndexReader.java index 0b1d3dac36a..e45c740bf1f 100644 --- a/src/test/org/apache/lucene/index/TestIndexReader.java +++ b/src/test/org/apache/lucene/index/TestIndexReader.java @@ -23,10 +23,10 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.util.Arrays; import java.util.Collection; +import java.util.HashSet; import java.util.Iterator; import java.util.Map; import java.util.Set; -import java.util.HashSet; import junit.framework.TestSuite; import junit.textui.TestRunner; @@ -35,15 +35,16 @@ import org.apache.lucene.analysis.WhitespaceAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Fieldable; import org.apache.lucene.document.FieldSelector; +import org.apache.lucene.document.Fieldable; import org.apache.lucene.document.SetBasedFieldSelector; import 
org.apache.lucene.index.IndexReader.FieldOption; +import org.apache.lucene.search.Collector; import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.MultiReaderHitCollector; -import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.FieldCache; import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.Directory; @@ -1651,7 +1652,7 @@ public class TestIndexReader extends LuceneTestCase Query q = new TermQuery(new Term("id", "a")); IndexSearcher s = new IndexSearcher(dir); - s.search(q, new MultiReaderHitCollector() { + s.search(q, new Collector() { int lastDocBase = -1; public void setNextReader(IndexReader reader, int docBase) { if (lastDocBase == -1) { @@ -1663,13 +1664,14 @@ public class TestIndexReader extends LuceneTestCase } lastDocBase = docBase; } - public void collect(int doc, float score) {} + public void collect(int doc) {} + public void setScorer(Scorer scorer) {} }); s.close(); IndexReader r = IndexReader.open(dir); s = new IndexSearcher(r, true); - s.search(q, new MultiReaderHitCollector() { + s.search(q, new Collector() { int lastDocBase = -1; public void setNextReader(IndexReader reader, int docBase) { if (lastDocBase == -1) { @@ -1681,7 +1683,8 @@ public class TestIndexReader extends LuceneTestCase } lastDocBase = docBase; } - public void collect(int doc, float score) {} + public void collect(int doc) {} + public void setScorer(Scorer scorer) {} }); s.close(); r.close(); diff --git a/src/test/org/apache/lucene/index/TestOmitTf.java b/src/test/org/apache/lucene/index/TestOmitTf.java index 8efc4ccd9a9..f4d8e454f89 100644 --- a/src/test/org/apache/lucene/index/TestOmitTf.java +++ b/src/test/org/apache/lucene/index/TestOmitTf.java @@ -17,6 +17,7 @@ package 
org.apache.lucene.index; * limitations under the License. */ +import java.io.IOException; import java.util.Collection; import org.apache.lucene.util.LuceneTestCase; @@ -27,7 +28,8 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.MultiReaderHitCollector; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Searcher; import org.apache.lucene.search.Similarity; import org.apache.lucene.search.TermQuery; @@ -283,10 +285,15 @@ public class TestOmitTf extends LuceneTestCase { searcher.search(q1, new CountingHitCollector() { - public final void collect(int doc, float score) { + private Scorer scorer; + public final void setScorer(Scorer scorer) { + this.scorer = scorer; + } + public final void collect(int doc) throws IOException { //System.out.println("Q1: Doc=" + doc + " score=" + score); + float score = scorer.score(); assertTrue(score==1.0f); - super.collect(doc, score); + super.collect(doc); } }); //System.out.println(CountingHitCollector.getCount()); @@ -294,10 +301,15 @@ public class TestOmitTf extends LuceneTestCase { searcher.search(q2, new CountingHitCollector() { - public final void collect(int doc, float score) { - //System.out.println("Q2: Doc=" + doc + " score=" + score); + private Scorer scorer; + public final void setScorer(Scorer scorer) { + this.scorer = scorer; + } + public final void collect(int doc) throws IOException { + //System.out.println("Q2: Doc=" + doc + " score=" + score); + float score = scorer.score(); assertTrue(score==1.0f+doc); - super.collect(doc, score); + super.collect(doc); } }); //System.out.println(CountingHitCollector.getCount()); @@ -308,11 +320,16 @@ public class TestOmitTf extends LuceneTestCase { searcher.search(q3, new CountingHitCollector() { - public final void collect(int doc, float score) { + 
private Scorer scorer; + public final void setScorer(Scorer scorer) { + this.scorer = scorer; + } + public final void collect(int doc) throws IOException { //System.out.println("Q1: Doc=" + doc + " score=" + score); + float score = scorer.score(); assertTrue(score==1.0f); assertFalse(doc%2==0); - super.collect(doc, score); + super.collect(doc); } }); //System.out.println(CountingHitCollector.getCount()); @@ -320,11 +337,16 @@ public class TestOmitTf extends LuceneTestCase { searcher.search(q4, new CountingHitCollector() { - public final void collect(int doc, float score) { + private Scorer scorer; + public final void setScorer(Scorer scorer) { + this.scorer = scorer; + } + public final void collect(int doc) throws IOException { + float score = scorer.score(); //System.out.println("Q1: Doc=" + doc + " score=" + score); assertTrue(score==1.0f); assertTrue(doc%2==0); - super.collect(doc, score); + super.collect(doc); } }); //System.out.println(CountingHitCollector.getCount()); @@ -337,9 +359,9 @@ public class TestOmitTf extends LuceneTestCase { searcher.search(bq, new CountingHitCollector() { - public final void collect(int doc, float score) { + public final void collect(int doc) throws IOException { //System.out.println("BQ: Doc=" + doc + " score=" + score); - super.collect(doc, score); + super.collect(doc); } }); assertTrue(15 == CountingHitCollector.getCount()); @@ -348,12 +370,13 @@ public class TestOmitTf extends LuceneTestCase { dir.close(); } - public static class CountingHitCollector extends MultiReaderHitCollector { + public static class CountingHitCollector extends Collector { static int count=0; static int sum=0; private int docBase = -1; CountingHitCollector(){count=0;sum=0;} - public void collect(int doc, float score) { + public void setScorer(Scorer scorer) throws IOException {} + public void collect(int doc) throws IOException { count++; sum += doc + docBase; // use it to avoid any possibility of being optimized away } diff --git 
a/src/test/org/apache/lucene/search/CheckHits.java b/src/test/org/apache/lucene/search/CheckHits.java index fd6c65bb40e..2ddfca70f2a 100644 --- a/src/test/org/apache/lucene/search/CheckHits.java +++ b/src/test/org/apache/lucene/search/CheckHits.java @@ -89,9 +89,10 @@ public class CheckHits { } final Set actual = new TreeSet(); - searcher.search(query, new MultiReaderHitCollector() { - private int base = -1; - public void collect(int doc, float score) { + searcher.search(query, new Collector() { + private int base = 0; + public void setScorer(Scorer scorer) throws IOException {} + public void collect(int doc) { actual.add(new Integer(doc + base)); } @@ -390,14 +391,22 @@ public class CheckHits { checkExplanations(query); return super.search(query,filter,n,sort); } + /** @deprecated use {@link #search(Query, Collector)} instead. */ public void search(Query query, HitCollector results) throws IOException { - checkExplanations(query); - super.search(query,results); + search(query, new HitCollectorWrapper(results)); } + public void search(Query query, Collector results) throws IOException { + checkExplanations(query); + super.search(query, results); + } + /** @deprecated use {@link #search(Query, Filter, Collector)} instead. 
*/ public void search(Query query, Filter filter, HitCollector results) throws IOException { + search(query, filter, new HitCollectorWrapper(results)); + } + public void search(Query query, Filter filter, Collector results) throws IOException { checkExplanations(query); - super.search(query,filter, results); + super.search(query, filter, results); } public TopDocs search(Query query, Filter filter, int n) throws IOException { @@ -416,7 +425,7 @@ public class CheckHits { * * @see CheckHits#verifyExplanation */ - public static class ExplanationAsserter extends MultiReaderHitCollector { + public static class ExplanationAsserter extends Collector { /** * @deprecated @@ -428,8 +437,9 @@ public class CheckHits { Searcher s; String d; boolean deep; - - private int base = -1; + + Scorer scorer; + private int base = 0; /** Constructs an instance which does shallow tests on the Explanation */ public ExplanationAsserter(Query q, String defaultFieldName, Searcher s) { @@ -441,8 +451,12 @@ public class CheckHits { this.d = q.toString(defaultFieldName); this.deep=deep; } - - public void collect(int doc, float score) { + + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + } + + public void collect(int doc) throws IOException { Explanation exp = null; doc = doc + base; try { @@ -454,7 +468,7 @@ public class CheckHits { TestCase.assertNotNull("Explanation of [["+d+"]] for #"+doc+" is null", exp); - verifyExplanation(d,doc,score,deep,exp); + verifyExplanation(d,doc,scorer.score(),deep,exp); } public void setNextReader(IndexReader reader, int docBase) { base = docBase; diff --git a/src/test/org/apache/lucene/search/JustCompileSearch.java b/src/test/org/apache/lucene/search/JustCompileSearch.java new file mode 100644 index 00000000000..ce96955d104 --- /dev/null +++ b/src/test/org/apache/lucene/search/JustCompileSearch.java @@ -0,0 +1,580 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more 
+ * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.FieldSelector; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermPositions; +import org.apache.lucene.util.PriorityQueue; + +/** + * Holds all implementations of classes in the o.a.l.search package as a + * back-compatibility test. It does not run any tests per-se, however if + * someone adds a method to an interface or abstract method to an abstract + * class, one of the implementations here will fail to compile and so we know + * back-compat policy was violated. 
+ */ +final class JustCompileSearch { + + private static final String UNSUPPORTED_MSG = "unsupported: used for back-compat testing only !"; + + static final class JustCompileSearchable implements Searchable { + + public void close() throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public Document doc(int i) throws CorruptIndexException, IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public Document doc(int n, FieldSelector fieldSelector) + throws CorruptIndexException, IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public int docFreq(Term term) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public int[] docFreqs(Term[] terms) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public Explanation explain(Weight weight, int doc) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public int maxDoc() throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public Query rewrite(Query query) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public void search(Weight weight, Filter filter, HitCollector results) + throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public void search(Weight weight, Filter filter, Collector collector) + throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public TopDocs search(Weight weight, Filter filter, int n) + throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public TopFieldDocs search(Weight weight, Filter filter, int n, Sort sort) + throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class JustCompileSearcher extends Searcher { + + public void close() throws IOException { + 
throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public Document doc(int i) throws CorruptIndexException, IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public int docFreq(Term term) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public Explanation explain(Weight weight, int doc) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public int maxDoc() throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public Query rewrite(Query query) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public void search(Weight weight, Filter filter, HitCollector results) + throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public void search(Weight weight, Filter filter, Collector results) + throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public TopDocs search(Weight weight, Filter filter, int n) + throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public TopFieldDocs search(Weight weight, Filter filter, int n, Sort sort) + throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public Document doc(int n, FieldSelector fieldSelector) + throws CorruptIndexException, IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class JustCompileCollector extends Collector { + + public void collect(int doc) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public void setNextReader(IndexReader reader, int docBase) + throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public void setScorer(Scorer scorer) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class 
JustCompileDocIdSet extends DocIdSet { + + public DocIdSetIterator iterator() throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class JustCompileDocIdSetIterator extends DocIdSetIterator { + + public int doc() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public boolean next() throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public boolean skipTo(int target) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class JustCompileFieldCache implements FieldCache { + + public Object getAuto(IndexReader reader, String field) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public byte[] getBytes(IndexReader reader, String field) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public byte[] getBytes(IndexReader reader, String field, ByteParser parser) + throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + /** @deprecated */ + public Comparable[] getCustom(IndexReader reader, String field, + SortComparator comparator) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public float[] getFloats(IndexReader reader, String field) + throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public float[] getFloats(IndexReader reader, String field, + FloatParser parser) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public int[] getInts(IndexReader reader, String field) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public int[] getInts(IndexReader reader, String field, IntParser parser) + throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public short[] getShorts(IndexReader reader, String field) + 
throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public short[] getShorts(IndexReader reader, String field, + ShortParser parser) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public StringIndex getStringIndex(IndexReader reader, String field) + throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public String[] getStrings(IndexReader reader, String field) + throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class JustCompileExtendedFieldCache implements ExtendedFieldCache { + + public double[] getDoubles(IndexReader reader, String field) + throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public double[] getDoubles(IndexReader reader, String field, + DoubleParser parser) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public long[] getLongs(IndexReader reader, String field) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public long[] getLongs(IndexReader reader, String field, LongParser parser) + throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public Object getAuto(IndexReader reader, String field) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public byte[] getBytes(IndexReader reader, String field) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public byte[] getBytes(IndexReader reader, String field, ByteParser parser) + throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + /** @deprecated */ + public Comparable[] getCustom(IndexReader reader, String field, + SortComparator comparator) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public float[] getFloats(IndexReader 
reader, String field) + throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public float[] getFloats(IndexReader reader, String field, + FloatParser parser) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public int[] getInts(IndexReader reader, String field) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public int[] getInts(IndexReader reader, String field, IntParser parser) + throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public short[] getShorts(IndexReader reader, String field) + throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public short[] getShorts(IndexReader reader, String field, + ShortParser parser) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public StringIndex getStringIndex(IndexReader reader, String field) + throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public String[] getStrings(IndexReader reader, String field) + throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class JustCompileExtendedFieldCacheLongParser implements ExtendedFieldCache.LongParser { + + public long parseLong(String string) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class JustCompileExtendedFieldCacheDoubleParser implements ExtendedFieldCache.DoubleParser { + + public double parseDouble(String string) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class JustCompileFieldComparator extends FieldComparator { + + public int compare(int slot1, int slot2) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public int compareBottom(int doc) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public void 
copy(int slot, int doc) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public void setBottom(int slot) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public void setNextReader(IndexReader reader, int docBase, int numSlotsFull) + throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public int sortType() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public Comparable value(int slot) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class JustCompileFieldComparatorSource extends FieldComparatorSource { + + public FieldComparator newComparator(String fieldname, int numHits, + int sortPos, boolean reversed) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class JustCompileFilter extends Filter { + // Filter is just an abstract class with no abstract methods. However it is + // still added here in case someone will add abstract methods in the future. 
+ } + + static final class JustCompileFilteredDocIdSet extends FilteredDocIdSet { + + public JustCompileFilteredDocIdSet(DocIdSet innerSet) { + super(innerSet); + } + + protected boolean match(int docid) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class JustCompileFilteredDocIdSetIterator extends FilteredDocIdSetIterator { + + public JustCompileFilteredDocIdSetIterator(DocIdSetIterator innerIter) { + super(innerIter); + } + + protected boolean match(int doc) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class JustCompileFilteredTermEnum extends FilteredTermEnum { + + public float difference() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + protected boolean endEnum() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + protected boolean termCompare(Term term) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class JustCompileMultiTermQuery extends MultiTermQuery { + + protected FilteredTermEnum getEnum(IndexReader reader) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class JustCompilePhraseScorer extends PhraseScorer { + + JustCompilePhraseScorer(Weight weight, TermPositions[] tps, int[] offsets, + Similarity similarity, byte[] norms) { + super(weight, tps, offsets, similarity, norms); + } + + protected float phraseFreq() throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class JustCompileQuery extends Query { + + public String toString(String field) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class JustCompileScorer extends Scorer { + + protected JustCompileScorer(Similarity similarity) { + super(similarity); + } + + public Explanation explain(int doc) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + 
public float score() throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public int doc() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public boolean next() throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public boolean skipTo(int target) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class JustCompileSimilarity extends Similarity { + + public float coord(int overlap, int maxOverlap) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public float idf(int docFreq, int numDocs) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public float lengthNorm(String fieldName, int numTokens) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public float queryNorm(float sumOfSquaredWeights) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public float sloppyFreq(int distance) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public float tf(float freq) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class JustCompileSpanFilter extends SpanFilter { + + public SpanFilterResult bitSpans(IndexReader reader) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class JustCompileTopDocsCollector extends TopDocsCollector { + + protected JustCompileTopDocsCollector(PriorityQueue pq) { + super(pq); + } + + public void collect(int doc) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public void setNextReader(IndexReader reader, int docBase) + throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public void setScorer(Scorer scorer) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class 
JustCompileWeight implements Weight { + + public Explanation explain(IndexReader reader, int doc) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public Query getQuery() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public float getValue() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public void normalize(float norm) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public Scorer scorer(IndexReader reader) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public float sumOfSquaredWeights() throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + +} diff --git a/src/test/org/apache/lucene/search/QueryUtils.java b/src/test/org/apache/lucene/search/QueryUtils.java index a59f6e6585b..e6cf21b5f17 100644 --- a/src/test/org/apache/lucene/search/QueryUtils.java +++ b/src/test/org/apache/lucene/search/QueryUtils.java @@ -153,10 +153,15 @@ public class QueryUtils { final int[] sdoc = new int[] {-1}; final float maxDiff = 1e-5f; - s.search(q,new MultiReaderHitCollector() { - private int base = -1; - public void collect(int doc, float score) { + s.search(q,new Collector() { + private int base = 0; + private Scorer sc; + public void setScorer(Scorer scorer) throws IOException { + this.sc = scorer; + } + public void collect(int doc) throws IOException { doc = doc + base; + float score = sc.score(); try { int op = order[(opidx[0]++)%order.length]; //System.out.println(op==skip_op ? 
"skip("+(sdoc[0]+1)+")":"next()"); @@ -205,11 +210,16 @@ public class QueryUtils { //System.out.println("checkFirstSkipTo: "+q); final float maxDiff = 1e-5f; final int lastDoc[] = {-1}; - s.search(q,new MultiReaderHitCollector() { - private int base = -1; - public void collect(int doc, float score) { + s.search(q,new Collector() { + private int base = 0; + private Scorer scorer; + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + } + public void collect(int doc) throws IOException { //System.out.println("doc="+doc); doc = doc + base; + float score = scorer.score(); try { for (int i=lastDoc[0]+1; i<=doc; i++) { Weight w = q.weight(s); diff --git a/src/test/org/apache/lucene/search/TestDocBoost.java b/src/test/org/apache/lucene/search/TestDocBoost.java index bb4ffcdabca..a0e2196c66f 100644 --- a/src/test/org/apache/lucene/search/TestDocBoost.java +++ b/src/test/org/apache/lucene/search/TestDocBoost.java @@ -17,6 +17,8 @@ package org.apache.lucene.search; * limitations under the License. 
*/ +import java.io.IOException; + import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.analysis.SimpleAnalyzer; import org.apache.lucene.document.*; @@ -66,10 +68,14 @@ public class TestDocBoost extends LuceneTestCase { new IndexSearcher(store).search (new TermQuery(new Term("field", "word")), - new MultiReaderHitCollector() { - private int base = -1; - public final void collect(int doc, float score) { - scores[doc + base] = score; + new Collector() { + private int base = 0; + private Scorer scorer; + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + } + public final void collect(int doc) throws IOException { + scores[doc + base] = scorer.score(); } public void setNextReader(IndexReader reader, int docBase) { base = docBase; diff --git a/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java b/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java index 43d39a6f96f..e1d14a0a4be 100644 --- a/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java +++ b/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java @@ -168,10 +168,14 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter { // must use a non score normalizing method for this. 
Query q = csrq("data", "1", "6", T, T); q.setBoost(100); - search.search(q, null, new MultiReaderHitCollector() { - private int base = -1; - public void collect(int doc, float score) { - assertEquals("score for doc " + (doc + base) + " was not correct", 1.0f, score); + search.search(q, null, new Collector() { + private int base = 0; + private Scorer scorer; + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + } + public void collect(int doc) throws IOException { + assertEquals("score for doc " + (doc + base) + " was not correct", 1.0f, scorer.score()); } public void setNextReader(IndexReader reader, int docBase) { base = docBase; diff --git a/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java b/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java new file mode 100644 index 00000000000..71250141c81 --- /dev/null +++ b/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java @@ -0,0 +1,85 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + import java.io.IOException; + + import org.apache.lucene.util.LuceneTestCase; + + public class TestPositiveScoresOnlyCollector extends LuceneTestCase { + + private static final class SimpleScorer extends Scorer { + private int idx = -1; + + public SimpleScorer() { + super(null); + } + + public Explanation explain(int doc) throws IOException { return null; } + + public float score() throws IOException { + return idx == scores.length ? Float.NaN : scores[idx]; + } + + public int doc() { return idx; } + + public boolean next() throws IOException { + return ++idx == scores.length; + } + + public boolean skipTo(int target) throws IOException { + idx = target; + return idx >= scores.length; + } + } + + // The scores must have positive as well as negative values + private static final float[] scores = new float[] { 0.7767749f, -1.7839992f, + 8.9925785f, 7.9608946f, -0.07948637f, 2.6356435f, 7.4950366f, 7.1490803f, + -8.108544f, 4.961808f, 2.2423935f, -7.285586f, 4.6699767f }; + + public void testNegativeScores() throws Exception { + + // The Top*Collectors previously filtered out documents with <= 0 scores. This + // behavior has changed. This test checks that if PositiveScoresOnlyCollector + // wraps one of these collectors, documents with <= 0 scores are indeed + // filtered. 
+ + int numPositiveScores = 0; + for (int i = 0; i < scores.length; i++) { + if (scores[i] > 0) { + ++numPositiveScores; + } + } + + Scorer s = new SimpleScorer(); + TopDocsCollector tdc = new TopScoreDocCollector(scores.length); + Collector c = new PositiveScoresOnlyCollector(tdc); + c.setScorer(s); + while (!s.next()) { + c.collect(0); + } + TopDocs td = tdc.topDocs(); + ScoreDoc[] sd = td.scoreDocs; + assertEquals(numPositiveScores, td.totalHits); + for (int i = 0; i < sd.length; i++) { + assertTrue("only positive scores should return: " + sd[i].score, sd[i].score > 0); + } + } + +} diff --git a/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java b/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java new file mode 100644 index 00000000000..258eaaf57bd --- /dev/null +++ b/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java @@ -0,0 +1,111 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.LuceneTestCase; + +public class TestScoreCachingWrappingScorer extends LuceneTestCase { + + private static final class SimpleScorer extends Scorer { + private int idx = 0; + private int doc = -1; + + public SimpleScorer() { + super(null); + } + + public Explanation explain(int doc) throws IOException { return null; } + + public float score() throws IOException { + // advance idx on purpose, so that consecutive calls to score will get + // different results. This is to emulate computation of a score. If + // ScoreCachingWrappingScorer is used, this should not be called more than + // once per document. + return idx == scores.length ? Float.NaN : scores[idx++]; + } + + public int doc() { return doc; } + + public boolean next() throws IOException { + return ++doc == scores.length; + } + + public boolean skipTo(int target) throws IOException { + doc = target; + return doc >= scores.length; + } + } + + private static final class ScoreCachingCollector extends Collector { + + private int idx = 0; + private Scorer scorer; + float[] mscores; + + public ScoreCachingCollector(int numToCollect) { + mscores = new float[numToCollect]; + } + + public void collect(int doc) throws IOException { + // just a sanity check to avoid IOOB. + if (idx == mscores.length) { + return; + } + + // just call score() a couple of times and record the score. 
+ mscores[idx] = scorer.score(); + mscores[idx] = scorer.score(); + mscores[idx] = scorer.score(); + ++idx; + } + + public void setNextReader(IndexReader reader, int docBase) + throws IOException { + } + + public void setScorer(Scorer scorer) throws IOException { + this.scorer = new ScoreCachingWrappingScorer(scorer); + } + + } + + private static final float[] scores = new float[] { 0.7767749f, 1.7839992f, + 8.9925785f, 7.9608946f, 0.07948637f, 2.6356435f, 7.4950366f, 7.1490803f, + 8.108544f, 4.961808f, 2.2423935f, 7.285586f, 4.6699767f }; + + public void testGetScores() throws Exception { + + Scorer s = new SimpleScorer(); + ScoreCachingCollector scc = new ScoreCachingCollector(scores.length); + scc.setScorer(s); + + // We need to iterate on the scorer so that its doc() advances. + while (!s.next()) { + scc.collect(s.doc()); + } + + for (int i = 0; i < scores.length; i++) { + assertEquals(scores[i], scc.mscores[i], 0f); + } + + } + +} diff --git a/src/test/org/apache/lucene/search/TestScorerPerf.java b/src/test/org/apache/lucene/search/TestScorerPerf.java index 9bf736329b8..f898b31d2b6 100755 --- a/src/test/org/apache/lucene/search/TestScorerPerf.java +++ b/src/test/org/apache/lucene/search/TestScorerPerf.java @@ -96,14 +96,16 @@ public class TestScorerPerf extends LuceneTestCase { return sets; } - public static class CountingHitCollector extends MultiReaderHitCollector { + public static class CountingHitCollector extends Collector { int count=0; int sum=0; - protected int docBase = -1; + protected int docBase = 0; - public void collect(int doc, float score) { + public void setScorer(Scorer scorer) throws IOException {} + + public void collect(int doc) { count++; - sum += docBase+doc; // use it to avoid any possibility of being optimized away + sum += docBase + doc; // use it to avoid any possibility of being optimized away } public int getCount() { return count; } @@ -123,11 +125,12 @@ public class TestScorerPerf extends LuceneTestCase { } public void collect(int 
doc, float score) { + pos = answer.nextSetBit(pos+1); if (pos != doc + docBase) { throw new RuntimeException("Expected doc " + pos + " but got " + doc + docBase); } - super.collect(doc,score); + super.collect(doc); } } diff --git a/src/test/org/apache/lucene/search/TestSetNorm.java b/src/test/org/apache/lucene/search/TestSetNorm.java index 558946a2e12..7031ff445a6 100644 --- a/src/test/org/apache/lucene/search/TestSetNorm.java +++ b/src/test/org/apache/lucene/search/TestSetNorm.java @@ -17,6 +17,8 @@ package org.apache.lucene.search; * limitations under the License. */ +import java.io.IOException; + import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.analysis.SimpleAnalyzer; import org.apache.lucene.document.*; @@ -62,10 +64,14 @@ public class TestSetNorm extends LuceneTestCase { new IndexSearcher(store).search (new TermQuery(new Term("field", "word")), - new MultiReaderHitCollector() { - private int base = -1; - public final void collect(int doc, float score) { - scores[doc + base] = score; + new Collector() { + private int base = 0; + private Scorer scorer; + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + } + public final void collect(int doc) throws IOException { + scores[doc + base] = scorer.score(); } public void setNextReader(IndexReader reader, int docBase) { base = docBase; diff --git a/src/test/org/apache/lucene/search/TestSimilarity.java b/src/test/org/apache/lucene/search/TestSimilarity.java index 7a8bfd3ad2b..165bfdd2944 100644 --- a/src/test/org/apache/lucene/search/TestSimilarity.java +++ b/src/test/org/apache/lucene/search/TestSimilarity.java @@ -19,6 +19,7 @@ package org.apache.lucene.search; import org.apache.lucene.util.LuceneTestCase; +import java.io.IOException; import java.util.Collection; import org.apache.lucene.index.IndexReader; @@ -75,9 +76,13 @@ public class TestSimilarity extends LuceneTestCase { searcher.search (new TermQuery(b), - new MultiReaderHitCollector() { - public final 
void collect(int doc, float score) { - assertTrue(score == 1.0f); + new Collector() { + private Scorer scorer; + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + } + public final void collect(int doc) throws IOException { + assertTrue(scorer.score() == 1.0f); } public void setNextReader(IndexReader reader, int docBase) {} }); @@ -88,11 +93,15 @@ public class TestSimilarity extends LuceneTestCase { //System.out.println(bq.toString("field")); searcher.search (bq, - new MultiReaderHitCollector() { - private int base = -1; - public final void collect(int doc, float score) { + new Collector() { + private int base = 0; + private Scorer scorer; + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + } + public final void collect(int doc) throws IOException { //System.out.println("Doc=" + doc + " score=" + score); - assertTrue(score == (float)doc+base+1); + assertTrue(scorer.score() == (float)doc+base+1); } public void setNextReader(IndexReader reader, int docBase) { base = docBase; @@ -105,10 +114,14 @@ public class TestSimilarity extends LuceneTestCase { //System.out.println(pq.toString("field")); searcher.search (pq, - new MultiReaderHitCollector() { - public final void collect(int doc, float score) { + new Collector() { + private Scorer scorer; + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + } + public final void collect(int doc) throws IOException { //System.out.println("Doc=" + doc + " score=" + score); - assertTrue(score == 1.0f); + assertTrue(scorer.score() == 1.0f); } public void setNextReader(IndexReader reader, int docBase) {} }); @@ -117,10 +130,14 @@ public class TestSimilarity extends LuceneTestCase { //System.out.println(pq.toString("field")); searcher.search (pq, - new MultiReaderHitCollector() { - public final void collect(int doc, float score) { + new Collector() { + private Scorer scorer; + public void setScorer(Scorer scorer) throws IOException { + 
this.scorer = scorer; + } + public final void collect(int doc) throws IOException { //System.out.println("Doc=" + doc + " score=" + score); - assertTrue(score == 2.0f); + assertTrue(scorer.score() == 2.0f); } public void setNextReader(IndexReader reader, int docBase) {} }); diff --git a/src/test/org/apache/lucene/search/TestSort.java b/src/test/org/apache/lucene/search/TestSort.java index d3d0e908759..2c7474a2325 100644 --- a/src/test/org/apache/lucene/search/TestSort.java +++ b/src/test/org/apache/lucene/search/TestSort.java @@ -413,7 +413,7 @@ implements Serializable { slotValues = new int[numHits]; } - public void copy(int slot, int doc, float score) { + public void copy(int slot, int doc) { slotValues[slot] = docValues[doc]; } @@ -421,7 +421,7 @@ implements Serializable { return slotValues[slot1] - slotValues[slot2]; } - public int compareBottom(int doc, float score) { + public int compareBottom(int doc) { return bottomValue - docValues[doc]; } @@ -447,7 +447,7 @@ implements Serializable { } static class MyFieldComparatorSource extends FieldComparatorSource { - public FieldComparator newComparator(String fieldname, IndexReader[] subReaders, int numHits, int sortPos, boolean reversed) { + public FieldComparator newComparator(String fieldname, int numHits, int sortPos, boolean reversed) { return new MyFieldComparator(numHits); } } @@ -803,7 +803,94 @@ implements Serializable { assertEquals(docs1.scoreDocs[0].score, docs2.scoreDocs[0].score, 1e-6); } + + public void testSortWithoutFillFields() throws Exception { + + // There was previously a bug in TopFieldCollector when fillFields was set + // to false - the same doc and score was set in ScoreDoc[] array. This test + // asserts that if fillFields is false, the documents are set properly. It + // does not use Searcher's default search methods (with Sort) since all set + // fillFields to true. 
+ Sort[] sort = new Sort[] { new Sort(SortField.FIELD_DOC), new Sort() }; + for (int i = 0; i < sort.length; i++) { + TopDocsCollector tdc = TopFieldCollector.create(sort[i], 10, false, false, false); + + full.search(new MatchAllDocsQuery(), tdc); + + ScoreDoc[] sd = tdc.topDocs().scoreDocs; + for (int j = 1; j < sd.length; j++) { + assertTrue(sd[j].doc != sd[j - 1].doc); + } + + } + } + public void testSortWithoutScoreTracking() throws Exception { + + // Two Sort criteria to instantiate the multi/single comparators. + Sort[] sort = new Sort[] {new Sort(SortField.FIELD_DOC), new Sort() }; + for (int i = 0; i < sort.length; i++) { + TopDocsCollector tdc = TopFieldCollector.create(sort[i], 10, true, false, false); + + full.search(new MatchAllDocsQuery(), tdc); + + TopDocs td = tdc.topDocs(); + ScoreDoc[] sd = td.scoreDocs; + for (int j = 0; j < sd.length; j++) { + assertTrue(Float.isNaN(sd[j].score)); + } + assertTrue(Float.isNaN(td.getMaxScore())); + } + } + + public void testSortWithScoreNoMaxScoreTracking() throws Exception { + + // Two Sort criteria to instantiate the multi/single comparators. + Sort[] sort = new Sort[] {new Sort(SortField.FIELD_DOC), new Sort() }; + for (int i = 0; i < sort.length; i++) { + TopDocsCollector tdc = TopFieldCollector.create(sort[i], 10, true, true, false); + + full.search(new MatchAllDocsQuery(), tdc); + + TopDocs td = tdc.topDocs(); + ScoreDoc[] sd = td.scoreDocs; + for (int j = 0; j < sd.length; j++) { + assertTrue(!Float.isNaN(sd[j].score)); + } + assertTrue(Float.isNaN(td.getMaxScore())); + } + } + + public void testSortWithScoreAndMaxScoreTracking() throws Exception { + + // Two Sort criteria to instantiate the multi/single comparators. 
+ Sort[] sort = new Sort[] {new Sort(SortField.FIELD_DOC), new Sort() }; + for (int i = 0; i < sort.length; i++) { + TopDocsCollector tdc = TopFieldCollector.create(sort[i], 10, true, true, true); + + full.search(new MatchAllDocsQuery(), tdc); + + TopDocs td = tdc.topDocs(); + ScoreDoc[] sd = td.scoreDocs; + for (int j = 0; j < sd.length; j++) { + assertTrue(!Float.isNaN(sd[j].score)); + } + assertTrue(!Float.isNaN(td.getMaxScore())); + } + } + + public void testSortWithScoreAndMaxScoreTrackingNoResults() throws Exception { + + // Two Sort criteria to instantiate the multi/single comparators. + Sort[] sort = new Sort[] {new Sort(SortField.FIELD_DOC), new Sort() }; + for (int i = 0; i < sort.length; i++) { + TopDocsCollector tdc = TopFieldCollector.create(sort[i], 10, true, true, true); + TopDocs td = tdc.topDocs(); + assertEquals(0, td.totalHits); + assertTrue(Float.isNaN(td.getMaxScore())); + } + } + // runs a variety of sorts useful for multisearchers private void runMultiSorts (Searcher multi) throws Exception { sort.setSort (SortField.FIELD_DOC); diff --git a/src/test/org/apache/lucene/search/TestTermScorer.java b/src/test/org/apache/lucene/search/TestTermScorer.java index 72695165601..51162f463c8 100644 --- a/src/test/org/apache/lucene/search/TestTermScorer.java +++ b/src/test/org/apache/lucene/search/TestTermScorer.java @@ -65,8 +65,7 @@ public class TestTermScorer extends LuceneTestCase } - public void test() throws IOException - { + public void test() throws IOException { Term allTerm = new Term(FIELD, "all"); TermQuery termQuery = new TermQuery(allTerm); @@ -76,21 +75,25 @@ public class TestTermScorer extends LuceneTestCase TermScorer ts = new TermScorer(weight, indexReader.termDocs(allTerm), indexSearcher.getSimilarity(), indexReader.norms(FIELD)); - assertTrue("ts is null and it shouldn't be", ts != null); //we have 2 documents with the term all in them, one document for all the other values final List docs = new ArrayList(); //must call next first - 
ts.score(new MultiReaderHitCollector() - { - private int base = -1; - public void collect(int doc, float score) - { - docs.add(new TestHit(doc + base, score)); - assertTrue("score " + score + " is not greater than 0", score > 0); - assertTrue("Doc: " + doc + " does not equal: " + 0 + - " or doc does not equaal: " + 5, doc == 0 || doc == 5); + ts.score(new Collector() { + private int base = 0; + private Scorer scorer; + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + } + + public void collect(int doc) throws IOException { + float score = scorer.score(); + doc = doc + base; + docs.add(new TestHit(doc, score)); + assertTrue("score " + score + " is not greater than 0", score > 0); + assertTrue("Doc: " + doc + " does not equal 0 or doc does not equal 5", + doc == 0 || doc == 5); } public void setNextReader(IndexReader reader, int docBase) { base = docBase; @@ -121,8 +124,7 @@ public class TestTermScorer extends LuceneTestCase assertTrue(doc0.score + " does not equal: " + 1.6931472f, doc0.score == 1.6931472f); } - public void testNext() throws Exception - { + public void testNext() throws Exception { Term allTerm = new Term(FIELD, "all"); TermQuery termQuery = new TermQuery(allTerm); @@ -132,7 +134,6 @@ public class TestTermScorer extends LuceneTestCase TermScorer ts = new TermScorer(weight, indexReader.termDocs(allTerm), indexSearcher.getSimilarity(), indexReader.norms(FIELD)); - assertTrue("ts is null and it shouldn't be", ts != null); assertTrue("next did not return a doc", ts.next() == true); assertTrue("score is not correct", ts.score() == 1.6931472f); assertTrue("next did not return a doc", ts.next() == true); @@ -140,8 +141,7 @@ public class TestTermScorer extends LuceneTestCase assertTrue("next returned a doc and it should not have", ts.next() == false); } - public void testSkipTo() throws Exception - { + public void testSkipTo() throws Exception { Term allTerm = new Term(FIELD, "all"); TermQuery termQuery = new 
TermQuery(allTerm); @@ -151,7 +151,6 @@ public class TestTermScorer extends LuceneTestCase TermScorer ts = new TermScorer(weight, indexReader.termDocs(allTerm), indexSearcher.getSimilarity(), indexReader.norms(FIELD)); - assertTrue("ts is null and it shouldn't be", ts != null); assertTrue("Didn't skip", ts.skipTo(3) == true); //The next doc should be doc 5 assertTrue("doc should be number 5", ts.doc() == 5); @@ -167,7 +166,6 @@ public class TestTermScorer extends LuceneTestCase TermScorer ts = new TermScorer(weight, indexReader.termDocs(allTerm), indexSearcher.getSimilarity(), indexReader.norms(FIELD)); - assertTrue("ts is null and it shouldn't be", ts != null); Explanation explanation = ts.explain(0); assertTrue("explanation is null and it shouldn't be", explanation != null); //System.out.println("Explanation: " + explanation.toString()); @@ -185,7 +183,6 @@ public class TestTermScorer extends LuceneTestCase ts = new TermScorer(weight, indexReader.termDocs(dogsTerm), indexSearcher.getSimilarity(), indexReader.norms(FIELD)); - assertTrue("ts is null and it shouldn't be", ts != null); explanation = ts.explain(1); assertTrue("explanation is null and it shouldn't be", explanation != null); //System.out.println("Explanation: " + explanation.toString()); @@ -202,23 +199,17 @@ public class TestTermScorer extends LuceneTestCase } - private class TestHit - { + private class TestHit { public int doc; public float score; - public TestHit(int doc, float score) - { + public TestHit(int doc, float score) { this.doc = doc; this.score = score; } - public String toString() - { - return "TestHit{" + - "doc=" + doc + - ", score=" + score + - "}"; + public String toString() { + return "TestHit{" + "doc=" + doc + ", score=" + score + "}"; } } diff --git a/src/test/org/apache/lucene/search/TestTimeLimitedCollector.java b/src/test/org/apache/lucene/search/TestTimeLimitedCollector.java index cfb64de7611..557684fb6fc 100755 --- 
a/src/test/org/apache/lucene/search/TestTimeLimitedCollector.java +++ b/src/test/org/apache/lucene/search/TestTimeLimitedCollector.java @@ -20,7 +20,6 @@ package org.apache.lucene.search; import org.apache.lucene.analysis.WhitespaceAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter.MaxFieldLength; import org.apache.lucene.queryParser.QueryParser; @@ -287,12 +286,11 @@ public class TestTimeLimitedCollector extends LuceneTestCase { } // counting hit collector that can slow down at collect(). - private class MyHitCollector extends MultiReaderHitCollector + private class MyHitCollector extends HitCollector { private final BitSet bits = new BitSet(); private int slowdown = 0; private int lastDocCollected = -1; - private int docBase = -1; /** * amount of time to wait on each collect to simulate a long iteration @@ -301,8 +299,7 @@ public class TestTimeLimitedCollector extends LuceneTestCase { slowdown = milliseconds; } - public void collect( final int doc, final float score ) { - int docId = doc + docBase; + public void collect( final int docId, final float score ) { if( slowdown > 0 ) { try { Thread.sleep(slowdown); @@ -311,7 +308,7 @@ public class TestTimeLimitedCollector extends LuceneTestCase { throw new RuntimeException(ie); } } - assert docId >= 0: " base=" + docBase + " doc=" + doc; + assert docId >= 0: " doc=" + docId; bits.set( docId ); lastDocCollected = docId; } @@ -323,11 +320,6 @@ public class TestTimeLimitedCollector extends LuceneTestCase { public int getLastDocCollected() { return lastDocCollected; } - - public void setNextReader(IndexReader reader, int base) { - docBase = base; - } - } } diff --git a/src/test/org/apache/lucene/search/TestTimeLimitingCollector.java b/src/test/org/apache/lucene/search/TestTimeLimitingCollector.java new file mode 100644 index 00000000000..4f28708ff58 
--- /dev/null +++ b/src/test/org/apache/lucene/search/TestTimeLimitingCollector.java @@ -0,0 +1,337 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.BitSet; + +import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriter.MaxFieldLength; +import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.search.TimeLimitingCollector.TimeExceededException; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.LuceneTestCase; + +/** + * Tests the {@link TimeLimitingCollector}. This test checks (1) search + * correctness (regardless of timeout), (2) expected timeout behavior, + * and (3) a sanity test with multiple searching threads. + */ +public class TestTimeLimitingCollector extends LuceneTestCase { + private static final int SLOW_DOWN = 47; + private static final long TIME_ALLOWED = 17 * SLOW_DOWN; // so searches can find about 17 docs. 
+ + // max time allowed is relaxed for multithreading tests. + // the multithread case fails when setting this to 1 (no slack) and launching many threads (>2000). + // but this is not a real failure, just noise. + private static final double MULTI_THREAD_SLACK = 7; + + private static final int N_DOCS = 3000; + private static final int N_THREADS = 50; + + private Searcher searcher; + private final String FIELD_NAME = "body"; + private Query query; + + public TestTimeLimitingCollector(String name) { + super(name); + } + + /** + * initializes searcher with a document set + */ + protected void setUp() throws Exception { + final String docText[] = { + "docThatNeverMatchesSoWeCanRequireLastDocCollectedToBeGreaterThanZero", + "one blah three", + "one foo three multiOne", + "one foobar three multiThree", + "blueberry pancakes", + "blueberry pie", + "blueberry strudel", + "blueberry pizza", + }; + Directory directory = new RAMDirectory(); + IndexWriter iw = new IndexWriter(directory, new WhitespaceAnalyzer(), true, MaxFieldLength.UNLIMITED); + + for (int i=0; i 0!", exceptionDoc > 0 ); + if (greedy) { + assertTrue("greedy="+greedy+" exceptionDoc="+exceptionDoc+" != lastCollected="+lastCollected, exceptionDoc==lastCollected); + assertTrue("greedy, but no hits found!", myHc.hitCount() > 0 ); + } else { + assertTrue("greedy="+greedy+" exceptionDoc="+exceptionDoc+" not > lastCollected="+lastCollected, exceptionDoc>lastCollected); + } + + // verify that elapsed time at exception is within valid limits + assertEquals( timoutException.getTimeAllowed(), TIME_ALLOWED); + // a) Not too early + assertTrue ( "elapsed="+timoutException.getTimeElapsed()+" <= (allowed-resolution)="+(TIME_ALLOWED-TimeLimitingCollector.getResolution()), + timoutException.getTimeElapsed() > TIME_ALLOWED-TimeLimitingCollector.getResolution()); + // b) Not too late. + // This part is problematic in a busy test system, so we just print a warning. 
+ // We already verified that a timeout occurred, we just can't be picky about how long it took. + if (timoutException.getTimeElapsed() > maxTime(multiThreaded)) { + System.out.println("Informative: timeout exceeded (no action required: most probably just " + + " because the test machine is slower than usual): " + + "lastDoc="+exceptionDoc+ + " ,&& allowed="+timoutException.getTimeAllowed() + + " ,&& elapsed="+timoutException.getTimeElapsed() + + " >= " + maxTimeStr(multiThreaded)); + } + } + + private long maxTime(boolean multiThreaded) { + long res = 2 * TimeLimitingCollector.getResolution() + TIME_ALLOWED + SLOW_DOWN; // some slack for less noise in this test + if (multiThreaded) { + res *= MULTI_THREAD_SLACK; // larger slack + } + return res; + } + + private String maxTimeStr(boolean multiThreaded) { + String s = + "( " + + "2*resolution + TIME_ALLOWED + SLOW_DOWN = " + + "2*" + TimeLimitingCollector.getResolution() + " + " + TIME_ALLOWED + " + " + SLOW_DOWN + + ")"; + if (multiThreaded) { + s = MULTI_THREAD_SLACK + " * "+s; + } + return maxTime(multiThreaded) + " = " + s; + } + + /** + * Test timeout behavior when resolution is modified. 
+ */ + public void testModifyResolution() { + try { + // increase and test + long resolution = 20 * TimeLimitingCollector.DEFAULT_RESOLUTION; //400 + TimeLimitingCollector.setResolution(resolution); + assertEquals(resolution, TimeLimitingCollector.getResolution()); + doTestTimeout(false,true); + // decrease much and test + resolution = 5; + TimeLimitingCollector.setResolution(resolution); + assertEquals(resolution, TimeLimitingCollector.getResolution()); + doTestTimeout(false,true); + // return to default and test + resolution = TimeLimitingCollector.DEFAULT_RESOLUTION; + TimeLimitingCollector.setResolution(resolution); + assertEquals(resolution, TimeLimitingCollector.getResolution()); + doTestTimeout(false,true); + } finally { + TimeLimitingCollector.setResolution(TimeLimitingCollector.DEFAULT_RESOLUTION); + } + } + + /** + * Test correctness with multiple searching threads. + */ + public void testSearchMultiThreaded() throws Exception { + doTestMultiThreads(false); + } + + /** + * Test correctness with multiple searching threads. + */ + public void testTimeoutMultiThreaded() throws Exception { + doTestMultiThreads(true); + } + + private void doTestMultiThreads(final boolean withTimeout) throws Exception { + Thread [] threadArray = new Thread[N_THREADS]; + final BitSet success = new BitSet(N_THREADS); + for( int i = 0; i < threadArray.length; ++i ) { + final int num = i; + threadArray[num] = new Thread() { + public void run() { + if (withTimeout) { + doTestTimeout(true,true); + } else { + doTestSearch(); + } + synchronized(success) { + success.set(num); + } + } + }; + } + for( int i = 0; i < threadArray.length; ++i ) { + threadArray[i].start(); + } + for( int i = 0; i < threadArray.length; ++i ) { + threadArray[i].join(); + } + assertEquals("some threads failed!", N_THREADS,success.cardinality()); + } + + // counting collector that can slow down at collect(). 
+ private class MyHitCollector extends Collector { + private final BitSet bits = new BitSet(); + private int slowdown = 0; + private int lastDocCollected = -1; + private int docBase = 0; + + /** + * amount of time to wait on each collect to simulate a long iteration + */ + public void setSlowDown( int milliseconds ) { + slowdown = milliseconds; + } + + public int hitCount() { + return bits.cardinality(); + } + + public int getLastDocCollected() { + return lastDocCollected; + } + + public void setScorer(Scorer scorer) throws IOException { + // scorer is not needed + } + + public void collect(final int doc) throws IOException { + int docId = doc + docBase; + if( slowdown > 0 ) { + try { + Thread.sleep(slowdown); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + throw new RuntimeException(ie); + } + } + assert docId >= 0: " base=" + docBase + " doc=" + doc; + bits.set( docId ); + lastDocCollected = docId; + } + + public void setNextReader(IndexReader reader, int base) { + docBase = base; + } + + } + +} diff --git a/src/test/org/apache/lucene/search/TestTopDocsCollector.java b/src/test/org/apache/lucene/search/TestTopDocsCollector.java new file mode 100644 index 00000000000..3bae9c447b5 --- /dev/null +++ b/src/test/org/apache/lucene/search/TestTopDocsCollector.java @@ -0,0 +1,198 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.analysis.KeywordAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriter.MaxFieldLength; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.LuceneTestCase; + +public class TestTopDocsCollector extends LuceneTestCase { + + private static final class MyTopsDocCollector extends TopDocsCollector { + + private int idx = 0; + private int base = 0; + + public MyTopsDocCollector(int size) { + super(new HitQueue(size)); + } + + protected TopDocs newTopDocs(ScoreDoc[] results, int start) { + if (results == null) { + return EMPTY_TOPDOCS; + } + + float maxScore = Float.NaN; + if (start == 0) { + maxScore = results[0].score; + } else { + for (int i = pq.size(); i > 1; i--) { pq.pop(); } + maxScore = ((ScoreDoc) pq.pop()).score; + } + + return new TopDocs(totalHits, results, maxScore); + } + + public void collect(int doc) throws IOException { + pq.insert(new ScoreDoc(doc + base, scores[idx++])); + } + + public void setNextReader(IndexReader reader, int docBase) + throws IOException { + base = docBase; + } + + public void setScorer(Scorer scorer) throws IOException { + // Don't do anything. Assign scores in random + } + + } + + // Scores array to be used by MyTopDocsCollector. If it is changed, MAX_SCORE + // must also change. 
+ private static final float[] scores = new float[] { + 0.7767749f, 1.7839992f, 8.9925785f, 7.9608946f, 0.07948637f, 2.6356435f, + 7.4950366f, 7.1490803f, 8.108544f, 4.961808f, 2.2423935f, 7.285586f, 4.6699767f, + 2.9655676f, 6.953706f, 5.383931f, 6.9916306f, 8.365894f, 7.888485f, 8.723962f, + 3.1796896f, 0.39971232f, 1.3077754f, 6.8489285f, 9.17561f, 5.060466f, 7.9793315f, + 8.601509f, 4.1858315f, 0.28146625f + }; + + private static final float MAX_SCORE = 9.17561f; + + private Directory dir = new RAMDirectory(); + + private TopDocsCollector doSearch(int numResults) throws IOException { + Query q = new MatchAllDocsQuery(); + IndexSearcher searcher = new IndexSearcher(dir); + TopDocsCollector tdc = new MyTopsDocCollector(numResults); + searcher.search(q, tdc); + searcher.close(); + return tdc; + } + + protected void setUp() throws Exception { + super.setUp(); + + // populate an index with 30 documents, this should be enough for the test. + // The documents have no content - the test uses MatchAllDocsQuery(). 
+ IndexWriter writer = new IndexWriter(dir, new KeywordAnalyzer(), MaxFieldLength.UNLIMITED); + for (int i = 0; i < 30; i++) { + writer.addDocument(new Document()); + } + writer.close(); + } + + protected void tearDown() throws Exception { + dir.close(); + dir = null; + super.tearDown(); + } + + public void testInvalidArguments() throws Exception { + int numResults = 5; + TopDocsCollector tdc = doSearch(numResults); + + // start < 0 + assertEquals(0, tdc.topDocs(-1).scoreDocs.length); + + // start > pq.size() + assertEquals(0, tdc.topDocs(numResults + 1).scoreDocs.length); + + // start == pq.size() + assertEquals(0, tdc.topDocs(numResults).scoreDocs.length); + + // howMany < 0 + assertEquals(0, tdc.topDocs(0, -1).scoreDocs.length); + + // howMany == 0 + assertEquals(0, tdc.topDocs(0, 0).scoreDocs.length); + + } + + public void testZeroResults() throws Exception { + TopDocsCollector tdc = new MyTopsDocCollector(5); + assertEquals(0, tdc.topDocs(0, 1).scoreDocs.length); + } + + public void testFirstResultsPage() throws Exception { + TopDocsCollector tdc = doSearch(15); + assertEquals(10, tdc.topDocs(0, 10).scoreDocs.length); + } + + public void testSecondResultsPages() throws Exception { + TopDocsCollector tdc = doSearch(15); + // ask for more results than are available + assertEquals(5, tdc.topDocs(10, 10).scoreDocs.length); + + // ask for 5 results (exactly what there should be + tdc = doSearch(15); + assertEquals(5, tdc.topDocs(10, 5).scoreDocs.length); + + // ask for less results than there are + tdc = doSearch(15); + assertEquals(4, tdc.topDocs(10, 4).scoreDocs.length); + } + + public void testGetAllResults() throws Exception { + TopDocsCollector tdc = doSearch(15); + assertEquals(15, tdc.topDocs().scoreDocs.length); + } + + public void testGetResultsFromStart() throws Exception { + TopDocsCollector tdc = doSearch(15); + // should bring all results + assertEquals(15, tdc.topDocs(0).scoreDocs.length); + + tdc = doSearch(15); + // get the last 5 only. 
+ assertEquals(5, tdc.topDocs(10).scoreDocs.length); + } + + public void testMaxScore() throws Exception { + // ask for all results + TopDocsCollector tdc = doSearch(15); + TopDocs td = tdc.topDocs(); + assertEquals(MAX_SCORE, td.getMaxScore(), 0f); + + // ask for 5 last results + tdc = doSearch(15); + td = tdc.topDocs(10); + assertEquals(MAX_SCORE, td.getMaxScore(), 0f); + } + + // This does not test the PQ's correctness, but whether topDocs() + // implementations return the results in decreasing score order. + public void testResultsOrder() throws Exception { + TopDocsCollector tdc = doSearch(15); + ScoreDoc[] sd = tdc.topDocs().scoreDocs; + + assertEquals(MAX_SCORE, sd[0].score, 0f); + for (int i = 1; i < sd.length; i++) { + assertTrue(sd[i - 1].score >= sd[i].score); + } + } + +} diff --git a/src/test/org/apache/lucene/search/function/JustCompileSearchSpans.java b/src/test/org/apache/lucene/search/function/JustCompileSearchSpans.java new file mode 100644 index 00000000000..3873a08a0d4 --- /dev/null +++ b/src/test/org/apache/lucene/search/function/JustCompileSearchSpans.java @@ -0,0 +1,89 @@ +package org.apache.lucene.search.function; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.FieldCache; + +/** + * Holds all implementations of classes in the o.a.l.s.function package as a + * back-compatibility test. It does not run any tests per-se, however if + * someone adds a method to an interface or abstract method to an abstract + * class, one of the implementations here will fail to compile and so we know + * back-compat policy was violated. + */ +final class JustCompileSearchFunction { + + private static final String UNSUPPORTED_MSG = "unsupported: used for back-compat testing only !"; + + static final class JustCompileDocValues extends DocValues { + + public float floatVal(int doc) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public String toString(int doc) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class JustCompileFieldCacheSource extends FieldCacheSource { + + public JustCompileFieldCacheSource(String field) { + super(field); + } + + public boolean cachedFieldSourceEquals(FieldCacheSource other) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public int cachedFieldSourceHashCode() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public DocValues getCachedFieldValues(FieldCache cache, String field, + IndexReader reader) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class JustCompileValueSource extends ValueSource { + + public String description() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public boolean equals(Object o) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public DocValues getValues(IndexReader reader) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public int hashCode() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + +} diff --git 
a/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java b/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java new file mode 100644 index 00000000000..a15b5d79a5f --- /dev/null +++ b/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java @@ -0,0 +1,112 @@ +package org.apache.lucene.search.spans; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Collection; + +import org.apache.lucene.index.IndexReader; + +/** + * Holds all implementations of classes in the o.a.l.s.spans package as a + * back-compatibility test. It does not run any tests per-se, however if + * someone adds a method to an interface or abstract method to an abstract + * class, one of the implementations here will fail to compile and so we know + * back-compat policy was violated. 
+ */ +final class JustCompileSearchSpans { + + private static final String UNSUPPORTED_MSG = "unsupported: used for back-compat testing only !"; + + static final class JustCompileSpans implements Spans { + + public int doc() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public int end() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public boolean next() throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public boolean skipTo(int target) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public int start() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class JustCompileSpanQuery extends SpanQuery { + + public String getField() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public Spans getSpans(IndexReader reader) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public Collection getTerms() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public String toString(String field) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class JustCompilePayloadSpans implements PayloadSpans { + + public Collection getPayload() throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public boolean isPayloadAvailable() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public int doc() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public int end() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public boolean next() throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public boolean skipTo(int target) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public int start() { + throw new 
UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + +}