From 229a0a84a1d26c76f51fd7c2bf50066a92ae39f0 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Wed, 15 Jul 2009 11:15:58 +0000 Subject: [PATCH] LUCENE-1744: fix BooleanScorer2.doc() to be correct when it's the top scorer git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@794233 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/lucene/search/BooleanScorer2.java | 12 +- .../lucene/search/FieldValueHitQueue.java | 5 +- .../search/TestElevationComparator.java | 181 ++++++++++++++++++ 3 files changed, 188 insertions(+), 10 deletions(-) create mode 100644 src/test/org/apache/lucene/search/TestElevationComparator.java diff --git a/src/java/org/apache/lucene/search/BooleanScorer2.java b/src/java/org/apache/lucene/search/BooleanScorer2.java index 45a90fca6f5..fdadc688423 100644 --- a/src/java/org/apache/lucene/search/BooleanScorer2.java +++ b/src/java/org/apache/lucene/search/BooleanScorer2.java @@ -297,7 +297,6 @@ class BooleanScorer2 extends Scorer { */ public void score(Collector collector) throws IOException { collector.setScorer(this); - int doc; while ((doc = countingSumScorer.nextDoc()) != NO_MORE_DOCS) { collector.collect(doc); } @@ -317,14 +316,13 @@ class BooleanScorer2 extends Scorer { } protected boolean score(Collector collector, int max, int firstDocID) throws IOException { - // null pointer exception when next() was not called before: - int docNr = firstDocID; + doc = firstDocID; collector.setScorer(this); - while (docNr < max) { - collector.collect(docNr); - docNr = countingSumScorer.nextDoc(); + while (doc < max) { + collector.collect(doc); + doc = countingSumScorer.nextDoc(); } - return docNr != NO_MORE_DOCS; + return doc != NO_MORE_DOCS; } /** @deprecated use {@link #docID()} instead. */ diff --git a/src/java/org/apache/lucene/search/FieldValueHitQueue.java b/src/java/org/apache/lucene/search/FieldValueHitQueue.java index 13a191137ad..cd4da555eeb 100644 --- a/src/java/org/apache/lucene/search/FieldValueHitQueue.java +++ b/src/java/org/apache/lucene/search/FieldValueHitQueue.java @@ -19,7 +19,6 @@ package org.apache.lucene.search; import java.io.IOException; -import org.apache.lucene.index.IndexReader; import org.apache.lucene.util.PriorityQueue; /** @@ -54,7 +53,7 @@ public abstract class FieldValueHitQueue extends PriorityQueue { } public String toString() { - return "slot:" + slot + " docID:" + docID; + return "slot:" + slot + " docID:" + docID + " score=" + score; } } @@ -134,7 +133,7 @@ public abstract class FieldValueHitQueue extends PriorityQueue { initialize(size); } - protected boolean lessThan(Object a, Object b) { + protected boolean lessThan(final Object a, final Object b) { final Entry hitA = (Entry) a; final Entry hitB = (Entry) b; diff --git a/src/test/org/apache/lucene/search/TestElevationComparator.java b/src/test/org/apache/lucene/search/TestElevationComparator.java new file mode 100644 index 00000000000..2c41b1a8265 --- /dev/null +++ b/src/test/org/apache/lucene/search/TestElevationComparator.java @@ -0,0 +1,181 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.*; +import org.apache.lucene.search.*; +import org.apache.lucene.store.*; +import org.apache.lucene.util.LuceneTestCase; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +public class TestElevationComparator extends LuceneTestCase { + + private final Map/**/ priority = new HashMap/**/(); + + //@Test + public void testSorting() throws Throwable { + Directory directory = new MockRAMDirectory(); + IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); + writer.setMaxBufferedDocs(2); + writer.setMergeFactor(1000); + writer.addDocument(adoc(new String[] {"id", "a", "title", "ipod", "str_s", "a"})); + writer.addDocument(adoc(new String[] {"id", "b", "title", "ipod ipod", "str_s", "b"})); + writer.addDocument(adoc(new String[] {"id", "c", "title", "ipod ipod ipod", "str_s","c"})); + writer.addDocument(adoc(new String[] {"id", "x", "title", "boosted", "str_s", "x"})); + writer.addDocument(adoc(new String[] {"id", "y", "title", "boosted boosted", "str_s","y"})); + writer.addDocument(adoc(new String[] {"id", "z", "title", "boosted boosted boosted","str_s", "z"})); + + IndexReader r = writer.getReader(); + writer.close(); + + IndexSearcher searcher = new IndexSearcher(r); + + runTest(searcher, true); + runTest(searcher, false); + + searcher.close(); + r.close(); + directory.close(); + } + + private void runTest(IndexSearcher searcher, boolean reversed) throws Throwable { + + BooleanQuery newq = new BooleanQuery(false); + TermQuery query = new TermQuery(new Term("title", "ipod")); + + newq.add(query, BooleanClause.Occur.SHOULD); + newq.add(getElevatedQuery(new String[] {"id", "a", "id", "x"}), BooleanClause.Occur.SHOULD); + + Sort sort = new Sort(new SortField[]{ + new SortField("id", new ElevationComparatorSource(priority), false), + new SortField(null, SortField.SCORE, reversed) + }); + + TopDocsCollector topCollector = TopFieldCollector.create(sort, 50, false, true, true, true); + searcher.search(newq, null, topCollector); + + TopDocs topDocs = topCollector.topDocs(0, 10); + int nDocsReturned = topDocs.scoreDocs.length; + + int[] ids = new int[nDocsReturned]; + float[] scores = new float[nDocsReturned]; + Document[] documents = new Document[nDocsReturned]; + assertEquals(4, nDocsReturned); + + // 0 & 3 were elevated + assertEquals(0, topDocs.scoreDocs[0].doc); + assertEquals(3, topDocs.scoreDocs[1].doc); + + if (reversed) { + assertEquals(2, topDocs.scoreDocs[2].doc); + assertEquals(1, topDocs.scoreDocs[3].doc); + } else { + assertEquals(1, topDocs.scoreDocs[2].doc); + assertEquals(2, topDocs.scoreDocs[3].doc); + } + + /* + for (int i = 0; i < nDocsReturned; i++) { + ScoreDoc scoreDoc = topDocs.scoreDocs[i]; + ids[i] = scoreDoc.doc; + scores[i] = scoreDoc.score; + documents[i] = searcher.doc(ids[i]); + System.out.println("ids[i] = " + ids[i]); + System.out.println("documents[i] = " + documents[i]); + System.out.println("scores[i] = " + scores[i]); + } + */ + } + + private Query getElevatedQuery(String[] vals) { + BooleanQuery q = new BooleanQuery(false); + q.setBoost(0); + int max = (vals.length / 2) + 5; + for (int i = 0; i < vals.length - 1; i += 2) { + q.add(new TermQuery(new Term(vals[i], vals[i + 1])), BooleanClause.Occur.SHOULD); + priority.put(vals[i + 1], new Integer(max--)); + System.out.println(" pri doc=" + vals[i+1] + " pri=" + (1+max)); + } + return q; + } + + private Document adoc(String[] vals) { + Document doc = new Document(); + for (int i = 0; i < vals.length - 2; i += 2) { + doc.add(new Field(vals[i], vals[i + 1], Field.Store.YES, Field.Index.ANALYZED)); + } + return doc; + } +} + +class ElevationComparatorSource extends FieldComparatorSource { + private final Map/**/ priority; + + public ElevationComparatorSource(final Map/**/ boosts) { + this.priority = boosts; + } + + public FieldComparator newComparator(final String fieldname, final int numHits, int sortPos, boolean reversed) throws IOException { + return new FieldComparator() { + + FieldCache.StringIndex idIndex; + private final int[] values = new int[numHits]; + int bottomVal; + + public int compare(int slot1, int slot2) { + return values[slot2] - values[slot1]; // values will be small enough that there is no overflow concern + } + + public void setBottom(int slot) { + bottomVal = values[slot]; + } + + private int docVal(int doc) throws IOException { + String id = idIndex.lookup[idIndex.order[doc]]; + Integer prio = (Integer) priority.get(id); + return prio == null ? 0 : prio.intValue(); + } + + public int compareBottom(int doc) throws IOException { + return docVal(doc) - bottomVal; + } + + public void copy(int slot, int doc) throws IOException { + values[slot] = docVal(doc); + } + + public void setNextReader(IndexReader reader, int docBase, int numSlotsFull) throws IOException { + idIndex = FieldCache.DEFAULT.getStringIndex(reader, fieldname); + } + + public int sortType() { + return SortField.CUSTOM; + } + + public Comparable value(int slot) { + return new Integer(values[slot]); + } + }; + } +}