From 9adff2072a2bc4f0c23a7108816d83bfc8f3e3a0 Mon Sep 17 00:00:00 2001 From: Michael Busch Date: Thu, 7 Feb 2008 22:46:37 +0000 Subject: [PATCH] LUCENE-1169: Fixed bug in IndexSearcher.search(): searching with a filter might miss some hits because scorer.skipTo() is called without checking if the scorer is already at the right position. git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@619676 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 6 ++ .../apache/lucene/search/IndexSearcher.java | 8 +- .../lucene/search/TestFilteredSearch.java | 93 +++++++++++++++++++ 3 files changed, 104 insertions(+), 3 deletions(-) create mode 100644 src/test/org/apache/lucene/search/TestFilteredSearch.java diff --git a/CHANGES.txt b/CHANGES.txt index 841c05667f3..4221dcdc1c1 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -49,6 +49,12 @@ Bug fixes documents have mixed term vectors (Suresh Guvvala via Mike McCandless). + 4. LUCENE-1169: Fixed bug in IndexSearcher.search(): searching with + a filter might miss some hits because scorer.skipTo() is called + without checking if the scorer is already at the right position. + scorer.skipTo(scorer.doc()) is not a NOOP, it behaves as + scorer.next(). (Eks Dev, Michael Busch) + New features 1. LUCENE-1137: Added Token.set/getFlags() accessors for passing more information about a Token through the analysis diff --git a/src/java/org/apache/lucene/search/IndexSearcher.java b/src/java/org/apache/lucene/search/IndexSearcher.java index cb9ef692355..eecd8b20a01 100644 --- a/src/java/org/apache/lucene/search/IndexSearcher.java +++ b/src/java/org/apache/lucene/search/IndexSearcher.java @@ -139,16 +139,18 @@ public class IndexSearcher extends Searcher { } DocIdSetIterator docIdSetIterator = filter.getDocIdSet(reader).iterator(); // CHECKME: use ConjunctionScorer here? - boolean more = docIdSetIterator.next(); + + boolean more = docIdSetIterator.next() && scorer.skipTo(docIdSetIterator.doc()); + while (more) { int filterDocId = docIdSetIterator.doc(); - if (! scorer.skipTo(filterDocId)) { + if (filterDocId > scorer.doc() && !scorer.skipTo(filterDocId)) { more = false; } else { int scorerDocId = scorer.doc(); if (scorerDocId == filterDocId) { // permitted by filter results.collect(scorerDocId, scorer.score()); - more = docIdSetIterator.skipTo(scorerDocId + 1); + more = docIdSetIterator.next(); } else { more = docIdSetIterator.skipTo(scorerDocId); } diff --git a/src/test/org/apache/lucene/search/TestFilteredSearch.java b/src/test/org/apache/lucene/search/TestFilteredSearch.java new file mode 100644 index 00000000000..534a9d3fb59 --- /dev/null +++ b/src/test/org/apache/lucene/search/TestFilteredSearch.java @@ -0,0 +1,93 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.search; + +import java.io.IOException; + +import junit.framework.TestCase; + +import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.OpenBitSet; + + +/** + * + */ +public class TestFilteredSearch extends TestCase +{ + + public TestFilteredSearch(String name) { + super(name); + } + + private static final String FIELD = "category"; + + public void testFilteredSearch() { + RAMDirectory directory = new RAMDirectory(); + int[] filterBits = {1, 36}; + Filter filter = new SimpleDocIdSetFilter(filterBits); + + + try { + IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); + for (int i = 0; i < 60; i++) {//Simple docs + Document doc = new Document(); + doc.add(new Field(FIELD, Integer.toString(i), Field.Store.YES, Field.Index.UN_TOKENIZED)); + writer.addDocument(doc); + } + writer.close(); + + BooleanQuery booleanQuery = new BooleanQuery(); + booleanQuery.add(new TermQuery(new Term(FIELD, "36")), BooleanClause.Occur.SHOULD); + + + IndexSearcher indexSearcher = new IndexSearcher(directory); + org.apache.lucene.search.Hits hits = indexSearcher.search(booleanQuery, filter); + assertEquals("Number of matched documents", 1, hits.length()); + + } + catch (IOException e) { + fail(e.getMessage()); + } + + } + + + public static final class SimpleDocIdSetFilter extends Filter { + private OpenBitSet bits; + + public SimpleDocIdSetFilter(int[] docs) { + bits = new OpenBitSet(); + for(int i = 0; i < docs.length; i++){ + bits.set(docs[i]); + } + + } + + public DocIdSet getDocIdSet(IndexReader reader) { + return bits; + } + } + +}