#35157 - Fix for SpanTermQuerys in a BooleanQuery. Patch from Reece Wilton.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@179679 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Erik Hatcher 2005-06-03 00:22:47 +00:00
parent 07cee0b287
commit e45c04c7ff
4 changed files with 282 additions and 0 deletions

View File

@ -168,6 +168,10 @@ Bug fixes
corrupted when the old version of a file was longer than the new.
Now any existing file is first removed. (Doug Cutting)
9. Fix BooleanQuery containing nested SpanTermQuery's, which previously
could return an incorrect number of hits.
(Reece Wilton via Erik Hatcher, Bug #35157)
Optimizations
1. Disk usage (peak requirements during indexing and optimization)

View File

@ -89,6 +89,11 @@ public class SpanTermQuery extends SpanQuery {
}
public boolean skipTo(int target) throws IOException {
// are we already at the correct position?
if (doc >= target) {
return true;
}
if (!positions.skipTo(target)) {
doc = Integer.MAX_VALUE;
return false;

View File

@ -0,0 +1,164 @@
package org.apache.lucene.search.spans;
/**
* Copyright 2005 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import junit.framework.TestCase;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
/*******************************************************************************
* Tests the span query bug in Lucene. It demonstrates that SpanTermQuerys don't
* work correctly in a BooleanQuery.
*
* @author Reece Wilton
*/
public class TestSpansAdvanced extends TestCase {
// location to the index
protected Directory mDirectory;;
// field names in the index
private final static String FIELD_ID = "ID";
protected final static String FIELD_TEXT = "TEXT";
/**
* Initializes the tests by adding 4 identical documents to the index.
*/
protected void setUp() throws Exception {
super.setUp();
// create test index
mDirectory = new RAMDirectory();
final IndexWriter writer = new IndexWriter(mDirectory, new StandardAnalyzer(), true);
addDocument(writer, "1", "I think it should work.");
addDocument(writer, "2", "I think it should work.");
addDocument(writer, "3", "I think it should work.");
addDocument(writer, "4", "I think it should work.");
writer.close();
}
protected void tearDown() throws Exception {
mDirectory.close();
mDirectory = null;
}
/**
* Adds the document to the index.
*
* @param writer the Lucene index writer
* @param id the unique id of the document
* @param text the text of the document
* @throws IOException
*/
protected void addDocument(final IndexWriter writer, final String id, final String text) throws IOException {
final Document document = new Document();
document.add(new Field(FIELD_ID, id, Field.Store.YES, Field.Index.UN_TOKENIZED));
document.add(new Field(FIELD_TEXT, text, Field.Store.YES, Field.Index.TOKENIZED));
writer.addDocument(document);
}
/**
* Tests two span queries.
*
* ERROR: Lucene returns the incorrect number of results and the scoring for
* the results is incorrect.
*
* @throws IOException
*/
public void testBooleanQueryWithSpanQueries() throws IOException {
doTestBooleanQueryWithSpanQueries(0.3884282f);
}
/**
* Tests two span queries.
*
* ERROR: Lucene returns the incorrect number of results and the scoring for
* the results is incorrect.
*
* @throws IOException
*/
protected void doTestBooleanQueryWithSpanQueries(final float expectedScore) throws IOException {
final Query spanQuery = new SpanTermQuery(new Term(FIELD_TEXT, "work"));
final BooleanQuery query = new BooleanQuery();
query.add(spanQuery, BooleanClause.Occur.MUST);
query.add(spanQuery, BooleanClause.Occur.MUST);
final Hits hits = executeQuery(query);
final String[] expectedIds = new String[] { "1", "2", "3", "4" };
final float[] expectedScores = new float[] { expectedScore, expectedScore, expectedScore, expectedScore };
assertHits(hits, "two span queries", expectedIds, expectedScores);
}
/**
* Executes the query and throws an assertion if the results don't match the
* expectedHits.
*
* @param query the query to execute
* @throws IOException
*/
protected Hits executeQuery(final Query query) throws IOException {
final IndexSearcher searcher = new IndexSearcher(mDirectory);
final Hits hits = searcher.search(query);
searcher.close();
return hits;
}
/**
* Checks to see if the hits are what we expected.
*
* @param hits the search results
* @param description the description of the search
* @param expectedIds the expected document ids of the hits
* @param expectedScores the expected scores of the hits
*
* @throws IOException
*/
protected void assertHits(final Hits hits, final String description, final String[] expectedIds,
final float[] expectedScores) throws IOException {
// display the hits
System.out.println(hits.length() + " hits for search: \"" + description + '\"');
for (int i = 0; i < hits.length(); i++) {
System.out.println(" " + FIELD_ID + ':' + hits.doc(i).get(FIELD_ID) + " (score:" + hits.score(i) + ')');
}
// did we get the hits we expected
assertEquals(expectedIds.length, hits.length());
for (int i = 0; i < hits.length(); i++) {
assertTrue(expectedIds[i].equals(hits.doc(i).get(FIELD_ID)));
assertEquals(expectedScores[i], hits.score(i), 0);
}
}
}

View File

@ -0,0 +1,109 @@
package org.apache.lucene.search.spans;
/**
* Copyright 2005 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.Query;
/*******************************************************************************
* Some expanded tests to make sure my patch doesn't break other SpanTermQuery
* functionality.
*
* @author Reece Wilton
*/
public class TestSpansAdvanced2 extends TestSpansAdvanced {
/**
* Initializes the tests by adding documents to the index.
*/
protected void setUp() throws Exception {
super.setUp();
// create test index
final IndexWriter writer = new IndexWriter(mDirectory, new StandardAnalyzer(), false);
addDocument(writer, "A", "Should we, could we, would we?");
addDocument(writer, "B", "It should. Should it?");
addDocument(writer, "C", "It shouldn't.");
addDocument(writer, "D", "Should we, should we, should we.");
writer.close();
}
/**
* Verifies that the index has the correct number of documents.
*
* @throws Exception
*/
public void testVerifyIndex() throws Exception {
final IndexReader reader = IndexReader.open(mDirectory);
assertEquals(8, reader.numDocs());
reader.close();
}
/**
* Tests a single span query that matches multiple documents.
*
* @throws IOException
*/
public void testSingleSpanQuery() throws IOException {
final Query spanQuery = new SpanTermQuery(new Term(FIELD_TEXT, "should"));
final Hits hits = executeQuery(spanQuery);
final String[] expectedIds = new String[] { "B", "D", "1", "2", "3", "4", "A" };
final float[] expectedScores = new float[] { 0.625f, 0.45927936f, 0.35355338f, 0.35355338f, 0.35355338f,
0.35355338f, 0.26516503f, };
assertHits(hits, "single span query", expectedIds, expectedScores);
}
/**
* Tests a single span query that matches multiple documents.
*
* @throws IOException
*/
public void testMultipleDifferentSpanQueries() throws IOException {
final Query spanQuery1 = new SpanTermQuery(new Term(FIELD_TEXT, "should"));
final Query spanQuery2 = new SpanTermQuery(new Term(FIELD_TEXT, "we"));
final BooleanQuery query = new BooleanQuery();
query.add(spanQuery1, BooleanClause.Occur.MUST);
query.add(spanQuery2, BooleanClause.Occur.MUST);
final Hits hits = executeQuery(query);
final String[] expectedIds = new String[] { "A", "D" };
final float[] expectedScores = new float[] { 0.93163157f, 0.20698164f };
assertHits(hits, "multiple different span queries", expectedIds, expectedScores);
}
/**
* Tests two span queries.
*
* ERROR: Lucene returns the incorrect number of results and the scoring for
* the results is incorrect.
*
* @throws IOException
*/
public void testBooleanQueryWithSpanQueries() throws IOException {
doTestBooleanQueryWithSpanQueries(0.73500174f);
}
}