From 52cda0c4ef17f4150778809c706c97012583ec14 Mon Sep 17 00:00:00 2001 From: Daniel Naber Date: Thu, 23 Jun 2005 20:43:47 +0000 Subject: [PATCH] Most of Wolf Siberski's patch to fix #35241/#31841: MulitSearcher failed on WildcardQuery etc. -this has not been committed: removal of mergeBooleanQueries(); change to BooleanQuery.equals() -added: a test case that is commented out which shows strange queries like "multi* multi* foo" don't work properly git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@201453 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/lucene/search/BooleanQuery.java | 5 - .../apache/lucene/search/MultiTermQuery.java | 5 - .../org/apache/lucene/search/PrefixQuery.java | 4 - src/java/org/apache/lucene/search/Query.java | 47 +++++- .../org/apache/lucene/search/RangeQuery.java | 4 - .../search/TestMultiSearcherRanking.java | 153 ++++++++++++------ 6 files changed, 145 insertions(+), 73 deletions(-) diff --git a/src/java/org/apache/lucene/search/BooleanQuery.java b/src/java/org/apache/lucene/search/BooleanQuery.java index 0a2fc7dc8a3..417ae3d8c00 100644 --- a/src/java/org/apache/lucene/search/BooleanQuery.java +++ b/src/java/org/apache/lucene/search/BooleanQuery.java @@ -371,11 +371,6 @@ public class BooleanQuery extends Query { } } - // inherit javadoc - public Query combine(Query[] queries) { - return Query.mergeBooleanQueries(queries); - } - public Object clone() { BooleanQuery clone = (BooleanQuery)super.clone(); clone.clauses = (Vector)this.clauses.clone(); diff --git a/src/java/org/apache/lucene/search/MultiTermQuery.java b/src/java/org/apache/lucene/search/MultiTermQuery.java index 616c40c7f32..a884d5dcb8e 100644 --- a/src/java/org/apache/lucene/search/MultiTermQuery.java +++ b/src/java/org/apache/lucene/search/MultiTermQuery.java @@ -67,11 +67,6 @@ public abstract class MultiTermQuery extends Query { return query; } - public Query combine(Query[] queries) { - return Query.mergeBooleanQueries(queries); - } - - /** Prints a user-readable version of this query. */ public String toString(String field) { StringBuffer buffer = new StringBuffer(); diff --git a/src/java/org/apache/lucene/search/PrefixQuery.java b/src/java/org/apache/lucene/search/PrefixQuery.java index cfa5e0bd6db..4c2d0e5be0e 100644 --- a/src/java/org/apache/lucene/search/PrefixQuery.java +++ b/src/java/org/apache/lucene/search/PrefixQuery.java @@ -60,10 +60,6 @@ public class PrefixQuery extends Query { return query; } - public Query combine(Query[] queries) { - return Query.mergeBooleanQueries(queries); - } - /** Prints a user-readable version of this query. */ public String toString(String field) { StringBuffer buffer = new StringBuffer(); diff --git a/src/java/org/apache/lucene/search/Query.java b/src/java/org/apache/lucene/search/Query.java index 4a8365ad246..1249e3d31a0 100644 --- a/src/java/org/apache/lucene/search/Query.java +++ b/src/java/org/apache/lucene/search/Query.java @@ -104,16 +104,47 @@ public abstract class Query implements java.io.Serializable, Cloneable { /** Expert: called when re-writing queries under MultiSearcher. * - *

Only implemented by derived queries, with no - * {@link #createWeight(Searcher)} implementation. - */ + * Create a single query suitable for use by all subsearchers (in 1-1 + * correspondence with queries). This is an optimization of the OR of + * all queries. We handle the common optimization cases of equal + * queries and overlapping clauses of boolean OR queries (as generated + * by MultiTermQuery.rewrite() and RangeQuery.rewrite()). + * Be careful overriding this method as queries[0] determines which + * method will be called and is not necessarily of the same type as + * the other queries. + */ public Query combine(Query[] queries) { - for (int i = 0; i < queries.length; i++) { - if (!this.equals(queries[i])) { - throw new IllegalArgumentException(); - } + HashSet uniques = new HashSet(); + for (int i = 0; i < queries.length; i++) { + Query query = queries[i]; + BooleanClause[] clauses = null; + // check if we can split the query into clauses + boolean splittable = (query instanceof BooleanQuery); + if(splittable){ + BooleanQuery bq = (BooleanQuery) query; + splittable = bq.isCoordDisabled(); + clauses = bq.getClauses(); + for (int j = 0; splittable && j < clauses.length; j++) { + splittable = (clauses[j].getOccur() == BooleanClause.Occur.SHOULD); } - return this; + } + if(splittable){ + for (int j = 0; j < clauses.length; j++) { + uniques.add(clauses[j].getQuery()); + } + } else { + uniques.add(query); + } + } + // optimization: if we have just one query, just return it + if(uniques.size() == 1){ + return (Query)uniques.iterator().next(); + } + Iterator it = uniques.iterator(); + BooleanQuery result = new BooleanQuery(true); + while (it.hasNext()) + result.add((Query) it.next(), BooleanClause.Occur.SHOULD); + return result; } /** diff --git a/src/java/org/apache/lucene/search/RangeQuery.java b/src/java/org/apache/lucene/search/RangeQuery.java index ed822af57d2..cd40e3ce84a 100644 --- a/src/java/org/apache/lucene/search/RangeQuery.java +++ b/src/java/org/apache/lucene/search/RangeQuery.java @@ -105,10 +105,6 @@ public class RangeQuery extends Query return query; } - public Query combine(Query[] queries) { - return Query.mergeBooleanQueries(queries); - } - /** Returns the field name for this query */ public String getField() { return (lowerTerm != null ? lowerTerm.field() : upperTerm.field()); diff --git a/src/test/org/apache/lucene/search/TestMultiSearcherRanking.java b/src/test/org/apache/lucene/search/TestMultiSearcherRanking.java index 957259eb87f..d21b495c67c 100644 --- a/src/test/org/apache/lucene/search/TestMultiSearcherRanking.java +++ b/src/test/org/apache/lucene/search/TestMultiSearcherRanking.java @@ -1,7 +1,7 @@ package org.apache.lucene.search; /** - * Copyright 2005 The Apache Software Foundation + * Copyright 2004 The Apache Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,7 +24,8 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.Term; +import org.apache.lucene.queryParser.ParseException; +import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; @@ -34,70 +35,128 @@ import org.apache.lucene.store.RAMDirectory; * * @version $Id: TestMultiSearcher.java 150492 2004-09-06 22:01:49Z dnaber $ */ -public class TestMultiSearcherRanking extends TestCase -{ - - private final Query query = new TermQuery(new Term("body", "three")); +public class TestMultiSearcherRanking extends TestCase { - public void testMultiSearcherRanking() throws IOException { - Hits multiSearcherHits = multi(); - Hits singleSearcherHits = single(); - assertEquals(multiSearcherHits.length(), singleSearcherHits.length()); - for(int i = 0; i < multiSearcherHits.length(); i++) { - assertEquals(multiSearcherHits.score(i), singleSearcherHits.score(i), 0.001f); - Document docMulti = multiSearcherHits.doc(i); - Document docSingle = singleSearcherHits.doc(i); - assertEquals(docMulti.get("body"), docSingle.get("body")); - } + private final boolean verbose = false; // set to true to output hits + private final String FIELD_NAME = "body"; + private Searcher multiSearcher; + private Searcher singleSearcher; + + public void testOneTermQuery() throws IOException, ParseException { + checkQuery("three"); } - // Collection 1+2 searched with MultiSearcher: - private Hits multi() throws IOException { - Directory d1 = new RAMDirectory(); - IndexWriter iw = new IndexWriter(d1, new StandardAnalyzer(), true); - addCollection1(iw); - iw.close(); + public void testTwoTermQuery() throws IOException, ParseException { + checkQuery("three foo"); + } + public void testPrefixQuery() throws IOException, ParseException { + checkQuery("multi*"); + } + + public void testFuzzyQuery() throws IOException, ParseException { + checkQuery("multiThree~"); + } + + public void testRangeQuery() throws IOException, ParseException { + checkQuery("{multiA TO multiP}"); + } + + public void testMultiPhraseQuery() throws IOException, ParseException { + checkQuery("\"blueberry pi*\""); + } + + public void testNoMatchQuery() throws IOException, ParseException { + checkQuery("+three +nomatch"); + } + + /* + public void testTermRepeatedQuery() throws IOException, ParseException { + // TODO: this corner case yields different results. + checkQuery("multi* multi* foo"); + } + */ + + /** + * checks if a query yields the same result when executed on + * a single IndexSearcher containing all documents and on a + * MultiSearcher aggregating sub-searchers + * @param queryStr the query to check. + * @throws IOException + * @throws ParseException + */ + private void checkQuery(String queryStr) throws IOException, ParseException { + // check result hit ranking + if(verbose) System.out.println("Query: " + queryStr); + Query query = QueryParser.parse(queryStr, FIELD_NAME, + new StandardAnalyzer()); + Hits multiSearcherHits = multiSearcher.search(query); + Hits singleSearcherHits = singleSearcher.search(query); + assertEquals(multiSearcherHits.length(), singleSearcherHits.length()); + for (int i = 0; i < multiSearcherHits.length(); i++) { + Document docMulti = multiSearcherHits.doc(i); + Document docSingle = singleSearcherHits.doc(i); + if(verbose) System.out.println("Multi: " + docMulti.get(FIELD_NAME) + " score=" + + multiSearcherHits.score(i)); + if(verbose) System.out.println("Single: " + docSingle.get(FIELD_NAME) + " score=" + + singleSearcherHits.score(i)); + assertEquals(multiSearcherHits.score(i), singleSearcherHits.score(i), + 0.001f); + assertEquals(docMulti.get(FIELD_NAME), docSingle.get(FIELD_NAME)); + } + if(verbose) System.out.println(); + } + + /** + * initializes multiSearcher and singleSearcher with the same document set + */ + protected void setUp() throws Exception { + // create MultiSearcher from two seperate searchers + Directory d1 = new RAMDirectory(); + IndexWriter iw1 = new IndexWriter(d1, new StandardAnalyzer(), true); + addCollection1(iw1); + iw1.close(); Directory d2 = new RAMDirectory(); - iw = new IndexWriter(d2, new StandardAnalyzer(), true); - addCollection2(iw); - iw.close(); - + IndexWriter iw2 = new IndexWriter(d2, new StandardAnalyzer(), true); + addCollection2(iw2); + iw2.close(); + Searchable[] s = new Searchable[2]; s[0] = new IndexSearcher(d1); s[1] = new IndexSearcher(d2); - MultiSearcher ms = new MultiSearcher(s); - Hits hits = ms.search(query); - return hits; - } - - // Collection 1+2 indexed together: - private Hits single() throws IOException { + multiSearcher = new MultiSearcher(s); + + // create IndexSearcher which contains all documents Directory d = new RAMDirectory(); IndexWriter iw = new IndexWriter(d, new StandardAnalyzer(), true); addCollection1(iw); addCollection2(iw); iw.close(); - IndexSearcher is = new IndexSearcher(d); - Hits hits = is.search(query); - return hits; - } - - private void addCollection1(IndexWriter iw) throws IOException { - add("one blah three", iw); - add("one foo three", iw); - add("one foobar three", iw); + singleSearcher = new IndexSearcher(d); } - private void addCollection2(IndexWriter iw) throws IOException { - add("two blah three", iw); - add("two foo xxx", iw); - add("two foobar xxx", iw); + private void addCollection1(IndexWriter iw) throws IOException { + add("one blah three", iw); + add("one foo three multiOne", iw); + add("one foobar three multiThree", iw); + add("blueberry pie", iw); + add("blueberry strudel", iw); + add("blueberry pizza", iw); } + private void addCollection2(IndexWriter iw) throws IOException { + add("two blah three", iw); + add("two foo xxx multiTwo", iw); + add("two foobar xxx multiThreee", iw); + add("blueberry chewing gum", iw); + add("bluebird pizza", iw); + add("bluebird foobar pizza", iw); + add("piccadilly circus", iw); + } + private void add(String value, IndexWriter iw) throws IOException { Document d = new Document(); - d.add(new Field("body", value, Field.Store.YES, Field.Index.TOKENIZED)); + d.add(new Field(FIELD_NAME, value, Field.Store.YES, Field.Index.TOKENIZED)); iw.addDocument(d); }