diff --git a/CHANGES.txt b/CHANGES.txt index 69f214cc833..a13715398df 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -47,6 +47,13 @@ New Features (facet.offset, facet.limit), and explicit sorting (facet.sort). facet.zeros is now deprecated. (yonik) + 5. SOLR-80: Negative queries are now allowed everywhere. Negative queries + are generated and cached as their positive counterpart, speeding + generation and generally resulting in smaller sets to cache. + Set intersections in SolrIndexSearcher are more efficient, + starting with the smallest positive set, subtracting all negative + sets, then intersecting with all other positive sets. (yonik) + Changes in runtime behavior 1. Highlighting using DisMax will only pick up terms from the main user query, not boost or filter queries (klaas). diff --git a/src/java/org/apache/solr/search/QueryUtils.java b/src/java/org/apache/solr/search/QueryUtils.java new file mode 100755 index 00000000000..499176e7466 --- /dev/null +++ b/src/java/org/apache/solr/search/QueryUtils.java @@ -0,0 +1,121 @@ +package org.apache.solr.search; + +import org.apache.lucene.search.Query; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.MatchAllDocsQuery; + +import java.util.List; +import java.util.Arrays; + +/** + * @author yonik + * @version $Id$ + */ +public class QueryUtils { + + /** return true if this query has no positive components */ + static boolean isNegative(Query q) { + if (!(q instanceof BooleanQuery)) return false; + BooleanQuery bq = (BooleanQuery)q; + // TODO: use after next lucene update + //for (BooleanClause clause: (List )bq.clauses()) { + // if (bq.getClauses().size()==0) return false; + BooleanClause[] clauses = bq.getClauses(); + if (clauses.length==0) return false; + for (BooleanClause clause: clauses) { + if (!clause.isProhibited()) return false; + } + return true; + } + + /** Returns the original query if it was already a positive query, otherwise + 
* returns the negation of the query (i.e., a positive query). + *

+ * Example: both id:10 and -id:10 will return id:10 + *

+ * The caller can tell the sign of the original by a reference comparison between + * the original and returned query. + * @param q + * @return + */ + static Query getAbs(Query q) { + if (!(q instanceof BooleanQuery)) return q; + BooleanQuery bq = (BooleanQuery)q; + + BooleanClause[] clauses = bq.getClauses(); + if (clauses.length==0) return q; + + + for (BooleanClause clause: clauses) { + if (!clause.isProhibited()) return q; + } + + if (clauses.length==1) { + // if only one clause, dispense with the wrapping BooleanQuery + Query negClause = clauses[0].getQuery(); + // we shouldn't need to worry about adjusting the boosts since the negative + // clause would have never been selected in a positive query, and hence would + // not contribute to a score. + return negClause; + } else { + BooleanQuery newBq = new BooleanQuery(bq.isCoordDisabled()); + newBq.setBoost(bq.getBoost()); + // ignore minNrShouldMatch... it doesn't make sense for a negative query + + // the inverse of -a -b is a OR b + for (BooleanClause clause: clauses) { + newBq.add(clause.getQuery(), BooleanClause.Occur.SHOULD); + } + return newBq; + } + + + /*** TODO: use after next lucene update + List clauses = (List )bq.clauses(); + // A single filtered out stopword currently causes a BooleanQuery with + // zero clauses. + if (clauses.size()==0) return q; + + for (BooleanClause clause: clauses) { + if (!clause.isProhibited()) return q; + } + + if (clauses.size()==1) { + // if only one clause, dispense with the wrapping BooleanQuery + Query negClause = clauses.get(0).getQuery(); + // we shouldn't need to worry about adjusting the boosts since the negative + // clause would have never been selected in a positive query, and hence the + // boost is meaningless. + return negClause; + } else { + BooleanQuery newBq = new BooleanQuery(bq.isCoordDisabled()); + newBq.setBoost(bq.getBoost()); + // ignore minNrShouldMatch... 
it doesn't make sense for a negative query + + // the inverse of -a -b is a b + for (BooleanClause clause: clauses) { + newBq.add(clause.getQuery(), BooleanClause.Occur.SHOULD); + } + return newBq; + } + ***/ + } + + /** Makes negative queries suitable for querying by + * lucene. + */ + static Query makeQueryable(Query q) { + return isNegative(q) ? fixNegativeQuery(q) : q; + } + + /** Fixes a negative query by adding a MatchAllDocs query clause. + * The query passed in *must* be a negative query. + */ + static Query fixNegativeQuery(Query q) { + BooleanQuery newBq = (BooleanQuery)q.clone(); + newBq.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); + return newBq; + } + +} diff --git a/src/java/org/apache/solr/search/SolrIndexSearcher.java b/src/java/org/apache/solr/search/SolrIndexSearcher.java index f402d8ad46d..e7456472289 100644 --- a/src/java/org/apache/solr/search/SolrIndexSearcher.java +++ b/src/java/org/apache/solr/search/SolrIndexSearcher.java @@ -469,44 +469,98 @@ public class SolrIndexSearcher extends Searcher implements SolrInfoMBean { * Returns the set of document ids matching a query. * This method is cache-aware and attempts to retrieve the answer from the cache if possible. * If the answer was not cached, it may have been inserted into the cache as a result of this call. + * This method can handle negative queries. *

* The DocSet returned should not be modified. */ public DocSet getDocSet(Query query) throws IOException { - DocSet answer; - if (filterCache != null) { - answer = (DocSet)filterCache.get(query); - if (answer!=null) return answer; - } - - answer = getDocSetNC(query, null); + // Get the absolute value (positive version) of this query. If we + // get back the same reference, we know it's positive. + Query absQ = QueryUtils.getAbs(query); + boolean positive = query==absQ; if (filterCache != null) { - filterCache.put(query, answer); - } - - return answer; - } - - - // TODO: do a more efficient version that starts with the - // smallest DocSet and drives the intersection off that - // or implement an intersection() function that takes multiple - // DocSets (prob the better way) - protected DocSet getDocSet(List queries) throws IOException { - DocSet answer=null; - if (queries==null) return null; - for (Query q : queries) { - if (answer==null) { - answer = getDocSet(q); - } else { - answer = answer.intersection(getDocSet(q)); + DocSet absAnswer = (DocSet)filterCache.get(absQ); + if (absAnswer!=null) { + if (positive) return absAnswer; + else return getPositiveDocSet(matchAllDocsQuery).andNot(absAnswer); } } + + DocSet absAnswer = getDocSetNC(absQ, null); + DocSet answer = positive ? 
absAnswer : getPositiveDocSet(matchAllDocsQuery).andNot(absAnswer); + + if (filterCache != null) { + // cache negative queries as positive + filterCache.put(absQ, absAnswer); + } + + return answer; + } + + // only handle positive (non negative) queries + DocSet getPositiveDocSet(Query q) throws IOException { + DocSet answer; + if (filterCache != null) { + answer = (DocSet)filterCache.get(q); + if (answer!=null) return answer; + } + answer = getDocSetNC(q,null); + if (filterCache != null) filterCache.put(q,answer); return answer; } + private static Query matchAllDocsQuery = new MatchAllDocsQuery(); + + + protected DocSet getDocSet(List queries) throws IOException { + if (queries==null) return null; + if (queries.size()==1) return getDocSet(queries.get(0)); + DocSet answer=null; + + boolean[] neg = new boolean[queries.size()]; + DocSet[] sets = new DocSet[queries.size()]; + + int smallestIndex = -1; + int smallestCount = Integer.MAX_VALUE; + for (int i=0; i