From fd35f2609a324d164e4b0f0f76f99e3aefe69815 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Tue, 28 Jan 2014 16:59:06 +0000 Subject: [PATCH] LUCENE-5409: fix ToParentBlockJoinQuery.rewrite to correctly preserve the origChildQuery after more than one iteration of rewrite git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1562124 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 4 ++ .../apache/lucene/search/BooleanQuery.java | 37 ++++++++------ .../lucene/search/TestSubScorerFreqs.java | 9 ++-- .../search/join/ToParentBlockJoinQuery.java | 2 +- .../lucene/search/join/TestBlockJoin.java | 50 ++++++++++++++++++- .../apache/lucene/search/AssertingScorer.java | 6 ++- 6 files changed, 87 insertions(+), 21 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index b3f3aafa2c5..d84befd1267 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -207,6 +207,10 @@ Bug fixes * LUCENE-5415: SpanMultiTermQueryWrapper didn't handle its boost in hashcode/equals/tostring/rewrite. (Robert Muir) +* LUCENE-5409: ToParentBlockJoinCollector.getTopGroups would fail to + return any groups when the joined query required more than one + rewrite step (Peng Cheng via Mike McCandless) + API Changes * LUCENE-5339: The facet module was simplified/reworked to make the diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java index 6b6117d67a0..e6dfa3b50b7 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java @@ -62,8 +62,9 @@ public class BooleanQuery extends Query implements Iterable { * Default value is 1024. */ public static void setMaxClauseCount(int maxClauseCount) { - if (maxClauseCount < 1) + if (maxClauseCount < 1) { throw new IllegalArgumentException("maxClauseCount must be >= 1"); + } BooleanQuery.maxClauseCount = maxClauseCount; } @@ -138,8 +139,9 @@ public class BooleanQuery extends Query implements Iterable { * @see #getMaxClauseCount() */ public void add(BooleanClause clause) { - if (clauses.size() >= maxClauseCount) + if (clauses.size() >= maxClauseCount) { throw new TooManyClauses(); + } clauses.add(clause); } @@ -182,7 +184,9 @@ public class BooleanQuery extends Query implements Iterable { BooleanClause c = clauses.get(i); Weight w = c.getQuery().createWeight(searcher); weights.add(w); - if (!c.isProhibited()) maxCoord++; + if (!c.isProhibited()) { + maxCoord++; + } } } @@ -195,9 +199,10 @@ public class BooleanQuery extends Query implements Iterable { for (int i = 0 ; i < weights.size(); i++) { // call sumOfSquaredWeights for all clauses in case of side effects float s = weights.get(i).getValueForNormalization(); // sum sub weights - if (!clauses.get(i).isProhibited()) + if (!clauses.get(i).isProhibited()) { // only add to sum for non-prohibited clauses sum += s; + } } sum *= getBoost() * getBoost(); // boost each sub-weight @@ -258,8 +263,9 @@ public class BooleanQuery extends Query implements Iterable { sumExpl.addDetail(r); fail = true; } - if (c.getOccur() == Occur.SHOULD) + if (c.getOccur() == Occur.SHOULD) { shouldMatchCount++; + } } else if (c.isRequired()) { Explanation r = new Explanation(0.0f, "no match on required clause (" + c.getQuery().toString() + ")"); r.addDetail(e); @@ -422,8 +428,9 @@ public class BooleanQuery extends Query implements Iterable { } if (clone != null) { return clone; // some clauses rewrote - } else + } else { return this; // no clauses rewrote + } } // inherit javadoc @@ -447,17 +454,18 @@ public class BooleanQuery extends Query implements Iterable { @Override public String toString(String field) { StringBuilder buffer = new StringBuilder(); - boolean needParens=(getBoost() != 1.0) || (getMinimumNumberShouldMatch()>0) ; + boolean needParens= getBoost() != 1.0 || getMinimumNumberShouldMatch() > 0; if (needParens) { buffer.append("("); } for (int i = 0 ; i < clauses.size(); i++) { BooleanClause c = clauses.get(i); - if (c.isProhibited()) + if (c.isProhibited()) { buffer.append("-"); - else if (c.isRequired()) + } else if (c.isRequired()) { buffer.append("+"); + } Query subQuery = c.getQuery(); if (subQuery != null) { @@ -472,8 +480,9 @@ public class BooleanQuery extends Query implements Iterable { buffer.append("null"); } - if (i != clauses.size()-1) + if (i != clauses.size()-1) { buffer.append(" "); + } } if (needParens) { @@ -485,8 +494,7 @@ public class BooleanQuery extends Query implements Iterable { buffer.append(getMinimumNumberShouldMatch()); } - if (getBoost() != 1.0f) - { + if (getBoost() != 1.0f) { buffer.append(ToStringUtils.boost(getBoost())); } @@ -496,10 +504,11 @@ public class BooleanQuery extends Query implements Iterable { /** Returns true iff o is equal to this. */ @Override public boolean equals(Object o) { - if (!(o instanceof BooleanQuery)) + if (!(o instanceof BooleanQuery)) { return false; + } BooleanQuery other = (BooleanQuery)o; - return (this.getBoost() == other.getBoost()) + return this.getBoost() == other.getBoost() && this.clauses.equals(other.clauses) && this.getMinimumNumberShouldMatch() == other.getMinimumNumberShouldMatch() && this.disableCoord == other.disableCoord; diff --git a/lucene/core/src/test/org/apache/lucene/search/TestSubScorerFreqs.java b/lucene/core/src/test/org/apache/lucene/search/TestSubScorerFreqs.java index a51f118ce18..f82caa8038a 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestSubScorerFreqs.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestSubScorerFreqs.java @@ -92,7 +92,7 @@ public class TestSubScorerFreqs extends LuceneTestCase { public void setSubScorers(Scorer scorer, String relationship) { for (ChildScorer child : scorer.getChildren()) { - if (relationships.contains(child.relationship)) { + if (scorer instanceof AssertingScorer || relationships.contains(child.relationship)) { setSubScorers(child.child, child.relationship); } } @@ -180,16 +180,17 @@ public class TestSubScorerFreqs extends LuceneTestCase { assertEquals(includeOptional ? 5 : 4, doc0.size()); assertEquals(1.0F, doc0.get(aQuery), FLOAT_TOLERANCE); assertEquals(4.0F, doc0.get(dQuery), FLOAT_TOLERANCE); - if (includeOptional) + if (includeOptional) { assertEquals(3.0F, doc0.get(cQuery), FLOAT_TOLERANCE); + } Map doc1 = c.docCounts.get(++i); assertEquals(includeOptional ? 5 : 4, doc1.size()); assertEquals(1.0F, doc1.get(aQuery), FLOAT_TOLERANCE); assertEquals(1.0F, doc1.get(dQuery), FLOAT_TOLERANCE); - if (includeOptional) + if (includeOptional) { assertEquals(1.0F, doc1.get(cQuery), FLOAT_TOLERANCE); - + } } } } diff --git a/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java index cd27fa0e4a8..e5fe46dc65a 100644 --- a/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java +++ b/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java @@ -453,7 +453,7 @@ public class ToParentBlockJoinQuery extends Query { public Query rewrite(IndexReader reader) throws IOException { final Query childRewrite = childQuery.rewrite(reader); if (childRewrite != childQuery) { - Query rewritten = new ToParentBlockJoinQuery(childQuery, + Query rewritten = new ToParentBlockJoinQuery(origChildQuery, childRewrite, parentsFilter, scoreMode); diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java index 76f25f4df89..b788ea0f2bc 100644 --- a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java +++ b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java @@ -201,7 +201,7 @@ public class TestBlockJoin extends LuceneTestCase { childDoc = s.doc(hits.scoreDocs[0].doc); //System.out.println("CHILD = " + childDoc + " docID=" + hits.scoreDocs[0].doc); assertEquals("java", childDoc.get("skill")); - assertEquals(2007, (childDoc.getField("year")).numericValue()); + assertEquals(2007, childDoc.getField("year").numericValue()); assertEquals("Lisa", getParentDoc(r, parentsFilter, hits.scoreDocs[0].doc).get("name")); // Test with filter on child docs: @@ -213,6 +213,54 @@ public class TestBlockJoin extends LuceneTestCase { dir.close(); } + public void testBugCausedByRewritingTwice() throws IOException { + final Directory dir = newDirectory(); + final RandomIndexWriter w = new RandomIndexWriter(random(), dir); + + final List docs = new ArrayList(); + + for (int i=0;i<10;i++) { + docs.clear(); + docs.add(makeJob("ruby", i)); + docs.add(makeJob("java", 2007)); + docs.add(makeResume("Frank", "United States")); + w.addDocuments(docs); + } + + IndexReader r = w.getReader(); + w.close(); + IndexSearcher s = newSearcher(r); + + MultiTermQuery qc = NumericRangeQuery.newIntRange("year", 2007, 2007, true, true); + // Hacky: this causes the query to need 2 rewrite + // iterations: + qc.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE); + + Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume")))); + + int h1 = qc.hashCode(); + Query qw1 = qc.rewrite(r); + int h2 = qw1.hashCode(); + Query qw2 = qw1.rewrite(r); + int h3 = qw2.hashCode(); + + assertTrue(h1 != h2); + assertTrue(h2 != h3); + assertTrue(h3 != h1); + + ToParentBlockJoinQuery qp = new ToParentBlockJoinQuery(qc, parentsFilter, ScoreMode.Max); + ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 10, true, true); + + s.search(qp, c); + TopGroups groups = c.getTopGroups(qp, Sort.INDEXORDER, 0, 10, 0, true); + for (GroupDocs group : groups.groups) { + assertEquals(1, group.totalHits); + } + + r.close(); + dir.close(); + } + protected QueryWrapperFilter skill(String skill) { return new QueryWrapperFilter(new TermQuery(new Term("skill", skill))); } diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingScorer.java b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingScorer.java index 635703cb434..731d6def884 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingScorer.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingScorer.java @@ -151,7 +151,11 @@ public class AssertingScorer extends Scorer { @Override public Collection getChildren() { - return in.getChildren(); + // We cannot hide that we hold a single child, else + // collectors (e.g. ToParentBlockJoinCollector) that + // need to walk the scorer tree will miss/skip the + // Scorer we wrap: + return Collections.singletonList(new ChildScorer(in, "SHOULD")); } @Override