From d700b149a5597b4b8cdd10a5850d6050b6a82107 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Fri, 4 Mar 2016 18:23:00 +0100 Subject: [PATCH] LUCENE-7065: Fix the explain for the global ordinals join query. Before the explain would also indicate that non matching documents would match. On top of that with score mode average, the explain would fail with a NPE. --- lucene/CHANGES.txt | 7 ++ .../search/join/GlobalOrdinalsQuery.java | 27 +++-- .../join/GlobalOrdinalsWithScoreQuery.java | 35 +++--- .../lucene/search/join/TestJoinUtil.java | 103 ++++++++++++++++++ 4 files changed, 151 insertions(+), 21 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 71a715b80f3..b6d0bb3d973 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -155,6 +155,13 @@ Tests expression to encapsulate a statement that is expected to throw an exception. (Ryan Ernst) +Bug Fixes + +* LUCENE-7065: Fix the explain for the global ordinals join query. Before the + explain would also indicate that non matching documents would match. + On top of that with score mode average, the explain would fail with a NPE. + (Martijn van Groningen) + Other * LUCENE-7035: Upgrade icu4j to 56.1/unicode 8. (Robert Muir) diff --git a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsQuery.java index 5bd8c4c8368..f9b74d46932 100644 --- a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsQuery.java +++ b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsQuery.java @@ -112,14 +112,27 @@ final class GlobalOrdinalsQuery extends Query { @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { SortedDocValues values = DocValues.getSorted(context.reader(), joinField); - if (values != null) { - int segmentOrd = values.getOrd(doc); - if (segmentOrd != -1) { - BytesRef joinValue = values.lookupOrd(segmentOrd); - return Explanation.match(score(), "Score based on join value " + joinValue.utf8ToString()); - } + if (values == null) { + return Explanation.noMatch("Not a match"); } - return Explanation.noMatch("Not a match"); + + int segmentOrd = values.getOrd(doc); + if (segmentOrd == -1) { + return Explanation.noMatch("Not a match"); + } + BytesRef joinValue = values.lookupOrd(segmentOrd); + + int ord; + if (globalOrds != null) { + ord = (int) globalOrds.getGlobalOrds(context.ord).get(segmentOrd); + } else { + ord = segmentOrd; + } + if (foundOrds.get(ord) == false) { + return Explanation.noMatch("Not a match, join value " + Term.toString(joinValue)); + } + + return Explanation.match(score(), "A match, join value " + Term.toString(joinValue)); } @Override diff --git a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreQuery.java index a85dfa33fb4..f9b2064c65f 100644 --- a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreQuery.java +++ b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreQuery.java @@ -120,21 +120,28 @@ final class GlobalOrdinalsWithScoreQuery extends Query { @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { SortedDocValues values = DocValues.getSorted(context.reader(), joinField); - if (values != null) { - int segmentOrd = values.getOrd(doc); - if (segmentOrd != -1) { - final float score; - if (globalOrds != null) { - long globalOrd = globalOrds.getGlobalOrds(context.ord).get(segmentOrd); - score = collector.score((int) globalOrd); - } else { - score = collector.score(segmentOrd); - } - BytesRef joinValue = values.lookupOrd(segmentOrd); - return Explanation.match(score, "Score based on join value " + joinValue.utf8ToString()); - } + if (values == null) { + return Explanation.noMatch("Not a match"); } - return Explanation.noMatch("Not a match"); + + int segmentOrd = values.getOrd(doc); + if (segmentOrd == -1) { + return Explanation.noMatch("Not a match"); + } + BytesRef joinValue = values.lookupOrd(segmentOrd); + + int ord; + if (globalOrds != null) { + ord = (int) globalOrds.getGlobalOrds(context.ord).get(segmentOrd); + } else { + ord = segmentOrd; + } + if (collector.match(ord) == false) { + return Explanation.noMatch("Not a match, join value " + Term.toString(joinValue)); + } + + float score = collector.score(ord); + return Explanation.match(score, "A match, join value " + Term.toString(joinValue)); } @Override diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java b/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java index 9ab886bc54f..af507cdc2fb 100644 --- a/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java +++ b/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java @@ -48,6 +48,7 @@ import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.MultiDocValues; @@ -297,6 +298,108 @@ public class TestJoinUtil extends LuceneTestCase { dir.close(); } + public void testOrdinalsJoinExplainNoMatches() throws Exception { + final String idField = "id"; + final String productIdField = "productId"; + // A field indicating to what type a document belongs, which is then used to distinques between documents during joining. + final String typeField = "type"; + // A single sorted doc values field that holds the join values for all document types. + // Typically during indexing a schema will automatically create this field with the values + final String joinField = idField + productIdField; + + Directory dir = newDirectory(); + IndexWriter w = new IndexWriter( + dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE) + ); + + // 0 + Document doc = new Document(); + doc.add(new TextField(idField, "1", Field.Store.NO)); + doc.add(new TextField(typeField, "product", Field.Store.NO)); + doc.add(new TextField("description", "random text", Field.Store.NO)); + doc.add(new TextField("name", "name1", Field.Store.NO)); + doc.add(new SortedDocValuesField(joinField, new BytesRef("1"))); + w.addDocument(doc); + + // 1 + doc = new Document(); + doc.add(new TextField(idField, "2", Field.Store.NO)); + doc.add(new TextField(typeField, "product", Field.Store.NO)); + doc.add(new TextField("description", "random text", Field.Store.NO)); + doc.add(new TextField("name", "name2", Field.Store.NO)); + doc.add(new SortedDocValuesField(joinField, new BytesRef("2"))); + w.addDocument(doc); + + // 2 + doc = new Document(); + doc.add(new TextField(productIdField, "1", Field.Store.NO)); + doc.add(new TextField(typeField, "price", Field.Store.NO)); + doc.add(new TextField("price", "10.0", Field.Store.NO)); + doc.add(new SortedDocValuesField(joinField, new BytesRef("1"))); + w.addDocument(doc); + + // 3 + doc = new Document(); + doc.add(new TextField(productIdField, "2", Field.Store.NO)); + doc.add(new TextField(typeField, "price", Field.Store.NO)); + doc.add(new TextField("price", "20.0", Field.Store.NO)); + doc.add(new SortedDocValuesField(joinField, new BytesRef("1"))); + w.addDocument(doc); + + if (random().nextBoolean()) { + w.flush(); + } + + // 4 + doc = new Document(); + doc.add(new TextField(productIdField, "3", Field.Store.NO)); + doc.add(new TextField(typeField, "price", Field.Store.NO)); + doc.add(new TextField("price", "5.0", Field.Store.NO)); + doc.add(new SortedDocValuesField(joinField, new BytesRef("2"))); + w.addDocument(doc); + + // 5 + doc = new Document(); + doc.add(new TextField("field", "value", Field.Store.NO)); + w.addDocument(doc); + + IndexReader r = DirectoryReader.open(w); + IndexSearcher indexSearcher = new IndexSearcher(r); + SortedDocValues[] values = new SortedDocValues[r.leaves().size()]; + for (int i = 0; i < values.length; i++) { + LeafReader leafReader = r.leaves().get(i).reader(); + values[i] = DocValues.getSorted(leafReader, joinField); + } + MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build( + r.getCoreCacheKey(), values, PackedInts.DEFAULT + ); + + Query toQuery = new TermQuery(new Term("price", "5.0")); + Query fromQuery = new TermQuery(new Term("name", "name2")); + + for (ScoreMode scoreMode : ScoreMode.values()) { + Query joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, scoreMode, ordinalMap); + TopDocs result = indexSearcher.search(joinQuery, 10); + assertEquals(1, result.totalHits); + assertEquals(4, result.scoreDocs[0].doc); // doc with price: 5.0 + Explanation explanation = indexSearcher.explain(joinQuery, 4); + assertTrue(explanation.isMatch()); + assertEquals(explanation.getDescription(), "A match, join value 2"); + + explanation = indexSearcher.explain(joinQuery, 3); + assertFalse(explanation.isMatch()); + assertEquals(explanation.getDescription(), "Not a match, join value 1"); + + explanation = indexSearcher.explain(joinQuery, 5); + assertFalse(explanation.isMatch()); + assertEquals(explanation.getDescription(), "Not a match"); + } + + w.close(); + indexSearcher.getIndexReader().close(); + dir.close(); + } + public void testRandomOrdinalsJoin() throws Exception { IndexIterationContext context = createContext(512, false, true); int searchIters = 10;