diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 88f22f15119..46e79da68af 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -16,6 +16,11 @@ New Features * LUCENE-8764: Add "export all terms" feature to Luke. (Leonardo Menezes via Tomoko Uchida) +* LUCENE-8747: Composite Matches from multiple subqueries now allow access to + their submatches, and a new NamedMatches API allows marking of subqueries + and a simple way to find which subqueries have matched on a given document + (Alan Woodward, Jim Ferenczi) + Improvements * LUCENE-8874: Show SPI names instead of class names in Luke Analysis tab. (Tomoko Uchida) diff --git a/lucene/core/src/java/org/apache/lucene/search/Matches.java b/lucene/core/src/java/org/apache/lucene/search/Matches.java index 43cffa6179c..80a860e1577 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Matches.java +++ b/lucene/core/src/java/org/apache/lucene/search/Matches.java @@ -18,6 +18,7 @@ package org.apache.lucene.search; import java.io.IOException; +import java.util.Collection; /** * Reports the positions and optionally offsets of all matching terms in a query @@ -37,4 +38,10 @@ public interface Matches extends Iterable { */ MatchesIterator getMatches(String field) throws IOException; + /** + * Returns a collection of Matches that make up this instance; if it is not + * a composite, then this returns an empty list + */ + Collection getSubMatches(); + } diff --git a/lucene/core/src/java/org/apache/lucene/search/MatchesUtils.java b/lucene/core/src/java/org/apache/lucene/search/MatchesUtils.java index bf460b7b699..5dc966c5bc6 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MatchesUtils.java +++ b/lucene/core/src/java/org/apache/lucene/search/MatchesUtils.java @@ -19,6 +19,7 @@ package org.apache.lucene.search; import java.io.IOException; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.Iterator; import java.util.List; @@ -44,13 +45,18 @@ public final class MatchesUtils { */ public static final Matches MATCH_WITH_NO_TERMS = new Matches() { @Override - public Iterator iterator() { - return Collections.emptyIterator(); + public MatchesIterator getMatches(String field) { + return null; } @Override - public MatchesIterator getMatches(String field) { - return null; + public Collection getSubMatches() { + return Collections.emptyList(); + } + + @Override + public Iterator iterator() { + return Collections.emptyIterator(); } }; @@ -87,6 +93,11 @@ public final class MatchesUtils { // for each sub-match, iterate its fields (it's an Iterable of the fields), and return the distinct set return sm.stream().flatMap(m -> StreamSupport.stream(m.spliterator(), false)).distinct().iterator(); } + + @Override + public Collection getSubMatches() { + return subMatches; + } }; } @@ -122,6 +133,11 @@ public final class MatchesUtils { public Iterator iterator() { return Collections.singleton(field).iterator(); } + + @Override + public Collection getSubMatches() { + return Collections.emptyList(); + } }; } diff --git a/lucene/core/src/java/org/apache/lucene/search/NamedMatches.java b/lucene/core/src/java/org/apache/lucene/search/NamedMatches.java new file mode 100644 index 00000000000..c9cdf5e61f1 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/NamedMatches.java @@ -0,0 +1,158 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.search; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Objects; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReaderContext; + +/** + * Utility class to help extract the set of sub queries that have matched from + * a larger query. + * + * Individual subqueries may be wrapped using {@link #wrapQuery(String, Query)}, and + * the matching queries for a particular document can then be pulled from the parent + * Query's {@link Matches} object by calling {@link #findNamedMatches(Matches)} + */ +public class NamedMatches implements Matches { + + private final Matches in; + private final String name; + + /** + * Wraps a {@link Matches} object and associates a name with it + */ + public NamedMatches(String name, Matches in) { + this.in = Objects.requireNonNull(in); + this.name = name; + } + + /** + * Returns the name of this {@link Matches} + */ + public String getName() { + return name; + } + + @Override + public MatchesIterator getMatches(String field) throws IOException { + return in.getMatches(field); + } + + @Override + public Collection getSubMatches() { + return Collections.singleton(in); + } + + @Override + public Iterator iterator() { + return in.iterator(); + } + + /** + * Wrap a Query so that it associates a name with its {@link Matches} + */ + public static Query wrapQuery(String name, Query in) { + return new NamedQuery(name, in); + } + + /** + * Finds all {@link NamedMatches} in a {@link Matches} tree + */ + public static List findNamedMatches(Matches matches) { + List nm = new ArrayList<>(); + List toProcess = new LinkedList<>(); + toProcess.add(matches); + while (toProcess.size() > 0) { + matches = toProcess.remove(0); + if (matches instanceof NamedMatches) { + nm.add((NamedMatches) matches); + } + toProcess.addAll(matches.getSubMatches()); + } + return nm; + } + + private static class NamedQuery extends Query { + + private final String name; + private final Query in; + + private NamedQuery(String name, Query in) { + this.name = name; + this.in = in; + } + + @Override + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + Weight w = in.createWeight(searcher, scoreMode, boost); + return new FilterWeight(w) { + @Override + public Matches matches(LeafReaderContext context, int doc) throws IOException { + Matches m = in.matches(context, doc); + if (m == null) { + return null; + } + return new NamedMatches(name, m); + } + }; + } + + @Override + public Query rewrite(IndexReader reader) throws IOException { + Query rewritten = in.rewrite(reader); + if (rewritten != in) { + return new NamedQuery(name, rewritten); + } + return this; + } + + @Override + public String toString(String field) { + return "NamedQuery(" + name + "," + in.toString(field) + ")"; + } + + @Override + public void visit(QueryVisitor visitor) { + QueryVisitor sub = visitor.getSubVisitor(BooleanClause.Occur.MUST, this); + in.visit(sub); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + NamedQuery that = (NamedQuery) o; + return Objects.equals(name, that.name) && + Objects.equals(in, that.in); + } + + @Override + public int hashCode() { + return Objects.hash(name, in); + } + } +} diff --git a/lucene/core/src/test/org/apache/lucene/search/TestMatchesIterator.java b/lucene/core/src/test/org/apache/lucene/search/TestMatchesIterator.java index 235bead6a93..394840603ba 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestMatchesIterator.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestMatchesIterator.java @@ -192,6 +192,29 @@ public class TestMatchesIterator extends LuceneTestCase { } } + private void checkSubMatches(Query q, String[][] expectedNames) throws IOException { + Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE_NO_SCORES, 1); + for (int i = 0; i < expectedNames.length; i++) { + LeafReaderContext ctx = searcher.leafContexts.get(ReaderUtil.subIndex(i, searcher.leafContexts)); + int doc = i - ctx.docBase; + Matches matches = w.matches(ctx, doc); + if (matches == null) { + assertEquals("Expected to get no matches on document " + i, 0, expectedNames[i].length); + continue; + } + Set expectedQueries = new HashSet<>(Arrays.asList(expectedNames[i])); + Set actualQueries = NamedMatches.findNamedMatches(matches) + .stream().map(NamedMatches::getName).collect(Collectors.toSet()); + + Set unexpected = new HashSet<>(actualQueries); + unexpected.removeAll(expectedQueries); + assertEquals("Unexpected matching leaf queries: " + unexpected, 0, unexpected.size()); + Set missing = new HashSet<>(expectedQueries); + missing.removeAll(actualQueries); + assertEquals("Missing matching leaf queries: " + missing, 0, missing.size()); + } + } + private void assertIsLeafMatch(Query q, String field) throws IOException { Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE, 1); for (int i = 0; i < searcher.reader.maxDoc(); i++) { @@ -294,7 +317,7 @@ public class TestMatchesIterator extends LuceneTestCase { public void testTermQuery() throws IOException { Term t = new Term(FIELD_WITH_OFFSETS, "w1"); - Query q = new TermQuery(t); + Query q = NamedMatches.wrapQuery("q", new TermQuery(t)); checkMatches(q, FIELD_WITH_OFFSETS, new int[][]{ { 0, 0, 0, 0, 2 }, { 1, 0, 0, 0, 2 }, @@ -304,6 +327,7 @@ public class TestMatchesIterator extends LuceneTestCase { }); checkLabelCount(q, FIELD_WITH_OFFSETS, new int[]{ 1, 1, 1, 1, 0, 0 }); assertIsLeafMatch(q, FIELD_WITH_OFFSETS); + checkSubMatches(q, new String[][]{ {"q"}, {"q"}, {"q"}, {"q"}, {}, {}}); } public void testTermQueryNoStoredOffsets() throws IOException { @@ -325,8 +349,8 @@ public class TestMatchesIterator extends LuceneTestCase { } public void testDisjunction() throws IOException { - Query w1 = new TermQuery(new Term(FIELD_WITH_OFFSETS, "w1")); - Query w3 = new TermQuery(new Term(FIELD_WITH_OFFSETS, "w3")); + Query w1 = NamedMatches.wrapQuery("w1", new TermQuery(new Term(FIELD_WITH_OFFSETS, "w1"))); + Query w3 = NamedMatches.wrapQuery("w3", new TermQuery(new Term(FIELD_WITH_OFFSETS, "w3"))); Query q = new BooleanQuery.Builder() .add(w1, BooleanClause.Occur.SHOULD) .add(w3, BooleanClause.Occur.SHOULD) @@ -340,6 +364,7 @@ public class TestMatchesIterator extends LuceneTestCase { }); checkLabelCount(q, FIELD_WITH_OFFSETS, new int[]{ 2, 2, 1, 2, 0, 0 }); assertIsLeafMatch(q, FIELD_WITH_OFFSETS); + checkSubMatches(q, new String[][]{ {"w1", "w3"}, {"w1", "w3"}, {"w1"}, {"w1", "w3"}, {}, {}}); } public void testDisjunctionNoPositions() throws IOException { @@ -378,10 +403,10 @@ public class TestMatchesIterator extends LuceneTestCase { } public void testMinShouldMatch() throws IOException { - Query w1 = new TermQuery(new Term(FIELD_WITH_OFFSETS, "w1")); - Query w3 = new TermQuery(new Term(FIELD_WITH_OFFSETS, "w3")); + Query w1 = NamedMatches.wrapQuery("w1", new TermQuery(new Term(FIELD_WITH_OFFSETS, "w1"))); + Query w3 = NamedMatches.wrapQuery("w3", new TermQuery(new Term(FIELD_WITH_OFFSETS, "w3"))); Query w4 = new TermQuery(new Term(FIELD_WITH_OFFSETS, "w4")); - Query xx = new TermQuery(new Term(FIELD_WITH_OFFSETS, "xx")); + Query xx = NamedMatches.wrapQuery("xx", new TermQuery(new Term(FIELD_WITH_OFFSETS, "xx"))); Query q = new BooleanQuery.Builder() .add(w3, BooleanClause.Occur.SHOULD) .add(new BooleanQuery.Builder() @@ -400,6 +425,7 @@ public class TestMatchesIterator extends LuceneTestCase { }); checkLabelCount(q, FIELD_WITH_OFFSETS, new int[]{ 3, 1, 3, 3, 0, 0 }); assertIsLeafMatch(q, FIELD_WITH_OFFSETS); + checkSubMatches(q, new String[][]{ {"w1", "w3"}, {"w3"}, {"w1", "xx"}, {"w1", "w3"}, {}, {}}); } public void testMinShouldMatchNoPositions() throws IOException { @@ -544,6 +570,8 @@ public class TestMatchesIterator extends LuceneTestCase { assertEquals(2, fields.size()); assertTrue(fields.contains(FIELD_WITH_OFFSETS)); assertTrue(fields.contains("id")); + + assertEquals(2, AssertingMatches.unWrap(m).getSubMatches().size()); } // 0 1 2 3 4 5 6 7 diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingMatches.java b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingMatches.java index c5c6e981e1c..f57a83b7929 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingMatches.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingMatches.java @@ -18,6 +18,8 @@ package org.apache.lucene.search; import java.io.IOException; +import java.util.Collection; +import java.util.Collections; import java.util.Iterator; class AssertingMatches implements Matches { @@ -36,8 +38,20 @@ class AssertingMatches implements Matches { return new AssertingMatchesIterator(mi); } + @Override + public Collection getSubMatches() { + return Collections.singleton(in); + } + @Override public Iterator iterator() { return in.iterator(); } + + public static Matches unWrap(Matches m) { + while (m instanceof AssertingMatches) { + m = (((AssertingMatches)m).in); + } + return m; + } }