mirror of https://github.com/apache/lucene.git
LUCENE-8747: Allow access to submatches from Matches
This commit is contained in:
parent
fa72da1c71
commit
21842999fe
|
@ -65,6 +65,11 @@ New Features
|
||||||
|
|
||||||
* LUCENE-8764: Add "export all terms" feature to Luke. (Leonardo Menezes via Tomoko Uchida)
|
* LUCENE-8764: Add "export all terms" feature to Luke. (Leonardo Menezes via Tomoko Uchida)
|
||||||
|
|
||||||
|
* LUCENE-8747: Composite Matches from multiple subqueries now allow access to
|
||||||
|
their submatches, and a new NamedMatches API allows marking of subqueries
|
||||||
|
and provides a simple way to find which subqueries have matched on a given document
|
||||||
|
(Alan Woodward, Jim Ferenczi)
|
||||||
|
|
||||||
Improvements
|
Improvements
|
||||||
|
|
||||||
* LUCENE-8874: Show SPI names instead of class names in Luke Analysis tab. (Tomoko Uchida)
|
* LUCENE-8874: Show SPI names instead of class names in Luke Analysis tab. (Tomoko Uchida)
|
||||||
|
|
|
@ -18,6 +18,7 @@
|
||||||
package org.apache.lucene.search;
|
package org.apache.lucene.search;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Collection;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reports the positions and optionally offsets of all matching terms in a query
|
* Reports the positions and optionally offsets of all matching terms in a query
|
||||||
|
@ -37,4 +38,10 @@ public interface Matches extends Iterable<String> {
|
||||||
*/
|
*/
|
||||||
MatchesIterator getMatches(String field) throws IOException;
|
MatchesIterator getMatches(String field) throws IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the {@link Matches} instances that this instance is composed of; if it is not
|
||||||
|
* a composite, then this returns an empty collection.
|
||||||
|
*
* @return the sub-matches making up this instance, or an empty collection for a non-composite
*/
|
||||||
|
Collection<Matches> getSubMatches();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.search;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
@ -44,13 +45,18 @@ public final class MatchesUtils {
|
||||||
*/
|
*/
|
||||||
public static final Matches MATCH_WITH_NO_TERMS = new Matches() {
|
public static final Matches MATCH_WITH_NO_TERMS = new Matches() {
|
||||||
@Override
|
@Override
|
||||||
public Iterator<String> iterator() {
|
public MatchesIterator getMatches(String field) {
|
||||||
return Collections.emptyIterator();
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public MatchesIterator getMatches(String field) {
|
public Collection<Matches> getSubMatches() {
|
||||||
return null;
|
return Collections.emptyList();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Iterator<String> iterator() {
|
||||||
|
return Collections.emptyIterator();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -87,6 +93,11 @@ public final class MatchesUtils {
|
||||||
// for each sub-match, iterate its fields (it's an Iterable of the fields), and return the distinct set
|
// for each sub-match, iterate its fields (it's an Iterable of the fields), and return the distinct set
|
||||||
return sm.stream().flatMap(m -> StreamSupport.stream(m.spliterator(), false)).distinct().iterator();
|
return sm.stream().flatMap(m -> StreamSupport.stream(m.spliterator(), false)).distinct().iterator();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the subMatches collection visible in the enclosing scope as-is;
// no defensive copy is made here.
@Override
|
||||||
|
public Collection<Matches> getSubMatches() {
|
||||||
|
return subMatches;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -122,6 +133,11 @@ public final class MatchesUtils {
|
||||||
public Iterator<String> iterator() {
|
public Iterator<String> iterator() {
|
||||||
return Collections.singleton(field).iterator();
|
return Collections.singleton(field).iterator();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reports no sub-matches: always returns the shared immutable empty list.
@Override
|
||||||
|
public Collection<Matches> getSubMatches() {
|
||||||
|
return Collections.emptyList();
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,158 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.lucene.search;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.LinkedList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Utility class to help extract the set of sub queries that have matched from
|
||||||
|
* a larger query.
|
||||||
|
*
|
||||||
|
* Individual subqueries may be wrapped using {@link #wrapQuery(String, Query)}, and
|
||||||
|
* the matching queries for a particular document can then be pulled from the parent
|
||||||
|
* Query's {@link Matches} object by calling {@link #findNamedMatches(Matches)}
|
||||||
|
*/
|
||||||
|
public class NamedMatches implements Matches {
|
||||||
|
|
||||||
|
private final Matches in;
|
||||||
|
private final String name;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Wraps a {@link Matches} object and associates a name with it
|
||||||
|
*/
|
||||||
|
public NamedMatches(String name, Matches in) {
|
||||||
|
this.in = Objects.requireNonNull(in);
|
||||||
|
this.name = name;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the name of this {@link Matches}
|
||||||
|
*/
|
||||||
|
public String getName() {
|
||||||
|
return name;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public MatchesIterator getMatches(String field) throws IOException {
|
||||||
|
return in.getMatches(field);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Collection<Matches> getSubMatches() {
|
||||||
|
return Collections.singleton(in);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Iterator<String> iterator() {
|
||||||
|
return in.iterator();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Wrap a Query so that it associates a name with its {@link Matches}
|
||||||
|
*/
|
||||||
|
public static Query wrapQuery(String name, Query in) {
|
||||||
|
return new NamedQuery(name, in);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Finds all {@link NamedMatches} in a {@link Matches} tree
|
||||||
|
*/
|
||||||
|
public static List<NamedMatches> findNamedMatches(Matches matches) {
|
||||||
|
List<NamedMatches> nm = new ArrayList<>();
|
||||||
|
List<Matches> toProcess = new LinkedList<>();
|
||||||
|
toProcess.add(matches);
|
||||||
|
while (toProcess.size() > 0) {
|
||||||
|
matches = toProcess.remove(0);
|
||||||
|
if (matches instanceof NamedMatches) {
|
||||||
|
nm.add((NamedMatches) matches);
|
||||||
|
}
|
||||||
|
toProcess.addAll(matches.getSubMatches());
|
||||||
|
}
|
||||||
|
return nm;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class NamedQuery extends Query {
|
||||||
|
|
||||||
|
private final String name;
|
||||||
|
private final Query in;
|
||||||
|
|
||||||
|
private NamedQuery(String name, Query in) {
|
||||||
|
this.name = name;
|
||||||
|
this.in = in;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
|
||||||
|
Weight w = in.createWeight(searcher, scoreMode, boost);
|
||||||
|
return new FilterWeight(w) {
|
||||||
|
@Override
|
||||||
|
public Matches matches(LeafReaderContext context, int doc) throws IOException {
|
||||||
|
Matches m = in.matches(context, doc);
|
||||||
|
if (m == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return new NamedMatches(name, m);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Query rewrite(IndexReader reader) throws IOException {
|
||||||
|
Query rewritten = in.rewrite(reader);
|
||||||
|
if (rewritten != in) {
|
||||||
|
return new NamedQuery(name, rewritten);
|
||||||
|
}
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString(String field) {
|
||||||
|
return "NamedQuery(" + name + "," + in.toString(field) + ")";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void visit(QueryVisitor visitor) {
|
||||||
|
QueryVisitor sub = visitor.getSubVisitor(BooleanClause.Occur.MUST, this);
|
||||||
|
in.visit(sub);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
if (this == o) return true;
|
||||||
|
if (o == null || getClass() != o.getClass()) return false;
|
||||||
|
NamedQuery that = (NamedQuery) o;
|
||||||
|
return Objects.equals(name, that.name) &&
|
||||||
|
Objects.equals(in, that.in);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return Objects.hash(name, in);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -192,6 +192,29 @@ public class TestMatchesIterator extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void checkSubMatches(Query q, String[][] expectedNames) throws IOException {
|
||||||
|
Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE_NO_SCORES, 1);
|
||||||
|
for (int i = 0; i < expectedNames.length; i++) {
|
||||||
|
LeafReaderContext ctx = searcher.leafContexts.get(ReaderUtil.subIndex(i, searcher.leafContexts));
|
||||||
|
int doc = i - ctx.docBase;
|
||||||
|
Matches matches = w.matches(ctx, doc);
|
||||||
|
if (matches == null) {
|
||||||
|
assertEquals("Expected to get no matches on document " + i, 0, expectedNames[i].length);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
Set<String> expectedQueries = new HashSet<>(Arrays.asList(expectedNames[i]));
|
||||||
|
Set<String> actualQueries = NamedMatches.findNamedMatches(matches)
|
||||||
|
.stream().map(NamedMatches::getName).collect(Collectors.toSet());
|
||||||
|
|
||||||
|
Set<String> unexpected = new HashSet<>(actualQueries);
|
||||||
|
unexpected.removeAll(expectedQueries);
|
||||||
|
assertEquals("Unexpected matching leaf queries: " + unexpected, 0, unexpected.size());
|
||||||
|
Set<String> missing = new HashSet<>(expectedQueries);
|
||||||
|
missing.removeAll(actualQueries);
|
||||||
|
assertEquals("Missing matching leaf queries: " + missing, 0, missing.size());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private void assertIsLeafMatch(Query q, String field) throws IOException {
|
private void assertIsLeafMatch(Query q, String field) throws IOException {
|
||||||
Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE, 1);
|
Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE, 1);
|
||||||
for (int i = 0; i < searcher.reader.maxDoc(); i++) {
|
for (int i = 0; i < searcher.reader.maxDoc(); i++) {
|
||||||
|
@ -294,7 +317,7 @@ public class TestMatchesIterator extends LuceneTestCase {
|
||||||
|
|
||||||
public void testTermQuery() throws IOException {
|
public void testTermQuery() throws IOException {
|
||||||
Term t = new Term(FIELD_WITH_OFFSETS, "w1");
|
Term t = new Term(FIELD_WITH_OFFSETS, "w1");
|
||||||
Query q = new TermQuery(t);
|
Query q = NamedMatches.wrapQuery("q", new TermQuery(t));
|
||||||
checkMatches(q, FIELD_WITH_OFFSETS, new int[][]{
|
checkMatches(q, FIELD_WITH_OFFSETS, new int[][]{
|
||||||
{ 0, 0, 0, 0, 2 },
|
{ 0, 0, 0, 0, 2 },
|
||||||
{ 1, 0, 0, 0, 2 },
|
{ 1, 0, 0, 0, 2 },
|
||||||
|
@ -304,6 +327,7 @@ public class TestMatchesIterator extends LuceneTestCase {
|
||||||
});
|
});
|
||||||
checkLabelCount(q, FIELD_WITH_OFFSETS, new int[]{ 1, 1, 1, 1, 0, 0 });
|
checkLabelCount(q, FIELD_WITH_OFFSETS, new int[]{ 1, 1, 1, 1, 0, 0 });
|
||||||
assertIsLeafMatch(q, FIELD_WITH_OFFSETS);
|
assertIsLeafMatch(q, FIELD_WITH_OFFSETS);
|
||||||
|
checkSubMatches(q, new String[][]{ {"q"}, {"q"}, {"q"}, {"q"}, {}, {}});
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testTermQueryNoStoredOffsets() throws IOException {
|
public void testTermQueryNoStoredOffsets() throws IOException {
|
||||||
|
@ -325,8 +349,8 @@ public class TestMatchesIterator extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testDisjunction() throws IOException {
|
public void testDisjunction() throws IOException {
|
||||||
Query w1 = new TermQuery(new Term(FIELD_WITH_OFFSETS, "w1"));
|
Query w1 = NamedMatches.wrapQuery("w1", new TermQuery(new Term(FIELD_WITH_OFFSETS, "w1")));
|
||||||
Query w3 = new TermQuery(new Term(FIELD_WITH_OFFSETS, "w3"));
|
Query w3 = NamedMatches.wrapQuery("w3", new TermQuery(new Term(FIELD_WITH_OFFSETS, "w3")));
|
||||||
Query q = new BooleanQuery.Builder()
|
Query q = new BooleanQuery.Builder()
|
||||||
.add(w1, BooleanClause.Occur.SHOULD)
|
.add(w1, BooleanClause.Occur.SHOULD)
|
||||||
.add(w3, BooleanClause.Occur.SHOULD)
|
.add(w3, BooleanClause.Occur.SHOULD)
|
||||||
|
@ -340,6 +364,7 @@ public class TestMatchesIterator extends LuceneTestCase {
|
||||||
});
|
});
|
||||||
checkLabelCount(q, FIELD_WITH_OFFSETS, new int[]{ 2, 2, 1, 2, 0, 0 });
|
checkLabelCount(q, FIELD_WITH_OFFSETS, new int[]{ 2, 2, 1, 2, 0, 0 });
|
||||||
assertIsLeafMatch(q, FIELD_WITH_OFFSETS);
|
assertIsLeafMatch(q, FIELD_WITH_OFFSETS);
|
||||||
|
checkSubMatches(q, new String[][]{ {"w1", "w3"}, {"w1", "w3"}, {"w1"}, {"w1", "w3"}, {}, {}});
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testDisjunctionNoPositions() throws IOException {
|
public void testDisjunctionNoPositions() throws IOException {
|
||||||
|
@ -378,10 +403,10 @@ public class TestMatchesIterator extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testMinShouldMatch() throws IOException {
|
public void testMinShouldMatch() throws IOException {
|
||||||
Query w1 = new TermQuery(new Term(FIELD_WITH_OFFSETS, "w1"));
|
Query w1 = NamedMatches.wrapQuery("w1", new TermQuery(new Term(FIELD_WITH_OFFSETS, "w1")));
|
||||||
Query w3 = new TermQuery(new Term(FIELD_WITH_OFFSETS, "w3"));
|
Query w3 = NamedMatches.wrapQuery("w3", new TermQuery(new Term(FIELD_WITH_OFFSETS, "w3")));
|
||||||
Query w4 = new TermQuery(new Term(FIELD_WITH_OFFSETS, "w4"));
|
Query w4 = new TermQuery(new Term(FIELD_WITH_OFFSETS, "w4"));
|
||||||
Query xx = new TermQuery(new Term(FIELD_WITH_OFFSETS, "xx"));
|
Query xx = NamedMatches.wrapQuery("xx", new TermQuery(new Term(FIELD_WITH_OFFSETS, "xx")));
|
||||||
Query q = new BooleanQuery.Builder()
|
Query q = new BooleanQuery.Builder()
|
||||||
.add(w3, BooleanClause.Occur.SHOULD)
|
.add(w3, BooleanClause.Occur.SHOULD)
|
||||||
.add(new BooleanQuery.Builder()
|
.add(new BooleanQuery.Builder()
|
||||||
|
@ -400,6 +425,7 @@ public class TestMatchesIterator extends LuceneTestCase {
|
||||||
});
|
});
|
||||||
checkLabelCount(q, FIELD_WITH_OFFSETS, new int[]{ 3, 1, 3, 3, 0, 0 });
|
checkLabelCount(q, FIELD_WITH_OFFSETS, new int[]{ 3, 1, 3, 3, 0, 0 });
|
||||||
assertIsLeafMatch(q, FIELD_WITH_OFFSETS);
|
assertIsLeafMatch(q, FIELD_WITH_OFFSETS);
|
||||||
|
checkSubMatches(q, new String[][]{ {"w1", "w3"}, {"w3"}, {"w1", "xx"}, {"w1", "w3"}, {}, {}});
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testMinShouldMatchNoPositions() throws IOException {
|
public void testMinShouldMatchNoPositions() throws IOException {
|
||||||
|
@ -544,6 +570,8 @@ public class TestMatchesIterator extends LuceneTestCase {
|
||||||
assertEquals(2, fields.size());
|
assertEquals(2, fields.size());
|
||||||
assertTrue(fields.contains(FIELD_WITH_OFFSETS));
|
assertTrue(fields.contains(FIELD_WITH_OFFSETS));
|
||||||
assertTrue(fields.contains("id"));
|
assertTrue(fields.contains("id"));
|
||||||
|
|
||||||
|
assertEquals(2, AssertingMatches.unWrap(m).getSubMatches().size());
|
||||||
}
|
}
|
||||||
|
|
||||||
// 0 1 2 3 4 5 6 7
|
// 0 1 2 3 4 5 6 7
|
||||||
|
|
|
@ -18,6 +18,8 @@
|
||||||
package org.apache.lucene.search;
|
package org.apache.lucene.search;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
|
||||||
class AssertingMatches implements Matches {
|
class AssertingMatches implements Matches {
|
||||||
|
@ -36,8 +38,20 @@ class AssertingMatches implements Matches {
|
||||||
return new AssertingMatchesIterator(mi);
|
return new AssertingMatchesIterator(mi);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reports the wrapped Matches (in) as this wrapper's only sub-match.
@Override
|
||||||
|
public Collection<Matches> getSubMatches() {
|
||||||
|
return Collections.singleton(in);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Iterator<String> iterator() {
|
public Iterator<String> iterator() {
|
||||||
return in.iterator();
|
return in.iterator();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static Matches unWrap(Matches m) {
|
||||||
|
while (m instanceof AssertingMatches) {
|
||||||
|
m = (((AssertingMatches)m).in);
|
||||||
|
}
|
||||||
|
return m;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue