mirror of https://github.com/apache/lucene.git
LUCENE-6716: Change SpanPayloadCheckQuery to take List<BytesRef>
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1702872 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
8d44733846
commit
22cc3a1560
|
@ -82,6 +82,9 @@ API Changes
|
|||
In order to apply boosts, you now need to wrap queries in a BoostQuery.
|
||||
(Adrien Grand)
|
||||
|
||||
* LUCENE-6716: SpanPayloadCheckQuery now takes a List<BytesRef> rather than
|
||||
a Collection<byte[]>. (Alan Woodward)
|
||||
|
||||
Optimizations
|
||||
|
||||
* LUCENE-6708: TopFieldCollector does not compute the score several times on the
|
||||
|
|
|
@ -17,13 +17,12 @@ package org.apache.lucene.search.payloads;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
import org.apache.lucene.index.Terms;
|
||||
|
@ -32,10 +31,12 @@ import org.apache.lucene.search.Scorer;
|
|||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.search.spans.FilterSpans;
|
||||
import org.apache.lucene.search.spans.FilterSpans.AcceptStatus;
|
||||
import org.apache.lucene.search.spans.SpanCollector;
|
||||
import org.apache.lucene.search.spans.SpanQuery;
|
||||
import org.apache.lucene.search.spans.SpanScorer;
|
||||
import org.apache.lucene.search.spans.SpanWeight;
|
||||
import org.apache.lucene.search.spans.Spans;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
|
||||
/**
|
||||
|
@ -43,14 +44,14 @@ import org.apache.lucene.util.ToStringUtils;
|
|||
*/
|
||||
public class SpanPayloadCheckQuery extends SpanQuery {
|
||||
|
||||
protected final Collection<byte[]> payloadToMatch;
|
||||
protected final List<BytesRef> payloadToMatch;
|
||||
protected final SpanQuery match;
|
||||
|
||||
/**
|
||||
* @param match The underlying {@link org.apache.lucene.search.spans.SpanQuery} to check
|
||||
* @param payloadToMatch The {@link java.util.Collection} of payloads to match
|
||||
* @param payloadToMatch The {@link java.util.List} of payloads to match
|
||||
*/
|
||||
public SpanPayloadCheckQuery(SpanQuery match, Collection<byte[]> payloadToMatch) {
|
||||
public SpanPayloadCheckQuery(SpanQuery match, List<BytesRef> payloadToMatch) {
|
||||
this.match = match;
|
||||
this.payloadToMatch = payloadToMatch;
|
||||
}
|
||||
|
@ -90,14 +91,14 @@ public class SpanPayloadCheckQuery extends SpanQuery {
|
|||
|
||||
@Override
|
||||
public Spans getSpans(final LeafReaderContext context, Postings requiredPostings) throws IOException {
|
||||
final PayloadSpanCollector collector = new PayloadSpanCollector();
|
||||
final PayloadChecker collector = new PayloadChecker();
|
||||
Spans matchSpans = matchWeight.getSpans(context, requiredPostings.atLeast(Postings.PAYLOADS));
|
||||
return (matchSpans == null) ? null : new FilterSpans(matchSpans) {
|
||||
@Override
|
||||
protected AcceptStatus accept(Spans candidate) throws IOException {
|
||||
collector.reset();
|
||||
candidate.collect(collector);
|
||||
return checkPayloads(collector.getPayloads());
|
||||
return collector.match();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
@ -118,27 +119,42 @@ public class SpanPayloadCheckQuery extends SpanQuery {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check to see if the collected payloads match the required set.
|
||||
*
|
||||
* @param candidate a collection of payloads from the current Spans
|
||||
* @return whether or not the payloads match
|
||||
*/
|
||||
protected AcceptStatus checkPayloads(Collection<byte[]> candidate) {
|
||||
if (candidate.size() == payloadToMatch.size()){
|
||||
//TODO: check the byte arrays are the same
|
||||
Iterator<byte[]> toMatchIter = payloadToMatch.iterator();
|
||||
//check each of the byte arrays, in order
|
||||
for (byte[] candBytes : candidate) {
|
||||
//if one is a mismatch, then return false
|
||||
if (Arrays.equals(candBytes, toMatchIter.next()) == false){
|
||||
return AcceptStatus.NO;
|
||||
}
|
||||
private class PayloadChecker implements SpanCollector {
|
||||
|
||||
int upto = 0;
|
||||
boolean matches = true;
|
||||
|
||||
@Override
|
||||
public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException {
|
||||
if (!matches)
|
||||
return;
|
||||
if (upto >= payloadToMatch.size()) {
|
||||
matches = false;
|
||||
return;
|
||||
}
|
||||
//we've verified all the bytes
|
||||
return AcceptStatus.YES;
|
||||
} else {
|
||||
return AcceptStatus.NO;
|
||||
BytesRef payload = postings.getPayload();
|
||||
if (payloadToMatch.get(upto) == null) {
|
||||
matches = payload == null;
|
||||
upto++;
|
||||
return;
|
||||
}
|
||||
if (payload == null) {
|
||||
matches = false;
|
||||
upto++;
|
||||
return;
|
||||
}
|
||||
matches = payloadToMatch.get(upto).bytesEquals(payload);
|
||||
upto++;
|
||||
}
|
||||
|
||||
AcceptStatus match() {
|
||||
return matches && upto == payloadToMatch.size() ? AcceptStatus.YES : AcceptStatus.NO;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset() {
|
||||
this.upto = 0;
|
||||
this.matches = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -148,8 +164,8 @@ public class SpanPayloadCheckQuery extends SpanQuery {
|
|||
buffer.append("spanPayCheck(");
|
||||
buffer.append(match.toString(field));
|
||||
buffer.append(", payloadRef: ");
|
||||
for (byte[] bytes : payloadToMatch) {
|
||||
ToStringUtils.byteArray(buffer, bytes);
|
||||
for (BytesRef bytes : payloadToMatch) {
|
||||
buffer.append(bytes.utf8ToString());
|
||||
buffer.append(';');
|
||||
}
|
||||
buffer.append(")");
|
||||
|
|
|
@ -20,7 +20,6 @@ package org.apache.lucene.search.payloads;
|
|||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
|
@ -49,7 +48,7 @@ import org.junit.AfterClass;
|
|||
import org.junit.BeforeClass;
|
||||
|
||||
/** basic test of payload-spans */
|
||||
public class TestPayloadBasics extends LuceneTestCase {
|
||||
public class TestPayloadCheckQuery extends LuceneTestCase {
|
||||
private static IndexSearcher searcher;
|
||||
private static IndexReader reader;
|
||||
private static Directory directory;
|
||||
|
@ -94,8 +93,8 @@ public class TestPayloadBasics extends LuceneTestCase {
|
|||
|
||||
public void testSpanPayloadCheck() throws Exception {
|
||||
SpanQuery term1 = new SpanTermQuery(new Term("field", "five"));
|
||||
BytesRef pay = new BytesRef(("pos: " + 5).getBytes(StandardCharsets.UTF_8));
|
||||
SpanQuery query = new SpanPayloadCheckQuery(term1, Collections.singletonList(pay.bytes));
|
||||
BytesRef pay = new BytesRef("pos: " + 5);
|
||||
SpanQuery query = new SpanPayloadCheckQuery(term1, Collections.singletonList(pay));
|
||||
checkHits(query, new int[]
|
||||
{1125, 1135, 1145, 1155, 1165, 1175, 1185, 1195, 1225, 1235, 1245, 1255, 1265, 1275, 1285, 1295, 1325, 1335, 1345, 1355, 1365, 1375, 1385, 1395, 1425, 1435, 1445, 1455, 1465, 1475, 1485, 1495, 1525, 1535, 1545, 1555, 1565, 1575, 1585, 1595, 1625, 1635, 1645, 1655, 1665, 1675, 1685, 1695, 1725, 1735, 1745, 1755, 1765, 1775, 1785, 1795, 1825, 1835, 1845, 1855, 1865, 1875, 1885, 1895, 1925, 1935, 1945, 1955, 1965, 1975, 1985, 1995});
|
||||
assertTrue(searcher.explain(query, 1125).getValue() > 0.0f);
|
||||
|
@ -103,17 +102,17 @@ public class TestPayloadBasics extends LuceneTestCase {
|
|||
SpanTermQuery term2 = new SpanTermQuery(new Term("field", "hundred"));
|
||||
SpanNearQuery snq;
|
||||
SpanQuery[] clauses;
|
||||
List<byte[]> list;
|
||||
List<BytesRef> list;
|
||||
BytesRef pay2;
|
||||
clauses = new SpanQuery[2];
|
||||
clauses[0] = term1;
|
||||
clauses[1] = term2;
|
||||
snq = new SpanNearQuery(clauses, 0, true);
|
||||
pay = new BytesRef(("pos: " + 0).getBytes(StandardCharsets.UTF_8));
|
||||
pay2 = new BytesRef(("pos: " + 1).getBytes(StandardCharsets.UTF_8));
|
||||
pay = new BytesRef("pos: " + 0);
|
||||
pay2 = new BytesRef("pos: " + 1);
|
||||
list = new ArrayList<>();
|
||||
list.add(pay.bytes);
|
||||
list.add(pay2.bytes);
|
||||
list.add(pay);
|
||||
list.add(pay2);
|
||||
query = new SpanPayloadCheckQuery(snq, list);
|
||||
checkHits(query, new int[]
|
||||
{500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599});
|
||||
|
@ -122,13 +121,13 @@ public class TestPayloadBasics extends LuceneTestCase {
|
|||
clauses[1] = term2;
|
||||
clauses[2] = new SpanTermQuery(new Term("field", "five"));
|
||||
snq = new SpanNearQuery(clauses, 0, true);
|
||||
pay = new BytesRef(("pos: " + 0).getBytes(StandardCharsets.UTF_8));
|
||||
pay2 = new BytesRef(("pos: " + 1).getBytes(StandardCharsets.UTF_8));
|
||||
BytesRef pay3 = new BytesRef(("pos: " + 2).getBytes(StandardCharsets.UTF_8));
|
||||
pay = new BytesRef("pos: " + 0);
|
||||
pay2 = new BytesRef("pos: " + 1);
|
||||
BytesRef pay3 = new BytesRef("pos: " + 2);
|
||||
list = new ArrayList<>();
|
||||
list.add(pay.bytes);
|
||||
list.add(pay2.bytes);
|
||||
list.add(pay3.bytes);
|
||||
list.add(pay);
|
||||
list.add(pay2);
|
||||
list.add(pay3);
|
||||
query = new SpanPayloadCheckQuery(snq, list);
|
||||
checkHits(query, new int[]
|
||||
{505});
|
||||
|
@ -141,14 +140,22 @@ public class TestPayloadBasics extends LuceneTestCase {
|
|||
SpanTermQuery term4 = new SpanTermQuery(new Term("field", "four"));
|
||||
SpanNearQuery nearQuery = new SpanNearQuery(new SpanQuery[]{term5, term100, term4}, 0, false);
|
||||
|
||||
List<byte[]> payloads = new ArrayList<>();
|
||||
payloads.add(("pos: " + 2).getBytes(StandardCharsets.UTF_8));
|
||||
payloads.add(("pos: " + 1).getBytes(StandardCharsets.UTF_8));
|
||||
payloads.add(("pos: " + 0).getBytes(StandardCharsets.UTF_8));
|
||||
List<BytesRef> payloads = new ArrayList<>();
|
||||
payloads.add(new BytesRef("pos: " + 2));
|
||||
payloads.add(new BytesRef("pos: " + 1));
|
||||
payloads.add(new BytesRef("pos: " + 0));
|
||||
|
||||
SpanPayloadCheckQuery payloadQuery = new SpanPayloadCheckQuery(nearQuery, payloads);
|
||||
checkHits(payloadQuery, new int[]{ 405 });
|
||||
|
||||
payloads.clear();
|
||||
payloads.add(new BytesRef("pos: " + 0));
|
||||
payloads.add(new BytesRef("pos: " + 1));
|
||||
payloads.add(new BytesRef("pos: " + 2));
|
||||
|
||||
payloadQuery = new SpanPayloadCheckQuery(nearQuery, payloads);
|
||||
checkHits(payloadQuery, new int[]{ 504 });
|
||||
|
||||
}
|
||||
|
||||
public void testComplexSpanChecks() throws Exception {
|
||||
|
@ -169,15 +176,15 @@ public class TestPayloadBasics extends LuceneTestCase {
|
|||
query = new SpanPositionRangeQuery(oneThousHunThree, 0, 6);
|
||||
checkHits(query, new int[]{1103, 1203,1303,1403,1503,1603,1703,1803,1903});
|
||||
|
||||
Collection<byte[]> payloads = new ArrayList<>();
|
||||
List<BytesRef> payloads = new ArrayList<>();
|
||||
BytesRef pay = new BytesRef(("pos: " + 0).getBytes(StandardCharsets.UTF_8));
|
||||
BytesRef pay2 = new BytesRef(("pos: " + 1).getBytes(StandardCharsets.UTF_8));
|
||||
BytesRef pay3 = new BytesRef(("pos: " + 3).getBytes(StandardCharsets.UTF_8));
|
||||
BytesRef pay4 = new BytesRef(("pos: " + 4).getBytes(StandardCharsets.UTF_8));
|
||||
payloads.add(pay.bytes);
|
||||
payloads.add(pay2.bytes);
|
||||
payloads.add(pay3.bytes);
|
||||
payloads.add(pay4.bytes);
|
||||
payloads.add(pay);
|
||||
payloads.add(pay2);
|
||||
payloads.add(pay3);
|
||||
payloads.add(pay4);
|
||||
query = new SpanPayloadCheckQuery(oneThousHunThree, payloads);
|
||||
checkHits(query, new int[]{1103, 1203,1303,1403,1503,1603,1703,1803,1903});
|
||||
|
|
@ -17,6 +17,8 @@ package org.apache.lucene.search.highlight;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import javax.xml.parsers.DocumentBuilder;
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
|
@ -29,9 +31,6 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
import java.util.StringTokenizer;
|
||||
|
||||
import javax.xml.parsers.DocumentBuilder;
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.CachingTokenFilter;
|
||||
|
@ -56,11 +55,9 @@ import org.apache.lucene.index.IndexReader;
|
|||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.StoredDocument;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.queries.CommonTermsQuery;
|
||||
import org.apache.lucene.queries.CustomScoreProvider;
|
||||
import org.apache.lucene.queries.CustomScoreQuery;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
|
@ -81,8 +78,8 @@ import org.apache.lucene.search.TermRangeQuery;
|
|||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.search.WildcardQuery;
|
||||
import org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner;
|
||||
import org.apache.lucene.search.join.QueryBitSetProducer;
|
||||
import org.apache.lucene.search.join.BitSetProducer;
|
||||
import org.apache.lucene.search.join.QueryBitSetProducer;
|
||||
import org.apache.lucene.search.join.ScoreMode;
|
||||
import org.apache.lucene.search.join.ToChildBlockJoinQuery;
|
||||
import org.apache.lucene.search.join.ToParentBlockJoinQuery;
|
||||
|
@ -1957,7 +1954,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
}
|
||||
try (IndexReader reader = DirectoryReader.open(dir)) {
|
||||
Query query = new SpanPayloadCheckQuery(new SpanTermQuery(new Term(FIELD_NAME, "words")),
|
||||
Collections.singleton("pos: 1".getBytes("UTF-8")));//just match the first "word" occurrence
|
||||
Collections.singletonList(new BytesRef("pos: 1")));//just match the first "word" occurrence
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
QueryScorer scorer = new QueryScorer(query, searcher.getIndexReader(), FIELD_NAME);
|
||||
scorer.setUsePayloads(true);
|
||||
|
|
|
@ -30,6 +30,7 @@ import org.apache.lucene.index.Term;
|
|||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.payloads.SpanPayloadCheckQuery;
|
||||
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.common.params.HighlightParams;
|
||||
import org.apache.solr.handler.component.HighlightComponent;
|
||||
|
@ -1108,7 +1109,7 @@ public class HighlighterTest extends SolrTestCaseJ4 {
|
|||
|
||||
//Create query matching this payload
|
||||
Query query = new SpanPayloadCheckQuery(new SpanTermQuery(new Term(FIELD_NAME, "word")),
|
||||
Collections.singleton(new byte[]{0,0,0,7}));//bytes for integer 7
|
||||
Collections.singletonList(new BytesRef(new byte[]{0, 0, 0, 7})));//bytes for integer 7
|
||||
|
||||
//invoke highlight component... the hard way
|
||||
final SearchComponent hlComp = h.getCore().getSearchComponent("highlight");
|
||||
|
|
Loading…
Reference in New Issue