LUCENE-6567: Simplify payload checks in SpanPayloadCheckQuery

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1685736 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Alan Woodward 2015-06-16 08:51:53 +00:00
parent a864970331
commit 8d8818fea1
3 changed files with 29 additions and 54 deletions

View File

@ -167,6 +167,9 @@ Build
* LUCENE-6518: Don't report false thread leaks from IBM J9 * LUCENE-6518: Don't report false thread leaks from IBM J9
ClassCache Reaper in test framework. (Dawid Weiss) ClassCache Reaper in test framework. (Dawid Weiss)
* LUCENE-6567: Simplify payload checking in SpanPayloadCheckQuery (Alan
Woodward)
======================= Lucene 5.2.1 ======================= ======================= Lucene 5.2.1 =======================
Bug Fixes Bug Fixes

View File

@ -16,6 +16,13 @@ package org.apache.lucene.search.payloads;
* limitations under the License. * limitations under the License.
*/ */
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext; import org.apache.lucene.index.TermContext;
@ -25,7 +32,6 @@ import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.spans.FilterSpans; import org.apache.lucene.search.spans.FilterSpans;
import org.apache.lucene.search.spans.FilterSpans.AcceptStatus; import org.apache.lucene.search.spans.FilterSpans.AcceptStatus;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanScorer; import org.apache.lucene.search.spans.SpanScorer;
import org.apache.lucene.search.spans.SpanWeight; import org.apache.lucene.search.spans.SpanWeight;
@ -33,14 +39,6 @@ import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits; import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils; import org.apache.lucene.util.ToStringUtils;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
/** /**
* Only return those matches that have a specific payload at the given position. * Only return those matches that have a specific payload at the given position.
*/ */
@ -98,17 +96,9 @@ public class SpanPayloadCheckQuery extends SpanQuery {
return (matchSpans == null) ? null : new FilterSpans(matchSpans) { return (matchSpans == null) ? null : new FilterSpans(matchSpans) {
@Override @Override
protected AcceptStatus accept(Spans candidate) throws IOException { protected AcceptStatus accept(Spans candidate) throws IOException {
collector.reset(); collector.reset();
candidate.collect(collector); candidate.collect(collector);
Collection<byte[]> collected = collector.getPayloads(); return checkPayloads(collector.getPayloads());
if (match instanceof SpanNearQuery) {
return checkCompositePayloads(collected);
}
else {
return checkOrderedPayloads(collected);
}
} }
}; };
} }
@ -132,19 +122,14 @@ public class SpanPayloadCheckQuery extends SpanQuery {
/** /**
* Check to see if the collected payloads match the required set. * Check to see if the collected payloads match the required set.
* *
* This is called for Near span queries which collect their sub spans
* out-of-order, meaning that we can't rely on the order of payloads
* in the collection
*
* @param candidate a collection of payloads from the current Spans * @param candidate a collection of payloads from the current Spans
* @return whether or not the payloads match * @return whether or not the payloads match
*/ */
protected AcceptStatus checkOrderedPayloads(Collection<byte[]> candidate) { protected AcceptStatus checkPayloads(Collection<byte[]> candidate) {
if (candidate.size() == payloadToMatch.size()){ if (candidate.size() == payloadToMatch.size()){
//TODO: check the byte arrays are the same //TODO: check the byte arrays are the same
Iterator<byte[]> toMatchIter = payloadToMatch.iterator(); Iterator<byte[]> toMatchIter = payloadToMatch.iterator();
//check each of the byte arrays, in order //check each of the byte arrays, in order
//hmm, can't rely on order here
for (byte[] candBytes : candidate) { for (byte[] candBytes : candidate) {
//if one is a mismatch, then return false //if one is a mismatch, then return false
if (Arrays.equals(candBytes, toMatchIter.next()) == false){ if (Arrays.equals(candBytes, toMatchIter.next()) == false){
@ -158,36 +143,6 @@ public class SpanPayloadCheckQuery extends SpanQuery {
} }
} }
/**
* Check to see if the collected payloads match the required set.
* @param candidate a collection of payloads from the current Spans
* @return whether or not the payloads match
*/
protected AcceptStatus checkCompositePayloads(Collection<byte[]> candidate) {
if (candidate.size() == payloadToMatch.size()) {
//TODO: check the byte arrays are the same
//hmm, can't rely on order here
int matches = 0;
for (byte[] candBytes : candidate) {
//Unfortunately, we can't rely on order, so we need to compare all
for (byte[] payBytes : payloadToMatch) {
if (Arrays.equals(candBytes, payBytes) == true) {
matches++;
break;
}
}
}
if (matches == payloadToMatch.size()){
//we've verified all the bytes
return AcceptStatus.YES;
} else {
return AcceptStatus.NO;
}
} else {
return AcceptStatus.NO;
}
}
@Override @Override
public String toString(String field) { public String toString(String field) {
StringBuilder buffer = new StringBuilder(); StringBuilder buffer = new StringBuilder();

View File

@ -134,6 +134,23 @@ public class TestPayloadBasics extends LuceneTestCase {
{505}); {505});
} }
public void testUnorderedPayloadChecks() throws Exception {
SpanTermQuery term5 = new SpanTermQuery(new Term("field", "five"));
SpanTermQuery term100 = new SpanTermQuery(new Term("field", "hundred"));
SpanTermQuery term4 = new SpanTermQuery(new Term("field", "four"));
SpanNearQuery nearQuery = new SpanNearQuery(new SpanQuery[]{term5, term100, term4}, 0, false);
List<byte[]> payloads = new ArrayList<>();
payloads.add(("pos: " + 2).getBytes(StandardCharsets.UTF_8));
payloads.add(("pos: " + 1).getBytes(StandardCharsets.UTF_8));
payloads.add(("pos: " + 0).getBytes(StandardCharsets.UTF_8));
SpanPayloadCheckQuery payloadQuery = new SpanPayloadCheckQuery(nearQuery, payloads);
checkHits(payloadQuery, new int[]{ 405 });
}
public void testComplexSpanChecks() throws Exception { public void testComplexSpanChecks() throws Exception {
SpanTermQuery one = new SpanTermQuery(new Term("field", "one")); SpanTermQuery one = new SpanTermQuery(new Term("field", "one"));
SpanTermQuery thous = new SpanTermQuery(new Term("field", "thousand")); SpanTermQuery thous = new SpanTermQuery(new Term("field", "thousand"));