LUCENE-6388: Optimize SpanNearQuery

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1671078 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2015-04-03 14:39:33 +00:00
parent db2faf0c25
commit 0666344038
5 changed files with 27 additions and 36 deletions

View File

@ -56,6 +56,9 @@ Optimizations
faster IndexWriter.deleteAll in that case (Robert Muir, Adrien
Grand, Mike McCandless)
* LUCENE-6388: Optimize SpanNearQuery when payloads are not present.
(Robert Muir)
Bug Fixes
* LUCENE-6378: Fix all RuntimeExceptions to throw the underlying root cause.

View File

@ -29,11 +29,11 @@ import java.util.Objects;
* Common super class for un/ordered Spans
*/
abstract class NearSpans extends Spans {
SpanNearQuery query;
int allowedSlop;
final SpanNearQuery query;
final int allowedSlop;
List<Spans> subSpans; // in query order
DocIdSetIterator conjunction; // use to move to next doc with all clauses
final Spans[] subSpans; // in query order
final DocIdSetIterator conjunction; // use to move to next doc with all clauses
boolean atFirstInCurrentDoc;
boolean oneExhaustedInCurrentDoc; // no more results possbile in current doc
@ -44,7 +44,7 @@ abstract class NearSpans extends Spans {
if (subSpans.size() < 2) {
throw new IllegalArgumentException("Less than 2 subSpans: " + query);
}
this.subSpans = Objects.requireNonNull(subSpans); // in query order
this.subSpans = subSpans.toArray(new Spans[subSpans.size()]); // in query order
this.conjunction = ConjunctionDISI.intersect(subSpans);
}
@ -91,13 +91,8 @@ abstract class NearSpans extends Spans {
return res;
}
private Spans[] subSpansArray = null; // init only when needed.
public Spans[] getSubSpans() {
if (subSpansArray == null) {
subSpansArray = subSpans.toArray(new Spans[subSpans.size()]);
}
return subSpansArray;
return subSpans;
}
}

View File

@ -18,12 +18,8 @@ package org.apache.lucene.search.spans;
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Collection;
import java.util.Set;
/** A Spans that is formed from the ordered subspans of a SpanNearQuery
* where the subspans do not overlap and have a maximum slop between them,
@ -146,11 +142,11 @@ public class NearSpansOrdered extends NearSpans {
* otherwise at least one is exhausted in the current doc.
*/
private boolean stretchToOrder() throws IOException {
Spans prevSpans = subSpans.get(0);
Spans prevSpans = subSpans[0];
assert prevSpans.startPosition() != NO_MORE_POSITIONS : "prevSpans no start position "+prevSpans;
assert prevSpans.endPosition() != NO_MORE_POSITIONS;
for (int i = 1; i < subSpans.size(); i++) {
Spans spans = subSpans.get(i);
for (int i = 1; i < subSpans.length; i++) {
Spans spans = subSpans[i];
assert spans.startPosition() != NO_MORE_POSITIONS;
assert spans.endPosition() != NO_MORE_POSITIONS;
@ -169,15 +165,14 @@ public class NearSpansOrdered extends NearSpans {
* on all subSpans, except the last one, in reverse order.
*/
protected boolean shrinkToAfterShortestMatch() throws IOException {
Spans lastSubSpans = subSpans.get(subSpans.size() - 1);
Spans lastSubSpans = subSpans[subSpans.length - 1];
matchStart = lastSubSpans.startPosition();
matchEnd = lastSubSpans.endPosition();
int matchSlop = 0;
int lastStart = matchStart;
int lastEnd = matchEnd;
for (int i = subSpans.size() - 2; i >= 0; i--) {
Spans prevSpans = subSpans.get(i);
for (int i = subSpans.length - 2; i >= 0; i--) {
Spans prevSpans = subSpans[i];
int prevStart = prevSpans.startPosition();
int prevEnd = prevSpans.endPosition();
@ -206,7 +201,6 @@ public class NearSpansOrdered extends NearSpans {
*/
matchStart = prevStart;
lastStart = prevStart;
lastEnd = prevEnd;
}
boolean match = matchSlop <= allowedSlop;
@ -224,16 +218,14 @@ public class NearSpansOrdered extends NearSpans {
return atFirstInCurrentDoc ? -1 : matchEnd;
}
/** Throws an UnsupportedOperationException */
@Override
public Collection<byte[]> getPayload() throws IOException {
throw new UnsupportedOperationException("Use NearSpansPayloadOrdered instead");
return null;
}
/** Throws an UnsupportedOperationException */
@Override
public boolean isPayloadAvailable() {
throw new UnsupportedOperationException("Use NearSpansPayloadOrdered instead");
return false;
}
@Override

View File

@ -47,7 +47,7 @@ public class NearSpansPayloadOrdered extends NearSpansOrdered {
* Also collect the payloads.
*/
protected boolean shrinkToAfterShortestMatch() throws IOException {
Spans lastSubSpans = subSpans.get(subSpans.size() - 1);
Spans lastSubSpans = subSpans[subSpans.length - 1];
matchStart = lastSubSpans.startPosition();
matchEnd = lastSubSpans.endPosition();
@ -62,9 +62,8 @@ public class NearSpansPayloadOrdered extends NearSpansOrdered {
int matchSlop = 0;
int lastStart = matchStart;
int lastEnd = matchEnd;
for (int i = subSpans.size() - 2; i >= 0; i--) {
Spans prevSpans = subSpans.get(i);
for (int i = subSpans.length - 2; i >= 0; i--) {
Spans prevSpans = subSpans[i];
if (prevSpans.isPayloadAvailable()) {
Collection<byte[]> payload = prevSpans.getPayload();
@ -112,7 +111,6 @@ public class NearSpansPayloadOrdered extends NearSpansOrdered {
*/
matchStart = prevStart;
lastStart = prevStart;
lastEnd = prevEnd;
}
boolean match = matchSlop <= allowedSlop;

View File

@ -18,19 +18,17 @@ package org.apache.lucene.search.spans;
*/
import java.io.IOException;
import java.util.List;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;
@ -132,9 +130,14 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
}
}
Terms terms = context.reader().terms(field);
if (terms == null) {
return null; // field does not exist
}
// all NearSpans require at least two subSpans
return (! inOrder) ? new NearSpansUnordered(this, subSpans)
: collectPayloads ? new NearSpansPayloadOrdered(this, subSpans)
: collectPayloads && terms.hasPayloads() ? new NearSpansPayloadOrdered(this, subSpans)
: new NearSpansOrdered(this, subSpans);
}