mirror of https://github.com/apache/lucene.git
LUCENE-569: Fixed bug in ordered SpanNearQueries
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@468554 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
7abe11514a
commit
1a5ac808c8
|
@ -161,6 +161,10 @@ Bug fixes
|
||||||
18. LUCENE-696: Fix bug when scorer for DisjunctionMaxQuery has skipTo()
|
18. LUCENE-696: Fix bug when scorer for DisjunctionMaxQuery has skipTo()
|
||||||
called on it before next(). (Yonik Seeley)
|
called on it before next(). (Yonik Seeley)
|
||||||
|
|
||||||
|
19. LUCENE-569: Fixed SpanNearQuery bug, for 'inOrder' queries it would fail
|
||||||
|
to recognize ordered spans if they overlaped with unordered spans.
|
||||||
|
(Paul Elschot via Chris Hostetter)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
|
|
||||||
1. LUCENE-586: TermDocs.skipTo() is now more efficient for
|
1. LUCENE-586: TermDocs.skipTo() is now more efficient for
|
||||||
|
|
|
@ -0,0 +1,265 @@
|
||||||
|
package org.apache.lucene.search.spans;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copyright 2006 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Comparator;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
|
||||||
|
/** A Spans that is formed from the ordered subspans of a SpanNearQuery
|
||||||
|
* where the subspans do not overlap and have a maximum slop between them.
|
||||||
|
* <p>
|
||||||
|
* The formed spans only contains minimum slop matches.<br>
|
||||||
|
* The matching slop is computed from the distance(s) between
|
||||||
|
* the non overlapping matching Spans.<br>
|
||||||
|
* Successive matches are always formed from the successive Spans
|
||||||
|
* of the SpanNearQuery.
|
||||||
|
* <p>
|
||||||
|
* The formed spans may contain overlaps when the slop is at least 1.
|
||||||
|
* For example, when querying using
|
||||||
|
* <pre>t1 t2 t3</pre>
|
||||||
|
* with slop at least 1, the fragment:
|
||||||
|
* <pre>t1 t2 t1 t3 t2 t3</pre>
|
||||||
|
* matches twice:
|
||||||
|
* <pre>t1 t2 .. t3 </pre>
|
||||||
|
* <pre> t1 .. t2 t3</pre>
|
||||||
|
*/
|
||||||
|
class NearSpansOrdered implements Spans {
|
||||||
|
private final int allowedSlop;
|
||||||
|
private boolean firstTime = true;
|
||||||
|
private boolean more = false;
|
||||||
|
|
||||||
|
/** The spans in the same order as the SpanNearQuery */
|
||||||
|
private final Spans[] subSpans;
|
||||||
|
|
||||||
|
/** Indicates that all subSpans have same doc() */
|
||||||
|
private boolean inSameDoc = false;
|
||||||
|
|
||||||
|
private int matchDoc = -1;
|
||||||
|
private int matchStart = -1;
|
||||||
|
private int matchEnd = -1;
|
||||||
|
|
||||||
|
private final Spans[] subSpansByDoc;
|
||||||
|
private final Comparator spanDocComparator = new Comparator() {
|
||||||
|
public int compare(Object o1, Object o2) {
|
||||||
|
return ((Spans)o1).doc() - ((Spans)o2).doc();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
private SpanNearQuery query;
|
||||||
|
|
||||||
|
public NearSpansOrdered(SpanNearQuery spanNearQuery, IndexReader reader)
|
||||||
|
throws IOException {
|
||||||
|
if (spanNearQuery.getClauses().length < 2) {
|
||||||
|
throw new IllegalArgumentException("Less than 2 clauses: "
|
||||||
|
+ spanNearQuery);
|
||||||
|
}
|
||||||
|
allowedSlop = spanNearQuery.getSlop();
|
||||||
|
SpanQuery[] clauses = spanNearQuery.getClauses();
|
||||||
|
subSpans = new Spans[clauses.length];
|
||||||
|
subSpansByDoc = new Spans[clauses.length];
|
||||||
|
for (int i = 0; i < clauses.length; i++) {
|
||||||
|
subSpans[i] = clauses[i].getSpans(reader);
|
||||||
|
subSpansByDoc[i] = subSpans[i]; // used in toSameDoc()
|
||||||
|
}
|
||||||
|
query = spanNearQuery; // kept for toString() only.
|
||||||
|
}
|
||||||
|
|
||||||
|
// inherit javadocs
|
||||||
|
public int doc() { return matchDoc; }
|
||||||
|
|
||||||
|
// inherit javadocs
|
||||||
|
public int start() { return matchStart; }
|
||||||
|
|
||||||
|
// inherit javadocs
|
||||||
|
public int end() { return matchEnd; }
|
||||||
|
|
||||||
|
// inherit javadocs
|
||||||
|
public boolean next() throws IOException {
|
||||||
|
if (firstTime) {
|
||||||
|
firstTime = false;
|
||||||
|
for (int i = 0; i < subSpans.length; i++) {
|
||||||
|
if (! subSpans[i].next()) {
|
||||||
|
more = false;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
more = true;
|
||||||
|
}
|
||||||
|
return advanceAfterOrdered();
|
||||||
|
}
|
||||||
|
|
||||||
|
// inherit javadocs
|
||||||
|
public boolean skipTo(int target) throws IOException {
|
||||||
|
if (firstTime) {
|
||||||
|
firstTime = false;
|
||||||
|
for (int i = 0; i < subSpans.length; i++) {
|
||||||
|
if (! subSpans[i].skipTo(target)) {
|
||||||
|
more = false;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
more = true;
|
||||||
|
} else if (more && (subSpans[0].doc() < target)) {
|
||||||
|
if (subSpans[0].skipTo(target)) {
|
||||||
|
inSameDoc = false;
|
||||||
|
} else {
|
||||||
|
more = false;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return advanceAfterOrdered();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Advances the subSpans to just after an ordered match with a minimum slop
|
||||||
|
* that is smaller than the slop allowed by the SpanNearQuery.
|
||||||
|
* @return true iff there is such a match.
|
||||||
|
*/
|
||||||
|
private boolean advanceAfterOrdered() throws IOException {
|
||||||
|
while (more && (inSameDoc || toSameDoc())) {
|
||||||
|
if (stretchToOrder() && shrinkToAfterShortestMatch()) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false; // no more matches
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Advance the subSpans to the same document */
|
||||||
|
private boolean toSameDoc() throws IOException {
|
||||||
|
Arrays.sort(subSpansByDoc, spanDocComparator);
|
||||||
|
int firstIndex = 0;
|
||||||
|
int maxDoc = subSpansByDoc[subSpansByDoc.length - 1].doc();
|
||||||
|
while (subSpansByDoc[firstIndex].doc() != maxDoc) {
|
||||||
|
if (! subSpansByDoc[firstIndex].skipTo(maxDoc)) {
|
||||||
|
more = false;
|
||||||
|
inSameDoc = false;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
maxDoc = subSpansByDoc[firstIndex].doc();
|
||||||
|
if (++firstIndex == subSpansByDoc.length) {
|
||||||
|
firstIndex = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (int i = 0; i < subSpansByDoc.length; i++) {
|
||||||
|
assert (subSpansByDoc[i].doc() == maxDoc)
|
||||||
|
: " NearSpansOrdered.toSameDoc() spans " + subSpansByDoc[0]
|
||||||
|
+ "\n at doc " + subSpansByDoc[i].doc()
|
||||||
|
+ ", but should be at " + maxDoc;
|
||||||
|
}
|
||||||
|
inSameDoc = true;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Check whether two Spans in the same document are ordered.
|
||||||
|
* @param spans1
|
||||||
|
* @param spans2
|
||||||
|
* @return true iff spans1 starts before spans2
|
||||||
|
* or the spans start at the same position,
|
||||||
|
* and spans1 ends before spans2.
|
||||||
|
*/
|
||||||
|
static final boolean docSpansOrdered(Spans spans1, Spans spans2) {
|
||||||
|
assert spans1.doc() == spans2.doc() : "doc1 " + spans1.doc() + " != doc2 " + spans2.doc();
|
||||||
|
int start1 = spans1.start();
|
||||||
|
int start2 = spans2.start();
|
||||||
|
/* Do not call docSpansOrdered(int,int,int,int) to avoid invoking .end() : */
|
||||||
|
return (start1 == start2) ? (spans1.end() < spans2.end()) : (start1 < start2);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Like {@link #docSpansOrdered(Spans,Spans)}, but use the spans
|
||||||
|
* starts and ends as parameters.
|
||||||
|
*/
|
||||||
|
private static final boolean docSpansOrdered(int start1, int end1, int start2, int end2) {
|
||||||
|
return (start1 == start2) ? (end1 < end2) : (start1 < start2);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Order the subSpans within the same document by advancing all later spans
|
||||||
|
* after the previous one.
|
||||||
|
*/
|
||||||
|
private boolean stretchToOrder() throws IOException {
|
||||||
|
matchDoc = subSpans[0].doc();
|
||||||
|
for (int i = 1; inSameDoc && (i < subSpans.length); i++) {
|
||||||
|
while (! docSpansOrdered(subSpans[i-1], subSpans[i])) {
|
||||||
|
if (! subSpans[i].next()) {
|
||||||
|
inSameDoc = false;
|
||||||
|
more = false;
|
||||||
|
break;
|
||||||
|
} else if (matchDoc != subSpans[i].doc()) {
|
||||||
|
inSameDoc = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return inSameDoc;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** The subSpans are ordered in the same doc, so there is a possible match.
|
||||||
|
* Compute the slop while making the match as short as possible by advancing
|
||||||
|
* all subSpans except the last one in reverse order.
|
||||||
|
*/
|
||||||
|
private boolean shrinkToAfterShortestMatch() throws IOException {
|
||||||
|
matchStart = subSpans[subSpans.length - 1].start();
|
||||||
|
matchEnd = subSpans[subSpans.length - 1].end();
|
||||||
|
int matchSlop = 0;
|
||||||
|
int lastStart = matchStart;
|
||||||
|
int lastEnd = matchEnd;
|
||||||
|
for (int i = subSpans.length - 2; i >= 0; i--) {
|
||||||
|
Spans prevSpans = subSpans[i];
|
||||||
|
int prevStart = prevSpans.start();
|
||||||
|
int prevEnd = prevSpans.end();
|
||||||
|
while (true) { // Advance prevSpans until after (lastStart, lastEnd)
|
||||||
|
if (! prevSpans.next()) {
|
||||||
|
inSameDoc = false;
|
||||||
|
more = false;
|
||||||
|
break; // Check remaining subSpans for final match.
|
||||||
|
} else if (matchDoc != prevSpans.doc()) {
|
||||||
|
inSameDoc = false; // The last subSpans is not advanced here.
|
||||||
|
break; // Check remaining subSpans for last match in this document.
|
||||||
|
} else {
|
||||||
|
int ppStart = prevSpans.start();
|
||||||
|
int ppEnd = prevSpans.end(); // Cannot avoid invoking .end()
|
||||||
|
if (! docSpansOrdered(ppStart, ppEnd, lastStart, lastEnd)) {
|
||||||
|
break; // Check remaining subSpans.
|
||||||
|
} else { // prevSpans still before (lastStart, lastEnd)
|
||||||
|
prevStart = ppStart;
|
||||||
|
prevEnd = ppEnd;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert prevStart <= matchStart;
|
||||||
|
if (matchStart > prevEnd) { // Only non overlapping spans add to slop.
|
||||||
|
matchSlop += (matchStart - prevEnd);
|
||||||
|
}
|
||||||
|
/* Do not break on (matchSlop > allowedSlop) here to make sure
|
||||||
|
* that subSpans[0] is advanced after the match, if any.
|
||||||
|
*/
|
||||||
|
matchStart = prevStart;
|
||||||
|
lastStart = prevStart;
|
||||||
|
lastEnd = prevEnd;
|
||||||
|
}
|
||||||
|
return matchSlop <= allowedSlop; // ordered and allowed slop
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return getClass().getName() + "("+query.toString()+")@"+
|
||||||
|
(firstTime?"START":(more?(doc()+":"+start()+"-"+end()):"END"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
package org.apache.lucene.search.spans;
|
package org.apache.lucene.search.spans;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Copyright 2004 The Apache Software Foundation
|
* Copyright 2006 The Apache Software Foundation
|
||||||
*
|
*
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
* you may not use this file except in compliance with the License.
|
* you may not use this file except in compliance with the License.
|
||||||
|
@ -24,12 +24,11 @@ import java.util.ArrayList;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.util.PriorityQueue;
|
import org.apache.lucene.util.PriorityQueue;
|
||||||
|
|
||||||
class NearSpans implements Spans {
|
class NearSpansUnordered implements Spans {
|
||||||
private SpanNearQuery query;
|
private SpanNearQuery query;
|
||||||
|
|
||||||
private List ordered = new ArrayList(); // spans in query order
|
private List ordered = new ArrayList(); // spans in query order
|
||||||
private int slop; // from query
|
private int slop; // from query
|
||||||
private boolean inOrder; // from query
|
|
||||||
|
|
||||||
private SpansCell first; // linked list of spans
|
private SpansCell first; // linked list of spans
|
||||||
private SpansCell last; // sorted by doc only
|
private SpansCell last; // sorted by doc only
|
||||||
|
@ -51,15 +50,7 @@ class NearSpans implements Spans {
|
||||||
SpansCell spans1 = (SpansCell)o1;
|
SpansCell spans1 = (SpansCell)o1;
|
||||||
SpansCell spans2 = (SpansCell)o2;
|
SpansCell spans2 = (SpansCell)o2;
|
||||||
if (spans1.doc() == spans2.doc()) {
|
if (spans1.doc() == spans2.doc()) {
|
||||||
if (spans1.start() == spans2.start()) {
|
return NearSpansOrdered.docSpansOrdered(spans1, spans2);
|
||||||
if (spans1.end() == spans2.end()) {
|
|
||||||
return spans1.index > spans2.index;
|
|
||||||
} else {
|
|
||||||
return spans1.end() < spans2.end();
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
return spans1.start() < spans2.start();
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
return spans1.doc() < spans2.doc();
|
return spans1.doc() < spans2.doc();
|
||||||
}
|
}
|
||||||
|
@ -80,39 +71,28 @@ class NearSpans implements Spans {
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean next() throws IOException {
|
public boolean next() throws IOException {
|
||||||
if (length != -1) // subtract old length
|
return adjust(spans.next());
|
||||||
totalLength -= length;
|
|
||||||
|
|
||||||
boolean more = spans.next(); // move to next
|
|
||||||
|
|
||||||
if (more) {
|
|
||||||
length = end() - start(); // compute new length
|
|
||||||
totalLength += length; // add new length to total
|
|
||||||
|
|
||||||
if (max == null || doc() > max.doc() || // maintain max
|
|
||||||
(doc() == max.doc() && end() > max.end()))
|
|
||||||
max = this;
|
|
||||||
}
|
|
||||||
|
|
||||||
return more;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean skipTo(int target) throws IOException {
|
public boolean skipTo(int target) throws IOException {
|
||||||
if (length != -1) // subtract old length
|
return adjust(spans.skipTo(target));
|
||||||
totalLength -= length;
|
|
||||||
|
|
||||||
boolean more = spans.skipTo(target); // skip
|
|
||||||
|
|
||||||
if (more) {
|
|
||||||
length = end() - start(); // compute new length
|
|
||||||
totalLength += length; // add new length to total
|
|
||||||
|
|
||||||
if (max == null || doc() > max.doc() || // maintain max
|
|
||||||
(doc() == max.doc() && end() > max.end()))
|
|
||||||
max = this;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return more;
|
private boolean adjust(boolean condition) {
|
||||||
|
if (length != -1) {
|
||||||
|
totalLength -= length; // subtract old length
|
||||||
|
}
|
||||||
|
if (condition) {
|
||||||
|
length = end() - start();
|
||||||
|
totalLength += length; // add new length
|
||||||
|
|
||||||
|
if (max == null || doc() > max.doc()
|
||||||
|
|| (doc() == max.doc()) && (end() > max.end())) {
|
||||||
|
max = this;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
more = condition;
|
||||||
|
return condition;
|
||||||
}
|
}
|
||||||
|
|
||||||
public int doc() { return spans.doc(); }
|
public int doc() { return spans.doc(); }
|
||||||
|
@ -122,18 +102,18 @@ class NearSpans implements Spans {
|
||||||
public String toString() { return spans.toString() + "#" + index; }
|
public String toString() { return spans.toString() + "#" + index; }
|
||||||
}
|
}
|
||||||
|
|
||||||
public NearSpans(SpanNearQuery query, IndexReader reader)
|
|
||||||
|
public NearSpansUnordered(SpanNearQuery query, IndexReader reader)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
this.query = query;
|
this.query = query;
|
||||||
this.slop = query.getSlop();
|
this.slop = query.getSlop();
|
||||||
this.inOrder = query.isInOrder();
|
|
||||||
|
|
||||||
SpanQuery[] clauses = query.getClauses(); // initialize spans & list
|
SpanQuery[] clauses = query.getClauses();
|
||||||
queue = new CellQueue(clauses.length);
|
queue = new CellQueue(clauses.length);
|
||||||
for (int i = 0; i < clauses.length; i++) {
|
for (int i = 0; i < clauses.length; i++) {
|
||||||
SpansCell cell = // construct clause spans
|
SpansCell cell =
|
||||||
new SpansCell(clauses[i].getSpans(reader), i);
|
new SpansCell(clauses[i].getSpans(reader), i);
|
||||||
ordered.add(cell); // add to ordered
|
ordered.add(cell);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -143,9 +123,11 @@ class NearSpans implements Spans {
|
||||||
listToQueue(); // initialize queue
|
listToQueue(); // initialize queue
|
||||||
firstTime = false;
|
firstTime = false;
|
||||||
} else if (more) {
|
} else if (more) {
|
||||||
more = min().next(); // trigger further scanning
|
if (min().next()) { // trigger further scanning
|
||||||
if (more)
|
|
||||||
queue.adjustTop(); // maintain queue
|
queue.adjustTop(); // maintain queue
|
||||||
|
} else {
|
||||||
|
more = false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
while (more) {
|
while (more) {
|
||||||
|
@ -174,23 +156,15 @@ class NearSpans implements Spans {
|
||||||
queueStale = false;
|
queueStale = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (atMatch())
|
if (atMatch()) {
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
// trigger further scanning
|
|
||||||
if (inOrder && checkSlop()) {
|
|
||||||
/* There is a non ordered match within slop and an ordered match is needed. */
|
|
||||||
more = firstNonOrderedNextToPartialList();
|
|
||||||
if (more) {
|
|
||||||
partialListToQueue();
|
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
more = min().next();
|
more = min().next();
|
||||||
if (more) {
|
if (more) {
|
||||||
queue.adjustTop(); // maintain queue
|
queue.adjustTop(); // maintain queue
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
return false; // no more matches
|
return false; // no more matches
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -206,20 +180,14 @@ class NearSpans implements Spans {
|
||||||
firstTime = false;
|
firstTime = false;
|
||||||
} else { // normal case
|
} else { // normal case
|
||||||
while (more && min().doc() < target) { // skip as needed
|
while (more && min().doc() < target) { // skip as needed
|
||||||
more = min().skipTo(target);
|
if (min().skipTo(target)) {
|
||||||
if (more)
|
|
||||||
queue.adjustTop();
|
queue.adjustTop();
|
||||||
|
} else {
|
||||||
|
more = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (more) {
|
|
||||||
|
|
||||||
if (atMatch()) // at a match?
|
|
||||||
return true;
|
|
||||||
|
|
||||||
return next(); // no, scan
|
|
||||||
}
|
}
|
||||||
|
return more && (atMatch() || next());
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private SpansCell min() { return (SpansCell)queue.top(); }
|
private SpansCell min() { return (SpansCell)queue.top(); }
|
||||||
|
@ -230,7 +198,7 @@ class NearSpans implements Spans {
|
||||||
|
|
||||||
|
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "spans("+query.toString()+")@"+
|
return getClass().getName() + "("+query.toString()+")@"+
|
||||||
(firstTime?"START":(more?(doc()+":"+start()+"-"+end()):"END"));
|
(firstTime?"START":(more?(doc()+":"+start()+"-"+end()):"END"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -268,35 +236,8 @@ class NearSpans implements Spans {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean firstNonOrderedNextToPartialList() throws IOException {
|
|
||||||
/* Creates a partial list consisting of first non ordered and earlier.
|
|
||||||
* Returns first non ordered .next().
|
|
||||||
*/
|
|
||||||
last = first = null;
|
|
||||||
int orderedIndex = 0;
|
|
||||||
while (queue.top() != null) {
|
|
||||||
SpansCell cell = (SpansCell)queue.pop();
|
|
||||||
addToList(cell);
|
|
||||||
if (cell.index == orderedIndex) {
|
|
||||||
orderedIndex++;
|
|
||||||
} else {
|
|
||||||
return cell.next();
|
|
||||||
// FIXME: continue here, rename to eg. checkOrderedMatch():
|
|
||||||
// when checkSlop() and not ordered, repeat cell.next().
|
|
||||||
// when checkSlop() and ordered, add to list and repeat queue.pop()
|
|
||||||
// without checkSlop(): no match, rebuild the queue from the partial list.
|
|
||||||
// When queue is empty and checkSlop() and ordered there is a match.
|
|
||||||
}
|
|
||||||
}
|
|
||||||
throw new RuntimeException("Unexpected: ordered");
|
|
||||||
}
|
|
||||||
|
|
||||||
private void listToQueue() {
|
private void listToQueue() {
|
||||||
queue.clear(); // rebuild queue
|
queue.clear(); // rebuild queue
|
||||||
partialListToQueue();
|
|
||||||
}
|
|
||||||
|
|
||||||
private void partialListToQueue() {
|
|
||||||
for (SpansCell cell = first; cell != null; cell = cell.next) {
|
for (SpansCell cell = first; cell != null; cell = cell.next) {
|
||||||
queue.put(cell); // add to queue from list
|
queue.put(cell); // add to queue from list
|
||||||
}
|
}
|
||||||
|
@ -304,23 +245,6 @@ class NearSpans implements Spans {
|
||||||
|
|
||||||
private boolean atMatch() {
|
private boolean atMatch() {
|
||||||
return (min().doc() == max.doc())
|
return (min().doc() == max.doc())
|
||||||
&& checkSlop()
|
&& ((max.end() - min().start() - totalLength) <= slop);
|
||||||
&& (!inOrder || matchIsOrdered());
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean checkSlop() {
|
|
||||||
int matchLength = max.end() - min().start();
|
|
||||||
return (matchLength - totalLength) <= slop;
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean matchIsOrdered() {
|
|
||||||
int lastStart = -1;
|
|
||||||
for (int i = 0; i < ordered.size(); i++) {
|
|
||||||
int start = ((SpansCell)ordered.get(i)).start();
|
|
||||||
if (!(start > lastStart))
|
|
||||||
return false;
|
|
||||||
lastStart = start;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -124,7 +124,9 @@ public class SpanNearQuery extends SpanQuery {
|
||||||
if (clauses.size() == 1) // optimize 1-clause case
|
if (clauses.size() == 1) // optimize 1-clause case
|
||||||
return ((SpanQuery)clauses.get(0)).getSpans(reader);
|
return ((SpanQuery)clauses.get(0)).getSpans(reader);
|
||||||
|
|
||||||
return new NearSpans(this, reader);
|
return inOrder
|
||||||
|
? (Spans) new NearSpansOrdered(this, reader)
|
||||||
|
: (Spans) new NearSpansUnordered(this, reader);
|
||||||
}
|
}
|
||||||
|
|
||||||
public Query rewrite(IndexReader reader) throws IOException {
|
public Query rewrite(IndexReader reader) throws IOException {
|
||||||
|
|
|
@ -0,0 +1,182 @@
|
||||||
|
package org.apache.lucene.search.spans;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copyright 2006 Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.search.Explanation;
|
||||||
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
|
import org.apache.lucene.search.Scorer;
|
||||||
|
import org.apache.lucene.search.Weight;
|
||||||
|
import org.apache.lucene.search.CheckHits;
|
||||||
|
import org.apache.lucene.store.RAMDirectory;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexWriter;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.WhitespaceAnalyzer;
|
||||||
|
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
|
|
||||||
|
import org.apache.lucene.queryParser.QueryParser;
|
||||||
|
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
public class TestNearSpansOrdered extends TestCase {
|
||||||
|
protected IndexSearcher searcher;
|
||||||
|
|
||||||
|
public static final String FIELD = "field";
|
||||||
|
public static final QueryParser qp =
|
||||||
|
new QueryParser(FIELD, new WhitespaceAnalyzer());
|
||||||
|
|
||||||
|
public void tearDown() throws Exception {
|
||||||
|
searcher.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setUp() throws Exception {
|
||||||
|
RAMDirectory directory = new RAMDirectory();
|
||||||
|
IndexWriter writer= new IndexWriter(directory, new WhitespaceAnalyzer(), true);
|
||||||
|
for (int i = 0; i < docFields.length; i++) {
|
||||||
|
Document doc = new Document();
|
||||||
|
doc.add(new Field(FIELD, docFields[i], Field.Store.NO, Field.Index.TOKENIZED));
|
||||||
|
writer.addDocument(doc);
|
||||||
|
}
|
||||||
|
writer.close();
|
||||||
|
searcher = new IndexSearcher(directory);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected String[] docFields = {
|
||||||
|
"w1 w2 w3 w4 w5",
|
||||||
|
"w1 w3 w2 w3 zz",
|
||||||
|
"w1 xx w2 yy w3",
|
||||||
|
"w1 w3 xx w2 yy w3 zz"
|
||||||
|
};
|
||||||
|
|
||||||
|
protected SpanNearQuery makeQuery(String s1, String s2, String s3,
|
||||||
|
int slop, boolean inOrder) {
|
||||||
|
return new SpanNearQuery
|
||||||
|
(new SpanQuery[] {
|
||||||
|
new SpanTermQuery(new Term(FIELD, s1)),
|
||||||
|
new SpanTermQuery(new Term(FIELD, s2)),
|
||||||
|
new SpanTermQuery(new Term(FIELD, s3)) },
|
||||||
|
slop,
|
||||||
|
inOrder);
|
||||||
|
}
|
||||||
|
protected SpanNearQuery makeQuery() {
|
||||||
|
return makeQuery("w1","w2","w3",1,true);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSpanNearQuery() throws Exception {
|
||||||
|
SpanNearQuery q = makeQuery();
|
||||||
|
CheckHits.checkHits(q, FIELD, searcher, new int[] {0,1});
|
||||||
|
}
|
||||||
|
|
||||||
|
public String s(Spans span) {
|
||||||
|
return s(span.doc(), span.start(), span.end());
|
||||||
|
}
|
||||||
|
public String s(int doc, int start, int end) {
|
||||||
|
return "s(" + doc + "," + start + "," + end +")";
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testNearSpansNext() throws Exception {
|
||||||
|
SpanNearQuery q = makeQuery();
|
||||||
|
Spans span = q.getSpans(searcher.getIndexReader());
|
||||||
|
assertEquals(true, span.next());
|
||||||
|
assertEquals(s(0,0,3), s(span));
|
||||||
|
assertEquals(true, span.next());
|
||||||
|
assertEquals(s(1,0,4), s(span));
|
||||||
|
assertEquals(false, span.next());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* test does not imply that skipTo(doc+1) should work exactly the
|
||||||
|
* same as next -- it's only applicable in this case since we know doc
|
||||||
|
* does not contain more than one span
|
||||||
|
*/
|
||||||
|
public void testNearSpansSkipToLikeNext() throws Exception {
|
||||||
|
SpanNearQuery q = makeQuery();
|
||||||
|
Spans span = q.getSpans(searcher.getIndexReader());
|
||||||
|
assertEquals(true, span.skipTo(0));
|
||||||
|
assertEquals(s(0,0,3), s(span));
|
||||||
|
assertEquals(true, span.skipTo(1));
|
||||||
|
assertEquals(s(1,0,4), s(span));
|
||||||
|
assertEquals(false, span.skipTo(2));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testNearSpansNextThenSkipTo() throws Exception {
|
||||||
|
SpanNearQuery q = makeQuery();
|
||||||
|
Spans span = q.getSpans(searcher.getIndexReader());
|
||||||
|
assertEquals(true, span.next());
|
||||||
|
assertEquals(s(0,0,3), s(span));
|
||||||
|
assertEquals(true, span.skipTo(1));
|
||||||
|
assertEquals(s(1,0,4), s(span));
|
||||||
|
assertEquals(false, span.next());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testNearSpansNextThenSkipPast() throws Exception {
|
||||||
|
SpanNearQuery q = makeQuery();
|
||||||
|
Spans span = q.getSpans(searcher.getIndexReader());
|
||||||
|
assertEquals(true, span.next());
|
||||||
|
assertEquals(s(0,0,3), s(span));
|
||||||
|
assertEquals(false, span.skipTo(2));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testNearSpansSkipPast() throws Exception {
|
||||||
|
SpanNearQuery q = makeQuery();
|
||||||
|
Spans span = q.getSpans(searcher.getIndexReader());
|
||||||
|
assertEquals(false, span.skipTo(2));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testNearSpansSkipTo0() throws Exception {
|
||||||
|
SpanNearQuery q = makeQuery();
|
||||||
|
Spans span = q.getSpans(searcher.getIndexReader());
|
||||||
|
assertEquals(true, span.skipTo(0));
|
||||||
|
assertEquals(s(0,0,3), s(span));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testNearSpansSkipTo1() throws Exception {
|
||||||
|
SpanNearQuery q = makeQuery();
|
||||||
|
Spans span = q.getSpans(searcher.getIndexReader());
|
||||||
|
assertEquals(true, span.skipTo(1));
|
||||||
|
assertEquals(s(1,0,4), s(span));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* not a direct test of NearSpans, but a demonstration of how/when
|
||||||
|
* this causes problems
|
||||||
|
*/
|
||||||
|
public void testSpanNearScorerSkipTo1() throws Exception {
|
||||||
|
SpanNearQuery q = makeQuery();
|
||||||
|
Weight w = q.createWeight(searcher);
|
||||||
|
Scorer s = w.scorer(searcher.getIndexReader());
|
||||||
|
assertEquals(true, s.skipTo(1));
|
||||||
|
assertEquals(1, s.doc());
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* not a direct test of NearSpans, but a demonstration of how/when
|
||||||
|
* this causes problems
|
||||||
|
*/
|
||||||
|
public void testSpanNearScorerExplain() throws Exception {
|
||||||
|
SpanNearQuery q = makeQuery();
|
||||||
|
Weight w = q.createWeight(searcher);
|
||||||
|
Scorer s = w.scorer(searcher.getIndexReader());
|
||||||
|
Explanation e = s.explain(1);
|
||||||
|
assertTrue("Scorer explanation value for doc#1 isn't positive: "
|
||||||
|
+ e.toString(),
|
||||||
|
0.0f < e.getValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -151,6 +151,10 @@ public class TestSpanExplanations extends TestExplanations {
|
||||||
SpanQuery q = snear("xx","w1","w2",0,false);
|
SpanQuery q = snear("xx","w1","w2",0,false);
|
||||||
qtest(q, new int[] {2});
|
qtest(q, new int[] {2});
|
||||||
}
|
}
|
||||||
|
public void testSNear11() throws Exception {
|
||||||
|
SpanQuery q = snear("w1","w2","w3",1,true);
|
||||||
|
qtest(q, new int[] {0,1});
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/* some SpanNotQueries */
|
/* some SpanNotQueries */
|
||||||
|
|
Loading…
Reference in New Issue