mirror of https://github.com/apache/lucene.git
Added new span-based query API.
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150185 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
1df2ba0dec
commit
93ff39de13
|
@ -42,6 +42,9 @@ $Id$
|
||||||
6. Changed FSDirectory to auto-create a full directory tree that it
|
6. Changed FSDirectory to auto-create a full directory tree that it
|
||||||
needs by using mkdirs() instead of mkdir(). (Mladen Turk via Otis)
|
needs by using mkdirs() instead of mkdir(). (Mladen Turk via Otis)
|
||||||
|
|
||||||
|
7. Added a new span-based query API. This implements, among other
|
||||||
|
things, nested phrases. See javadocs for details. (Doug Cutting)
|
||||||
|
|
||||||
|
|
||||||
1.3 final
|
1.3 final
|
||||||
|
|
||||||
|
|
|
@ -55,7 +55,9 @@ package org.apache.lucene.search;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Vector;
|
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
|
|
||||||
|
@ -296,14 +298,15 @@ public abstract class Similarity {
|
||||||
* <p>The default implementation sums the {@link #idf(Term,Searcher)} factor
|
* <p>The default implementation sums the {@link #idf(Term,Searcher)} factor
|
||||||
* for each term in the phrase.
|
* for each term in the phrase.
|
||||||
*
|
*
|
||||||
* @param terms the vector of terms in the phrase
|
* @param terms the terms in the phrase
|
||||||
* @param searcher the document collection being searched
|
* @param searcher the document collection being searched
|
||||||
* @return a score factor for the phrase
|
* @return a score factor for the phrase
|
||||||
*/
|
*/
|
||||||
public float idf(Vector terms, Searcher searcher) throws IOException {
|
public float idf(Collection terms, Searcher searcher) throws IOException {
|
||||||
float idf = 0.0f;
|
float idf = 0.0f;
|
||||||
for (int i = 0; i < terms.size(); i++) {
|
Iterator i = terms.iterator();
|
||||||
idf += idf((Term)terms.elementAt(i), searcher);
|
while (i.hasNext()) {
|
||||||
|
idf += idf((Term)i.next(), searcher);
|
||||||
}
|
}
|
||||||
return idf;
|
return idf;
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,236 @@
|
||||||
|
package org.apache.lucene.search.spans;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
|
||||||
|
class NearSpans implements Spans {
|
||||||
|
private SpanNearQuery query;
|
||||||
|
|
||||||
|
private List ordered = new ArrayList(); // spans in query order
|
||||||
|
private int slop; // from query
|
||||||
|
private boolean inOrder; // from query
|
||||||
|
|
||||||
|
private SpansCell first; // linked list of spans
|
||||||
|
private SpansCell last; // sorted by doc only
|
||||||
|
|
||||||
|
private int totalLength; // sum of current lengths
|
||||||
|
|
||||||
|
private SpanQueue queue; // sorted queue of spans
|
||||||
|
private SpansCell max; // max element in queue
|
||||||
|
|
||||||
|
private boolean more = true; // true iff not done
|
||||||
|
private boolean firstTime = true; // true before first next()
|
||||||
|
|
||||||
|
private boolean queueStale = false; // true if queue not sorted
|
||||||
|
private boolean listStale = true; // true if list not sorted
|
||||||
|
|
||||||
|
/** Wraps a Spans, and can be used to form a linked list. */
|
||||||
|
private class SpansCell implements Spans {
|
||||||
|
private Spans spans;
|
||||||
|
private SpansCell next;
|
||||||
|
private int length = -1;
|
||||||
|
|
||||||
|
public SpansCell(Spans spans) { this.spans = spans; }
|
||||||
|
|
||||||
|
public boolean next() throws IOException {
|
||||||
|
if (length != -1) // subtract old length
|
||||||
|
totalLength -= length;
|
||||||
|
|
||||||
|
boolean more = spans.next(); // move to next
|
||||||
|
|
||||||
|
if (more) {
|
||||||
|
length = end() - start(); // compute new length
|
||||||
|
totalLength += length; // add new length to total
|
||||||
|
|
||||||
|
if (max == null || doc() > max.doc() || // maintain max
|
||||||
|
(doc() == max.doc() && end() > max.end()))
|
||||||
|
max = this;
|
||||||
|
}
|
||||||
|
|
||||||
|
return more;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean skipTo(int target) throws IOException {
|
||||||
|
if (length != -1) // subtract old length
|
||||||
|
totalLength -= length;
|
||||||
|
|
||||||
|
boolean more = spans.skipTo(target); // skip
|
||||||
|
|
||||||
|
if (more) {
|
||||||
|
length = end() - start(); // compute new length
|
||||||
|
totalLength += length; // add new length to total
|
||||||
|
|
||||||
|
if (max == null || doc() > max.doc() || // maintain max
|
||||||
|
(doc() == max.doc() && end() > max.end()))
|
||||||
|
max = this;
|
||||||
|
}
|
||||||
|
|
||||||
|
return more;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int doc() { return spans.doc(); }
|
||||||
|
public int start() { return spans.start(); }
|
||||||
|
public int end() { return spans.end(); }
|
||||||
|
|
||||||
|
public String toString() { return spans.toString(); }
|
||||||
|
}
|
||||||
|
|
||||||
|
public NearSpans(SpanNearQuery query, IndexReader reader)
|
||||||
|
throws IOException {
|
||||||
|
this.query = query;
|
||||||
|
this.slop = query.getSlop();
|
||||||
|
this.inOrder = query.isInOrder();
|
||||||
|
|
||||||
|
SpanQuery[] clauses = query.getClauses(); // initialize spans & list
|
||||||
|
queue = new SpanQueue(clauses.length);
|
||||||
|
for (int i = 0; i < clauses.length; i++) {
|
||||||
|
SpansCell cell = // construct clause spans
|
||||||
|
new SpansCell(clauses[i].getSpans(reader));
|
||||||
|
ordered.add(cell); // add to ordered
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean next() throws IOException {
|
||||||
|
if (firstTime) {
|
||||||
|
initList(true);
|
||||||
|
listToQueue(); // initialize queue
|
||||||
|
firstTime = false;
|
||||||
|
} else {
|
||||||
|
more = last.next(); // trigger scan
|
||||||
|
queueStale = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (more) {
|
||||||
|
|
||||||
|
if (listStale) { // maintain list
|
||||||
|
queueToList();
|
||||||
|
listStale = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// skip to doc w/ all clauses
|
||||||
|
|
||||||
|
while (more && first.doc() < last.doc()) {
|
||||||
|
more = first.skipTo(last.doc()); // skip first upto last
|
||||||
|
firstToLast(); // and move it to the end
|
||||||
|
queueStale = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!more) return false;
|
||||||
|
|
||||||
|
// found doc w/ all clauses
|
||||||
|
|
||||||
|
if (queueStale) { // maintain the queue
|
||||||
|
listToQueue();
|
||||||
|
queueStale = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
int matchLength = max.end() - min().start();
|
||||||
|
if (((matchLength - totalLength) <= slop) // check slop
|
||||||
|
&& (!inOrder || matchIsOrdered())) { // check order
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
more = min().next(); // trigger further scanning
|
||||||
|
|
||||||
|
if (more) {
|
||||||
|
queue.adjustTop(); // maintain queue
|
||||||
|
if (min().doc() != max.doc()) {
|
||||||
|
listStale = true; // maintain list
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false; // no more matches
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean skipTo(int target) throws IOException {
|
||||||
|
if (firstTime) {
|
||||||
|
initList(false);
|
||||||
|
firstTime = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (SpansCell cell = first; more && cell!=null; cell=cell.next) {
|
||||||
|
more = cell.skipTo(target);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (more) {
|
||||||
|
listToQueue();
|
||||||
|
listStale = true;
|
||||||
|
|
||||||
|
if (min().doc() == max.doc()) { // at a match?
|
||||||
|
int matchLength = max.end() - min().start();
|
||||||
|
if ((matchLength - totalLength) <= slop) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return next(); // no, scan
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
private SpansCell min() { return (SpansCell)queue.top(); }
|
||||||
|
|
||||||
|
public int doc() { return min().doc(); }
|
||||||
|
public int start() { return min().start(); }
|
||||||
|
public int end() { return max.end(); }
|
||||||
|
|
||||||
|
public String toString() { return "spans(" + query.toString() + ")"; }
|
||||||
|
|
||||||
|
private void initList(boolean next) throws IOException {
|
||||||
|
for (int i = 0; more && i < ordered.size(); i++) {
|
||||||
|
SpansCell cell = (SpansCell)ordered.get(i);
|
||||||
|
if (next)
|
||||||
|
more = cell.next(); // move to first entry
|
||||||
|
if (more) {
|
||||||
|
addToList(cell); // add to list
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void addToList(SpansCell cell) {
|
||||||
|
if (last != null) { // add next to end of list
|
||||||
|
last.next = cell;
|
||||||
|
} else
|
||||||
|
first = cell;
|
||||||
|
last = cell;
|
||||||
|
cell.next = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void firstToLast() {
|
||||||
|
last.next = first; // move first to end of list
|
||||||
|
last = first;
|
||||||
|
first = first.next;
|
||||||
|
last.next = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void queueToList() {
|
||||||
|
last = first = null;
|
||||||
|
while (queue.top() != null) {
|
||||||
|
addToList((SpansCell)queue.pop());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void listToQueue() {
|
||||||
|
queue.clear();
|
||||||
|
for (SpansCell cell = first; cell != null; cell = cell.next) {
|
||||||
|
queue.put(cell); // build queue from list
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean matchIsOrdered() {
|
||||||
|
int lastStart = -1;
|
||||||
|
for (int i = 0; i < ordered.size(); i++) {
|
||||||
|
int start = ((SpansCell)ordered.get(i)).start();
|
||||||
|
if (!(start > lastStart))
|
||||||
|
return false;
|
||||||
|
lastStart = start;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,74 @@
|
||||||
|
package org.apache.lucene.search.spans;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import java.util.Collection;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
|
||||||
|
/** Matches spans near the beginning of a field. */
|
||||||
|
public class SpanFirstQuery extends SpanQuery {
|
||||||
|
private SpanQuery match;
|
||||||
|
private int end;
|
||||||
|
|
||||||
|
/** Construct a SpanFirstQuery matching spans in <code>match</code> whose end
|
||||||
|
* position is less than or equal to <code>end</code>. */
|
||||||
|
public SpanFirstQuery(SpanQuery match, int end) {
|
||||||
|
this.match = match;
|
||||||
|
this.end = end;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Return the SpanQuery whose matches are filtered. */
|
||||||
|
public SpanQuery getMatch() { return match; }
|
||||||
|
|
||||||
|
/** Return the maximum end position permitted in a match. */
|
||||||
|
public int getEnd() { return end; }
|
||||||
|
|
||||||
|
public String getField() { return match.getField(); }
|
||||||
|
|
||||||
|
public Collection getTerms() { return match.getTerms(); }
|
||||||
|
|
||||||
|
public String toString(String field) {
|
||||||
|
StringBuffer buffer = new StringBuffer();
|
||||||
|
buffer.append("spanFirst(");
|
||||||
|
buffer.append(match.toString(field));
|
||||||
|
buffer.append(", ");
|
||||||
|
buffer.append(end);
|
||||||
|
buffer.append(")");
|
||||||
|
return buffer.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
public Spans getSpans(final IndexReader reader) throws IOException {
|
||||||
|
return new Spans() {
|
||||||
|
private Spans spans = match.getSpans(reader);
|
||||||
|
|
||||||
|
public boolean next() throws IOException {
|
||||||
|
while (spans.next()) { // scan to next match
|
||||||
|
if (end() <= end)
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean skipTo(int target) throws IOException {
|
||||||
|
if (!spans.skipTo(target))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (spans.end() <= end) // there is a match
|
||||||
|
return true;
|
||||||
|
|
||||||
|
return next(); // scan to next match
|
||||||
|
}
|
||||||
|
|
||||||
|
public int doc() { return spans.doc(); }
|
||||||
|
public int start() { return spans.start(); }
|
||||||
|
public int end() { return spans.end(); }
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return "spans(" + SpanFirstQuery.this.toString() + ")";
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,97 @@
|
||||||
|
package org.apache.lucene.search.spans;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
|
||||||
|
/** Matches spans which are near one another. One can specify <i>slop</i>, the
|
||||||
|
* maximum number of intervening unmatched positions, as well as whether
|
||||||
|
* matches are required to be in-order. */
|
||||||
|
public class SpanNearQuery extends SpanQuery {
|
||||||
|
private List clauses;
|
||||||
|
private int slop;
|
||||||
|
private boolean inOrder;
|
||||||
|
|
||||||
|
private String field;
|
||||||
|
|
||||||
|
/** Construct a SpanNearQuery. Matches spans matching a span from each
|
||||||
|
* clause, with up to <code>slop</code> total unmatched positions between
|
||||||
|
* them. * When <code>inOrder</code> is true, the spans from each clause
|
||||||
|
* must be * ordered as in <code>clauses</code>. */
|
||||||
|
public SpanNearQuery(SpanQuery[] clauses, int slop, boolean inOrder) {
|
||||||
|
|
||||||
|
// copy clauses array into an ArrayList
|
||||||
|
this.clauses = new ArrayList(clauses.length);
|
||||||
|
for (int i = 0; i < clauses.length; i++) {
|
||||||
|
SpanQuery clause = clauses[i];
|
||||||
|
if (i == 0) { // check field
|
||||||
|
field = clause.getField();
|
||||||
|
} else if (!clause.getField().equals(field)) {
|
||||||
|
throw new IllegalArgumentException("Clauses must have same field.");
|
||||||
|
}
|
||||||
|
this.clauses.add(clause);
|
||||||
|
}
|
||||||
|
|
||||||
|
this.slop = slop;
|
||||||
|
this.inOrder = inOrder;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Return the clauses whose spans are matched. */
|
||||||
|
public SpanQuery[] getClauses() {
|
||||||
|
return (SpanQuery[])clauses.toArray(new SpanQuery[clauses.size()]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Return the maximum number of intervening unmatched positions permitted.*/
|
||||||
|
public int getSlop() { return slop; }
|
||||||
|
|
||||||
|
/** Return true if matches are required to be in-order.*/
|
||||||
|
public boolean isInOrder() { return inOrder; }
|
||||||
|
|
||||||
|
public String getField() { return field; }
|
||||||
|
|
||||||
|
public Collection getTerms() {
|
||||||
|
Collection terms = new ArrayList();
|
||||||
|
Iterator i = clauses.iterator();
|
||||||
|
while (i.hasNext()) {
|
||||||
|
SpanQuery clause = (SpanQuery)i.next();
|
||||||
|
terms.addAll(clause.getTerms());
|
||||||
|
}
|
||||||
|
return terms;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString(String field) {
|
||||||
|
StringBuffer buffer = new StringBuffer();
|
||||||
|
buffer.append("spanNear([");
|
||||||
|
Iterator i = clauses.iterator();
|
||||||
|
while (i.hasNext()) {
|
||||||
|
SpanQuery clause = (SpanQuery)i.next();
|
||||||
|
buffer.append(clause.toString(field));
|
||||||
|
if (i.hasNext()) {
|
||||||
|
buffer.append(", ");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
buffer.append("], ");
|
||||||
|
buffer.append(slop);
|
||||||
|
buffer.append(", ");
|
||||||
|
buffer.append(inOrder);
|
||||||
|
buffer.append(")");
|
||||||
|
return buffer.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
public Spans getSpans(final IndexReader reader) throws IOException {
|
||||||
|
if (clauses.size() == 0) // optimize 0-clause case
|
||||||
|
return new SpanOrQuery(getClauses()).getSpans(reader);
|
||||||
|
|
||||||
|
if (clauses.size() == 1) // optimize 1-clause case
|
||||||
|
return ((SpanQuery)clauses.get(0)).getSpans(reader);
|
||||||
|
|
||||||
|
return new NearSpans(this, reader);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,114 @@
|
||||||
|
package org.apache.lucene.search.spans;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import java.util.Collection;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
|
||||||
|
/** Removes matches which overlap with another SpanQuery. */
|
||||||
|
public class SpanNotQuery extends SpanQuery {
|
||||||
|
private SpanQuery include;
|
||||||
|
private SpanQuery exclude;
|
||||||
|
|
||||||
|
/** Construct a SpanNotQuery matching spans from <code>include</code> which
|
||||||
|
* have no overlap with spans from <code>exclude</code>.*/
|
||||||
|
public SpanNotQuery(SpanQuery include, SpanQuery exclude) {
|
||||||
|
this.include = include;
|
||||||
|
this.exclude = exclude;
|
||||||
|
|
||||||
|
if (!include.getField().equals(exclude.getField()))
|
||||||
|
throw new IllegalArgumentException("Clauses must have same field.");
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Return the SpanQuery whose matches are filtered. */
|
||||||
|
public SpanQuery getInclude() { return include; }
|
||||||
|
|
||||||
|
/** Return the SpanQuery whose matches must not overlap those returned. */
|
||||||
|
public SpanQuery getExclude() { return exclude; }
|
||||||
|
|
||||||
|
public String getField() { return include.getField(); }
|
||||||
|
|
||||||
|
public Collection getTerms() { return include.getTerms(); }
|
||||||
|
|
||||||
|
public String toString(String field) {
|
||||||
|
StringBuffer buffer = new StringBuffer();
|
||||||
|
buffer.append("spanNot(");
|
||||||
|
buffer.append(include.toString(field));
|
||||||
|
buffer.append(", ");
|
||||||
|
buffer.append(exclude.toString(field));
|
||||||
|
buffer.append(")");
|
||||||
|
return buffer.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public Spans getSpans(final IndexReader reader) throws IOException {
|
||||||
|
return new Spans() {
|
||||||
|
private Spans includeSpans = include.getSpans(reader);
|
||||||
|
private boolean moreInclude = true;
|
||||||
|
|
||||||
|
private Spans excludeSpans = exclude.getSpans(reader);
|
||||||
|
private boolean moreExclude = true;
|
||||||
|
|
||||||
|
public boolean next() throws IOException {
|
||||||
|
if (moreInclude) // move to next include
|
||||||
|
moreInclude = includeSpans.next();
|
||||||
|
|
||||||
|
while (moreInclude && moreExclude) {
|
||||||
|
|
||||||
|
if (includeSpans.doc() > excludeSpans.doc()) // skip exclude
|
||||||
|
moreExclude = excludeSpans.skipTo(includeSpans.doc());
|
||||||
|
|
||||||
|
while (moreExclude // while exclude is before
|
||||||
|
&& includeSpans.doc() == excludeSpans.doc()
|
||||||
|
&& excludeSpans.end() <= includeSpans.start()) {
|
||||||
|
moreExclude = excludeSpans.next(); // increment exclude
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!moreExclude // if no intersection
|
||||||
|
|| includeSpans.doc() != excludeSpans.doc()
|
||||||
|
|| includeSpans.end() <= excludeSpans.start())
|
||||||
|
break; // we found a match
|
||||||
|
|
||||||
|
moreInclude = includeSpans.next(); // intersected: keep scanning
|
||||||
|
}
|
||||||
|
return moreInclude;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean skipTo(int target) throws IOException {
|
||||||
|
if (moreInclude) // skip include
|
||||||
|
moreInclude = includeSpans.skipTo(target);
|
||||||
|
|
||||||
|
if (!moreInclude)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (moreExclude // skip exclude
|
||||||
|
&& includeSpans.doc() > excludeSpans.doc())
|
||||||
|
moreExclude = excludeSpans.skipTo(includeSpans.doc());
|
||||||
|
|
||||||
|
while (moreExclude // while exclude is before
|
||||||
|
&& includeSpans.doc() == excludeSpans.doc()
|
||||||
|
&& excludeSpans.end() <= includeSpans.start()) {
|
||||||
|
moreExclude = excludeSpans.next(); // increment exclude
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!moreExclude // if no intersection
|
||||||
|
|| includeSpans.doc() != excludeSpans.doc()
|
||||||
|
|| includeSpans.end() <= excludeSpans.start())
|
||||||
|
return true; // we found a match
|
||||||
|
|
||||||
|
return next(); // scan to next match
|
||||||
|
}
|
||||||
|
|
||||||
|
public int doc() { return includeSpans.doc(); }
|
||||||
|
public int start() { return includeSpans.start(); }
|
||||||
|
public int end() { return includeSpans.end(); }
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return "spans(" + SpanNotQuery.this.toString() + ")";
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,132 @@
|
||||||
|
package org.apache.lucene.search.spans;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
|
||||||
|
/** Matches the union of its clauses.*/
|
||||||
|
public class SpanOrQuery extends SpanQuery {
|
||||||
|
private List clauses;
|
||||||
|
private String field;
|
||||||
|
|
||||||
|
/** Construct a SpanOrQuery merging the provided clauses. */
|
||||||
|
public SpanOrQuery(SpanQuery[] clauses) {
|
||||||
|
|
||||||
|
// copy clauses array into an ArrayList
|
||||||
|
this.clauses = new ArrayList(clauses.length);
|
||||||
|
for (int i = 0; i < clauses.length; i++) {
|
||||||
|
SpanQuery clause = clauses[i];
|
||||||
|
if (i == 0) { // check field
|
||||||
|
field = clause.getField();
|
||||||
|
} else if (!clause.getField().equals(field)) {
|
||||||
|
throw new IllegalArgumentException("Clauses must have same field.");
|
||||||
|
}
|
||||||
|
this.clauses.add(clause);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Return the clauses whose spans are matched. */
|
||||||
|
public SpanQuery[] getClauses() {
|
||||||
|
return (SpanQuery[])clauses.toArray(new SpanQuery[clauses.size()]);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getField() { return field; }
|
||||||
|
|
||||||
|
public Collection getTerms() {
|
||||||
|
Collection terms = new ArrayList();
|
||||||
|
Iterator i = clauses.iterator();
|
||||||
|
while (i.hasNext()) {
|
||||||
|
SpanQuery clause = (SpanQuery)i.next();
|
||||||
|
terms.addAll(clause.getTerms());
|
||||||
|
}
|
||||||
|
return terms;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString(String field) {
|
||||||
|
StringBuffer buffer = new StringBuffer();
|
||||||
|
buffer.append("spanOr([");
|
||||||
|
Iterator i = clauses.iterator();
|
||||||
|
while (i.hasNext()) {
|
||||||
|
SpanQuery clause = (SpanQuery)i.next();
|
||||||
|
buffer.append(clause.toString(field));
|
||||||
|
if (i.hasNext()) {
|
||||||
|
buffer.append(", ");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
buffer.append("])");
|
||||||
|
return buffer.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
public Spans getSpans(final IndexReader reader) throws IOException {
|
||||||
|
if (clauses.size() == 1) // optimize 1-clause case
|
||||||
|
return ((SpanQuery)clauses.get(0)).getSpans(reader);
|
||||||
|
|
||||||
|
return new Spans() {
|
||||||
|
private List all = new ArrayList(clauses.size());
|
||||||
|
private SpanQueue queue = new SpanQueue(clauses.size());
|
||||||
|
|
||||||
|
{
|
||||||
|
Iterator i = clauses.iterator();
|
||||||
|
while (i.hasNext()) { // initialize all
|
||||||
|
all.add(((SpanQuery)i.next()).getSpans(reader));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean firstTime = true;
|
||||||
|
|
||||||
|
public boolean next() throws IOException {
|
||||||
|
if (firstTime) { // first time -- initialize
|
||||||
|
for (int i = 0; i < all.size(); i++) {
|
||||||
|
Spans spans = (Spans)all.get(i);
|
||||||
|
if (spans.next()) { // move to first entry
|
||||||
|
queue.put(spans); // build queue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
firstTime = false;
|
||||||
|
return queue.size() != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (queue.size() == 0) { // all done
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (top().next()) { // move to next
|
||||||
|
queue.adjustTop();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
queue.pop(); // exhausted a clause
|
||||||
|
return queue.size() != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
private Spans top() { return (Spans)queue.top(); }
|
||||||
|
|
||||||
|
public boolean skipTo(int target) throws IOException {
|
||||||
|
queue.clear(); // clear the queue
|
||||||
|
for (int i = 0; i < all.size(); i++) {
|
||||||
|
Spans spans = (Spans)all.get(i);
|
||||||
|
if (spans.skipTo(target)) { // skip each spans in all
|
||||||
|
queue.put(spans); // rebuild queue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
firstTime = false;
|
||||||
|
return queue.size() != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int doc() { return top().doc(); }
|
||||||
|
public int start() { return top().start(); }
|
||||||
|
public int end() { return top().end(); }
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return "spans(" + SpanOrQuery.this.toString() + ")";
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,29 @@
|
||||||
|
package org.apache.lucene.search.spans;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import java.util.Collection;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.Weight;
|
||||||
|
import org.apache.lucene.search.Searcher;
|
||||||
|
|
||||||
|
/** Base class for span-based queries. */
|
||||||
|
public abstract class SpanQuery extends Query {
|
||||||
|
/** Expert: Returns the matches for this query in an index. Used internally
|
||||||
|
* to search for spans. */
|
||||||
|
public abstract Spans getSpans(IndexReader reader) throws IOException;
|
||||||
|
|
||||||
|
/** Returns the name of the field matched by this query.*/
|
||||||
|
public abstract String getField();
|
||||||
|
|
||||||
|
/** Returns a collection of all terms matched by this query.*/
|
||||||
|
public abstract Collection getTerms();
|
||||||
|
|
||||||
|
protected Weight createWeight(Searcher searcher) {
|
||||||
|
return new SpanWeight(this, searcher);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,23 @@
|
||||||
|
package org.apache.lucene.search.spans;
|
||||||
|
|
||||||
|
import org.apache.lucene.util.PriorityQueue;
|
||||||
|
|
||||||
|
class SpanQueue extends PriorityQueue {
|
||||||
|
public SpanQueue(int size) {
|
||||||
|
initialize(size);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected final boolean lessThan(Object o1, Object o2) {
|
||||||
|
Spans spans1 = (Spans)o1;
|
||||||
|
Spans spans2 = (Spans)o2;
|
||||||
|
if (spans1.doc() == spans2.doc()) {
|
||||||
|
if (spans1.start() == spans2.start()) {
|
||||||
|
return spans1.end() < spans2.end();
|
||||||
|
} else {
|
||||||
|
return spans1.start() < spans2.start();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return spans1.doc() < spans2.doc();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,89 @@
|
||||||
|
package org.apache.lucene.search.spans;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.search.Weight;
|
||||||
|
import org.apache.lucene.search.Searcher;
|
||||||
|
import org.apache.lucene.search.Scorer;
|
||||||
|
import org.apache.lucene.search.Explanation;
|
||||||
|
import org.apache.lucene.search.Similarity;
|
||||||
|
|
||||||
|
|
||||||
|
class SpanScorer extends Scorer {
|
||||||
|
private Spans spans;
|
||||||
|
private Weight weight;
|
||||||
|
private byte[] norms;
|
||||||
|
private float value;
|
||||||
|
|
||||||
|
private boolean firstTime = true;
|
||||||
|
private boolean more = true;
|
||||||
|
|
||||||
|
private int doc;
|
||||||
|
private float freq;
|
||||||
|
|
||||||
|
SpanScorer(Spans spans, Weight weight, Similarity similarity, byte[] norms)
|
||||||
|
throws IOException {
|
||||||
|
super(similarity);
|
||||||
|
this.spans = spans;
|
||||||
|
this.norms = norms;
|
||||||
|
this.weight = weight;
|
||||||
|
this.value = weight.getValue();
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean next() throws IOException {
|
||||||
|
if (firstTime) {
|
||||||
|
more = spans.next();
|
||||||
|
firstTime = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!more) return false;
|
||||||
|
|
||||||
|
freq = 0.0f;
|
||||||
|
doc = spans.doc();
|
||||||
|
|
||||||
|
while (more && doc == spans.doc()) {
|
||||||
|
int matchLength = spans.end() - spans.start();
|
||||||
|
freq += getSimilarity().sloppyFreq(matchLength);
|
||||||
|
more = spans.next();
|
||||||
|
}
|
||||||
|
|
||||||
|
return more || freq != 0.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int doc() { return doc; }
|
||||||
|
|
||||||
|
public float score() throws IOException {
|
||||||
|
float raw = getSimilarity().tf(freq) * value; // raw score
|
||||||
|
return raw * Similarity.decodeNorm(norms[doc]); // normalize
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean skipTo(int target) throws IOException {
|
||||||
|
more = spans.skipTo(target);
|
||||||
|
|
||||||
|
if (!more) return false;
|
||||||
|
|
||||||
|
freq = 0.0f;
|
||||||
|
doc = spans.doc();
|
||||||
|
|
||||||
|
while (more && spans.doc() == target) {
|
||||||
|
freq += getSimilarity().sloppyFreq(spans.end() - spans.start());
|
||||||
|
more = spans.next();
|
||||||
|
}
|
||||||
|
|
||||||
|
return more || freq != 0.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Explanation explain(final int doc) throws IOException {
|
||||||
|
Explanation tfExplanation = new Explanation();
|
||||||
|
|
||||||
|
skipTo(doc);
|
||||||
|
|
||||||
|
float phraseFreq = (doc() == doc) ? freq : 0.0f;
|
||||||
|
tfExplanation.setValue(getSimilarity().tf(phraseFreq));
|
||||||
|
tfExplanation.setDescription("tf(phraseFreq=" + phraseFreq + ")");
|
||||||
|
|
||||||
|
return tfExplanation;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,84 @@
|
||||||
|
package org.apache.lucene.search.spans;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.index.TermPositions;
|
||||||
|
|
||||||
|
/** Matches spans containing a term. */
|
||||||
|
public class SpanTermQuery extends SpanQuery {
|
||||||
|
private Term term;
|
||||||
|
|
||||||
|
/** Construct a SpanTermQuery matching the named term's spans. */
|
||||||
|
public SpanTermQuery(Term term) { this.term = term; }
|
||||||
|
|
||||||
|
/** Return the term whose spans are matched. */
|
||||||
|
public Term getTerm() { return term; }
|
||||||
|
|
||||||
|
public String getField() { return term.field(); }
|
||||||
|
|
||||||
|
public Collection getTerms() {
|
||||||
|
Collection terms = new ArrayList();
|
||||||
|
terms.add(term);
|
||||||
|
return terms;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString(String field) {
|
||||||
|
if (term.field().equals(field))
|
||||||
|
return term.text();
|
||||||
|
else
|
||||||
|
return term.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
public Spans getSpans(final IndexReader reader) throws IOException {
|
||||||
|
return new Spans() {
|
||||||
|
private TermPositions positions = reader.termPositions(term);
|
||||||
|
|
||||||
|
private int doc;
|
||||||
|
private int freq;
|
||||||
|
private int count;
|
||||||
|
private int position;
|
||||||
|
|
||||||
|
public boolean next() throws IOException {
|
||||||
|
if (count == freq) {
|
||||||
|
if (!positions.next())
|
||||||
|
return false;
|
||||||
|
doc = positions.doc();
|
||||||
|
freq = positions.freq();
|
||||||
|
count = 0;
|
||||||
|
}
|
||||||
|
position = positions.nextPosition();
|
||||||
|
count++;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean skipTo(int target) throws IOException {
|
||||||
|
if (!positions.skipTo(target))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
doc = positions.doc();
|
||||||
|
freq = positions.freq();
|
||||||
|
count = 0;
|
||||||
|
|
||||||
|
position = positions.nextPosition();
|
||||||
|
count++;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int doc() { return doc; }
|
||||||
|
public int start() { return position; }
|
||||||
|
public int end() { return position + 1; }
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return "spans(" + SpanTermQuery.this.toString() + ")";
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,127 @@
|
||||||
|
package org.apache.lucene.search.spans;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.Collection;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.Weight;
|
||||||
|
import org.apache.lucene.search.Searcher;
|
||||||
|
import org.apache.lucene.search.Scorer;
|
||||||
|
import org.apache.lucene.search.Explanation;
|
||||||
|
import org.apache.lucene.search.Similarity;
|
||||||
|
|
||||||
|
class SpanWeight implements Weight {
|
||||||
|
private Searcher searcher;
|
||||||
|
private float value;
|
||||||
|
private float idf;
|
||||||
|
private float queryNorm;
|
||||||
|
private float queryWeight;
|
||||||
|
|
||||||
|
private Collection terms;
|
||||||
|
private SpanQuery query;
|
||||||
|
|
||||||
|
public SpanWeight(SpanQuery query, Searcher searcher) {
|
||||||
|
this.searcher = searcher;
|
||||||
|
this.query = query;
|
||||||
|
this.terms = query.getTerms();
|
||||||
|
}
|
||||||
|
|
||||||
|
public Query getQuery() { return query; }
|
||||||
|
public float getValue() { return value; }
|
||||||
|
|
||||||
|
public float sumOfSquaredWeights() throws IOException {
|
||||||
|
idf = searcher.getSimilarity().idf(terms, searcher);
|
||||||
|
queryWeight = idf * query.getBoost(); // compute query weight
|
||||||
|
return queryWeight * queryWeight; // square it
|
||||||
|
}
|
||||||
|
|
||||||
|
public void normalize(float queryNorm) {
|
||||||
|
this.queryNorm = queryNorm;
|
||||||
|
queryWeight *= queryNorm; // normalize query weight
|
||||||
|
value = queryWeight * idf; // idf for document
|
||||||
|
}
|
||||||
|
|
||||||
|
public Scorer scorer(IndexReader reader) throws IOException {
|
||||||
|
return new SpanScorer(query.getSpans(reader), this,
|
||||||
|
searcher.getSimilarity(),
|
||||||
|
reader.norms(query.getField()));
|
||||||
|
}
|
||||||
|
|
||||||
|
public Explanation explain(IndexReader reader, int doc)
|
||||||
|
throws IOException {
|
||||||
|
|
||||||
|
Explanation result = new Explanation();
|
||||||
|
result.setDescription("weight("+getQuery()+" in "+doc+"), product of:");
|
||||||
|
String field = ((SpanQuery)getQuery()).getField();
|
||||||
|
|
||||||
|
StringBuffer docFreqs = new StringBuffer();
|
||||||
|
Iterator i = terms.iterator();
|
||||||
|
while (i.hasNext()) {
|
||||||
|
Term term = (Term)i.next();
|
||||||
|
docFreqs.append(term.text());
|
||||||
|
docFreqs.append("=");
|
||||||
|
docFreqs.append(searcher.docFreq(term));
|
||||||
|
|
||||||
|
if (i.hasNext()) {
|
||||||
|
docFreqs.append(" ");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Explanation idfExpl =
|
||||||
|
new Explanation(idf, "idf(" + field + ": " + docFreqs + ")");
|
||||||
|
|
||||||
|
// explain query weight
|
||||||
|
Explanation queryExpl = new Explanation();
|
||||||
|
queryExpl.setDescription("queryWeight(" + getQuery() + "), product of:");
|
||||||
|
|
||||||
|
Explanation boostExpl = new Explanation(getQuery().getBoost(), "boost");
|
||||||
|
if (getQuery().getBoost() != 1.0f)
|
||||||
|
queryExpl.addDetail(boostExpl);
|
||||||
|
queryExpl.addDetail(idfExpl);
|
||||||
|
|
||||||
|
Explanation queryNormExpl = new Explanation(queryNorm,"queryNorm");
|
||||||
|
queryExpl.addDetail(queryNormExpl);
|
||||||
|
|
||||||
|
queryExpl.setValue(boostExpl.getValue() *
|
||||||
|
idfExpl.getValue() *
|
||||||
|
queryNormExpl.getValue());
|
||||||
|
|
||||||
|
result.addDetail(queryExpl);
|
||||||
|
|
||||||
|
// explain field weight
|
||||||
|
Explanation fieldExpl = new Explanation();
|
||||||
|
fieldExpl.setDescription("fieldWeight("+field+":"+query.toString(field)+
|
||||||
|
" in "+doc+"), product of:");
|
||||||
|
|
||||||
|
Explanation tfExpl = scorer(reader).explain(doc);
|
||||||
|
fieldExpl.addDetail(tfExpl);
|
||||||
|
fieldExpl.addDetail(idfExpl);
|
||||||
|
|
||||||
|
Explanation fieldNormExpl = new Explanation();
|
||||||
|
byte[] fieldNorms = reader.norms(field);
|
||||||
|
float fieldNorm =
|
||||||
|
fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 0.0f;
|
||||||
|
fieldNormExpl.setValue(fieldNorm);
|
||||||
|
fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")");
|
||||||
|
fieldExpl.addDetail(fieldNormExpl);
|
||||||
|
|
||||||
|
fieldExpl.setValue(tfExpl.getValue() *
|
||||||
|
idfExpl.getValue() *
|
||||||
|
fieldNormExpl.getValue());
|
||||||
|
|
||||||
|
result.addDetail(fieldExpl);
|
||||||
|
|
||||||
|
// combine them
|
||||||
|
result.setValue(queryExpl.getValue() * fieldExpl.getValue());
|
||||||
|
|
||||||
|
if (queryExpl.getValue() == 1.0f)
|
||||||
|
return fieldExpl;
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,37 @@
|
||||||
|
package org.apache.lucene.search.spans;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
/**
 * Expert: an enumeration of span matches, used to implement span searching.
 *
 * <p>Each span represents a range of term positions within a document.
 * Matches are enumerated in order: by increasing document number, within a
 * document by increasing start position, and finally by increasing end
 * position.
 */
|
||||||
|
public interface Spans {
|
||||||
|
/**
 * Moves to the next match.
 *
 * @return true iff such a match exists
 * @throws IOException on I/O failure
 */
|
||||||
|
boolean next() throws IOException;
|
||||||
|
|
||||||
|
/**
 * Skips to the first match beyond the current one whose document number is
 * greater than or equal to <i>target</i>.
 *
 * <p>Behaves as if written:
 * <pre>
 *   boolean skipTo(int target) {
 *     do {
 *       if (!next())
 *         return false;
 *     } while (target &gt; doc());
 *     return true;
 *   }
 * </pre>
 * Most implementations are considerably more efficient than that.
 *
 * @param target the smallest acceptable document number
 * @return true iff there is such a match
 * @throws IOException on I/O failure
 */
|
||||||
|
boolean skipTo(int target) throws IOException;
|
||||||
|
|
||||||
|
/**
 * Returns the document number of the current match.
 * Initially invalid, i.e. before the enumeration has been moved to a match.
 */
|
||||||
|
int doc();
|
||||||
|
|
||||||
|
/**
 * Returns the start position of the current match.
 * Initially invalid, i.e. before the enumeration has been moved to a match.
 */
|
||||||
|
int start();
|
||||||
|
|
||||||
|
/**
 * Returns the end position of the current match.
 * Initially invalid, i.e. before the enumeration has been moved to a match.
 * NOTE(review): SpanTermQuery reports start()+1 for a single-term span,
 * which suggests the end position is exclusive; confirm.
 */
|
||||||
|
int end();
|
||||||
|
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,7 @@
|
||||||
|
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
|
||||||
|
<html>
|
||||||
|
<head></head>
|
||||||
|
<body>
|
||||||
|
The calculus of spans.
|
||||||
|
</body>
|
||||||
|
</html>
|
|
@ -55,6 +55,12 @@ package org.apache.lucene.search;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import junit.framework.TestCase;
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.TreeSet;
|
||||||
|
|
||||||
import org.apache.lucene.util.English;
|
import org.apache.lucene.util.English;
|
||||||
import org.apache.lucene.analysis.SimpleAnalyzer;
|
import org.apache.lucene.analysis.SimpleAnalyzer;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
|
@ -63,9 +69,19 @@ import org.apache.lucene.index.IndexWriter;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.store.RAMDirectory;
|
import org.apache.lucene.store.RAMDirectory;
|
||||||
|
|
||||||
|
import org.apache.lucene.search.spans.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests basic search capabilities.
|
* Tests basic search capabilities.
|
||||||
*
|
*
|
||||||
|
* <p>Uses a collection of 1000 documents, each the english rendition of their
|
||||||
|
* document number. For example, the document numbered 333 has text "three
|
||||||
|
* hundred thirty three".
|
||||||
|
*
|
||||||
|
* <p>Tests are each a single query, and its hits are checked to ensure that
|
||||||
|
* all and only the correct documents are returned, thus providing end-to-end
|
||||||
|
* testing of the indexing and search code.
|
||||||
|
*
|
||||||
* @author Doug Cutting
|
* @author Doug Cutting
|
||||||
*/
|
*/
|
||||||
public class TestBasics extends TestCase {
|
public class TestBasics extends TestCase {
|
||||||
|
@ -90,46 +106,181 @@ public class TestBasics extends TestCase {
|
||||||
|
|
||||||
public void testTerm() throws Exception {
|
public void testTerm() throws Exception {
|
||||||
Query query = new TermQuery(new Term("field", "seventy"));
|
Query query = new TermQuery(new Term("field", "seventy"));
|
||||||
Hits hits = searcher.search(query);
|
checkHits(query, new int[]
|
||||||
assertEquals(100, hits.length());
|
{70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 170, 171, 172, 173, 174, 175,
|
||||||
|
176, 177, 178, 179, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279,
|
||||||
|
370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 470, 471, 472, 473,
|
||||||
|
474, 475, 476, 477, 478, 479, 570, 571, 572, 573, 574, 575, 576, 577,
|
||||||
|
578, 579, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 770, 771,
|
||||||
|
772, 773, 774, 775, 776, 777, 778, 779, 870, 871, 872, 873, 874, 875,
|
||||||
|
876, 877, 878, 879, 970, 971, 972, 973, 974, 975, 976, 977, 978, 979});
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testTerm2() throws Exception {
|
public void testTerm2() throws Exception {
|
||||||
Query query = new TermQuery(new Term("field", "seventish"));
|
Query query = new TermQuery(new Term("field", "seventish"));
|
||||||
Hits hits = searcher.search(query);
|
checkHits(query, new int[] {});
|
||||||
assertEquals(0, hits.length());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testPhrase() throws Exception {
|
public void testPhrase() throws Exception {
|
||||||
PhraseQuery query = new PhraseQuery();
|
PhraseQuery query = new PhraseQuery();
|
||||||
query.add(new Term("field", "seventy"));
|
query.add(new Term("field", "seventy"));
|
||||||
query.add(new Term("field", "seven"));
|
query.add(new Term("field", "seven"));
|
||||||
Hits hits = searcher.search(query);
|
checkHits(query, new int[]
|
||||||
assertEquals(10, hits.length());
|
{77, 177, 277, 377, 477, 577, 677, 777, 877, 977});
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testPhrase2() throws Exception {
|
public void testPhrase2() throws Exception {
|
||||||
PhraseQuery query = new PhraseQuery();
|
PhraseQuery query = new PhraseQuery();
|
||||||
query.add(new Term("field", "seventish"));
|
query.add(new Term("field", "seventish"));
|
||||||
query.add(new Term("field", "sevenon"));
|
query.add(new Term("field", "sevenon"));
|
||||||
Hits hits = searcher.search(query);
|
checkHits(query, new int[] {});
|
||||||
assertEquals(0, hits.length());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testBoolean() throws Exception {
|
public void testBoolean() throws Exception {
|
||||||
BooleanQuery query = new BooleanQuery();
|
BooleanQuery query = new BooleanQuery();
|
||||||
query.add(new TermQuery(new Term("field", "seventy")), true, false);
|
query.add(new TermQuery(new Term("field", "seventy")), true, false);
|
||||||
query.add(new TermQuery(new Term("field", "seven")), true, false);
|
query.add(new TermQuery(new Term("field", "seven")), true, false);
|
||||||
Hits hits = searcher.search(query);
|
checkHits(query, new int[]
|
||||||
assertEquals(19, hits.length());
|
{77, 777, 177, 277, 377, 477, 577, 677, 770, 771, 772, 773, 774, 775,
|
||||||
|
776, 778, 779, 877, 977});
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testBoolean2() throws Exception {
|
public void testBoolean2() throws Exception {
|
||||||
BooleanQuery query = new BooleanQuery();
|
BooleanQuery query = new BooleanQuery();
|
||||||
query.add(new TermQuery(new Term("field", "sevento")), true, false);
|
query.add(new TermQuery(new Term("field", "sevento")), true, false);
|
||||||
query.add(new TermQuery(new Term("field", "sevenly")), true, false);
|
query.add(new TermQuery(new Term("field", "sevenly")), true, false);
|
||||||
|
checkHits(query, new int[] {});
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSpanNearExact() throws Exception {
|
||||||
|
SpanTermQuery term1 = new SpanTermQuery(new Term("field", "seventy"));
|
||||||
|
SpanTermQuery term2 = new SpanTermQuery(new Term("field", "seven"));
|
||||||
|
SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {term1, term2},
|
||||||
|
0, true);
|
||||||
|
checkHits(query, new int[]
|
||||||
|
{77, 177, 277, 377, 477, 577, 677, 777, 877, 977});
|
||||||
|
|
||||||
|
//System.out.println(searcher.explain(query, 77));
|
||||||
|
//System.out.println(searcher.explain(query, 977));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSpanNearUnordered() throws Exception {
|
||||||
|
SpanTermQuery term1 = new SpanTermQuery(new Term("field", "nine"));
|
||||||
|
SpanTermQuery term2 = new SpanTermQuery(new Term("field", "six"));
|
||||||
|
SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {term1, term2},
|
||||||
|
4, false);
|
||||||
|
|
||||||
|
checkHits(query, new int[]
|
||||||
|
{609, 629, 639, 649, 659, 669, 679, 689, 699,
|
||||||
|
906, 926, 936, 946, 956, 966, 976, 986, 996});
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSpanNearOrdered() throws Exception {
|
||||||
|
SpanTermQuery term1 = new SpanTermQuery(new Term("field", "nine"));
|
||||||
|
SpanTermQuery term2 = new SpanTermQuery(new Term("field", "six"));
|
||||||
|
SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {term1, term2},
|
||||||
|
4, true);
|
||||||
|
checkHits(query, new int[]
|
||||||
|
{906, 926, 936, 946, 956, 966, 976, 986, 996});
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSpanNot() throws Exception {
|
||||||
|
SpanTermQuery term1 = new SpanTermQuery(new Term("field", "eight"));
|
||||||
|
SpanTermQuery term2 = new SpanTermQuery(new Term("field", "one"));
|
||||||
|
SpanNearQuery near = new SpanNearQuery(new SpanQuery[] {term1, term2},
|
||||||
|
4, true);
|
||||||
|
SpanTermQuery term3 = new SpanTermQuery(new Term("field", "forty"));
|
||||||
|
SpanNotQuery query = new SpanNotQuery(near, term3);
|
||||||
|
|
||||||
|
checkHits(query, new int[]
|
||||||
|
{801, 821, 831, 851, 861, 871, 881, 891});
|
||||||
|
|
||||||
|
//System.out.println(searcher.explain(query, 801));
|
||||||
|
//System.out.println(searcher.explain(query, 891));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSpanFirst() throws Exception {
|
||||||
|
SpanTermQuery term1 = new SpanTermQuery(new Term("field", "five"));
|
||||||
|
SpanFirstQuery query = new SpanFirstQuery(term1, 1);
|
||||||
|
|
||||||
|
checkHits(query, new int[]
|
||||||
|
{5, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513,
|
||||||
|
514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527,
|
||||||
|
528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541,
|
||||||
|
542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555,
|
||||||
|
556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569,
|
||||||
|
570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583,
|
||||||
|
584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597,
|
||||||
|
598, 599});
|
||||||
|
|
||||||
|
//System.out.println(searcher.explain(query, 5));
|
||||||
|
//System.out.println(searcher.explain(query, 599));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSpanOr() throws Exception {
|
||||||
|
SpanTermQuery term1 = new SpanTermQuery(new Term("field", "thirty"));
|
||||||
|
SpanTermQuery term2 = new SpanTermQuery(new Term("field", "three"));
|
||||||
|
SpanNearQuery near1 = new SpanNearQuery(new SpanQuery[] {term1, term2},
|
||||||
|
0, true);
|
||||||
|
SpanTermQuery term3 = new SpanTermQuery(new Term("field", "forty"));
|
||||||
|
SpanTermQuery term4 = new SpanTermQuery(new Term("field", "seven"));
|
||||||
|
SpanNearQuery near2 = new SpanNearQuery(new SpanQuery[] {term3, term4},
|
||||||
|
0, true);
|
||||||
|
|
||||||
|
SpanOrQuery query = new SpanOrQuery(new SpanQuery[] {near1, near2});
|
||||||
|
|
||||||
|
checkHits(query, new int[]
|
||||||
|
{33, 47, 133, 147, 233, 247, 333, 347, 433, 447, 533, 547, 633, 647, 733,
|
||||||
|
747, 833, 847, 933, 947});
|
||||||
|
|
||||||
|
//System.out.println(searcher.explain(query, 33));
|
||||||
|
//System.out.println(searcher.explain(query, 947));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSpanExactNested() throws Exception {
|
||||||
|
SpanTermQuery term1 = new SpanTermQuery(new Term("field", "three"));
|
||||||
|
SpanTermQuery term2 = new SpanTermQuery(new Term("field", "hundred"));
|
||||||
|
SpanNearQuery near1 = new SpanNearQuery(new SpanQuery[] {term1, term2},
|
||||||
|
0, true);
|
||||||
|
SpanTermQuery term3 = new SpanTermQuery(new Term("field", "thirty"));
|
||||||
|
SpanTermQuery term4 = new SpanTermQuery(new Term("field", "three"));
|
||||||
|
SpanNearQuery near2 = new SpanNearQuery(new SpanQuery[] {term3, term4},
|
||||||
|
0, true);
|
||||||
|
|
||||||
|
SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {near1, near2},
|
||||||
|
0, true);
|
||||||
|
|
||||||
|
checkHits(query, new int[] {333});
|
||||||
|
|
||||||
|
//System.out.println(searcher.explain(query, 333));
|
||||||
|
}
|
||||||
|
|
||||||
|
private void checkHits(Query query, int[] results) throws IOException {
|
||||||
Hits hits = searcher.search(query);
|
Hits hits = searcher.search(query);
|
||||||
assertEquals(0, hits.length());
|
|
||||||
|
Set correct = new TreeSet();
|
||||||
|
for (int i = 0; i < results.length; i++) {
|
||||||
|
correct.add(new Integer(results[i]));
|
||||||
|
}
|
||||||
|
|
||||||
|
Set actual = new TreeSet();
|
||||||
|
for (int i = 0; i < hits.length(); i++) {
|
||||||
|
actual.add(new Integer(hits.id(i)));
|
||||||
|
}
|
||||||
|
|
||||||
|
assertEquals(query.toString("field"), correct, actual);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void printHits(Query query) throws IOException {
|
||||||
|
Hits hits = searcher.search(query);
|
||||||
|
System.out.print("new int[] {");
|
||||||
|
for (int i = 0; i < hits.length(); i++) {
|
||||||
|
System.out.print(hits.id(i));
|
||||||
|
if (i != hits.length()-1)
|
||||||
|
System.out.print(", ");
|
||||||
|
}
|
||||||
|
System.out.println("}");
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -56,7 +56,7 @@ package org.apache.lucene.search;
|
||||||
|
|
||||||
import junit.framework.TestCase;
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
import java.util.Vector;
|
import java.util.Collection;
|
||||||
|
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.IndexWriter;
|
import org.apache.lucene.index.IndexWriter;
|
||||||
|
@ -81,7 +81,7 @@ public class TestSimilarity extends TestCase {
|
||||||
public float queryNorm(float sumOfSquaredWeights) { return 1.0f; }
|
public float queryNorm(float sumOfSquaredWeights) { return 1.0f; }
|
||||||
public float tf(float freq) { return freq; }
|
public float tf(float freq) { return freq; }
|
||||||
public float sloppyFreq(int distance) { return 2.0f; }
|
public float sloppyFreq(int distance) { return 2.0f; }
|
||||||
public float idf(Vector terms, Searcher searcher) { return 1.0f; }
|
public float idf(Collection terms, Searcher searcher) { return 1.0f; }
|
||||||
public float idf(int docFreq, int numDocs) { return 1.0f; }
|
public float idf(int docFreq, int numDocs) { return 1.0f; }
|
||||||
public float coord(int overlap, int maxOverlap) { return 1.0f; }
|
public float coord(int overlap, int maxOverlap) { return 1.0f; }
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue