+ * WARNING: The status of the Payloads feature is experimental.
+ * The APIs introduced here might change in the future and will not be
+ * supported anymore in such a case.
+ *
+ **/
+public interface PayloadSpans extends Spans{
+ /**
+ * Returns the payload data for the current span.
+ * This is invalid until {@link #next()} is called for
+ * the first time.
+ * This method must not be called more than once after each call
+ * of {@link #next()}. However, payloads are loaded lazily,
+ * so if the payload data for the current position is not needed,
+ * this method need not be called at all, for performance reasons.
+ *
+ * Note that the return type is a collection, thus the ordering should not be relied upon.
+ *
+ *
+ * WARNING: The status of the Payloads feature is experimental.
+ * The APIs introduced here might change in the future and will not be
+ * supported anymore in such a case.
+ *
+ * @return a Collection of byte arrays containing the data of this payload, otherwise null if isPayloadAvailable is false
+ * @throws java.io.IOException if there is an error accessing the payload
+ */
+ // TODO: Remove warning after API has been finalized
+ Collection/**/ getPayload() throws IOException;
+
+ /**
+ * Checks if a payload can be loaded at this position.
+ *
+ * Payloads can only be loaded once per call to
+ * {@link #next()}.
+ *
+ *
+ * WARNING: The status of the Payloads feature is experimental.
+ * The APIs introduced here might change in the future and will not be
+ * supported anymore in such a case.
+ *
+ * @return true if there is a payload available at this position that can be loaded
+ */
+ // TODO: Remove warning after API has been finalized
+ public boolean isPayloadAvailable();
+
+}
diff --git a/src/java/org/apache/lucene/search/spans/SpanFirstQuery.java b/src/java/org/apache/lucene/search/spans/SpanFirstQuery.java
index 0a0cea64a2d..1bef13fbb15 100644
--- a/src/java/org/apache/lucene/search/spans/SpanFirstQuery.java
+++ b/src/java/org/apache/lucene/search/spans/SpanFirstQuery.java
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.Collection;
import java.util.Set;
+import java.util.ArrayList;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Query;
@@ -65,11 +66,15 @@ public class SpanFirstQuery extends SpanQuery {
public void extractTerms(Set terms) {
match.extractTerms(terms);
- }
+ }
+
+ public PayloadSpans getPayloadSpans(IndexReader reader) throws IOException {
+ return (PayloadSpans) getSpans(reader);
+ }
public Spans getSpans(final IndexReader reader) throws IOException {
- return new Spans() {
- private Spans spans = match.getSpans(reader);
+ return new PayloadSpans() {
+ private PayloadSpans spans = match.getPayloadSpans(reader);
public boolean next() throws IOException {
while (spans.next()) { // scan to next match
@@ -83,17 +88,29 @@ public class SpanFirstQuery extends SpanQuery {
if (!spans.skipTo(target))
return false;
- if (spans.end() <= end) // there is a match
- return true;
+ return spans.end() <= end || next();
- return next(); // scan to next match
}
public int doc() { return spans.doc(); }
public int start() { return spans.start(); }
public int end() { return spans.end(); }
- public String toString() {
+ // TODO: Remove warning after API has been finalized
+ public Collection/**/ getPayload() throws IOException {
+ ArrayList result = null;
+ if (spans.isPayloadAvailable()) {
+ result = new ArrayList(spans.getPayload());
+ }
+ return result;//TODO: any way to avoid the new construction?
+ }
+
+ // TODO: Remove warning after API has been finalized
+ public boolean isPayloadAvailable() {
+ return spans.isPayloadAvailable();
+ }
+
+ public String toString() {
return "spans(" + SpanFirstQuery.this.toString() + ")";
}
diff --git a/src/java/org/apache/lucene/search/spans/SpanNearQuery.java b/src/java/org/apache/lucene/search/spans/SpanNearQuery.java
index 4cb59ead6e2..c47f939b77f 100644
--- a/src/java/org/apache/lucene/search/spans/SpanNearQuery.java
+++ b/src/java/org/apache/lucene/search/spans/SpanNearQuery.java
@@ -120,14 +120,18 @@ public class SpanNearQuery extends SpanQuery {
public Spans getSpans(final IndexReader reader) throws IOException {
if (clauses.size() == 0) // optimize 0-clause case
- return new SpanOrQuery(getClauses()).getSpans(reader);
+ return new SpanOrQuery(getClauses()).getPayloadSpans(reader);
if (clauses.size() == 1) // optimize 1-clause case
- return ((SpanQuery)clauses.get(0)).getSpans(reader);
+ return ((SpanQuery)clauses.get(0)).getPayloadSpans(reader);
return inOrder
- ? (Spans) new NearSpansOrdered(this, reader)
- : (Spans) new NearSpansUnordered(this, reader);
+ ? (PayloadSpans) new NearSpansOrdered(this, reader)
+ : (PayloadSpans) new NearSpansUnordered(this, reader);
+ }
+
+ public PayloadSpans getPayloadSpans(IndexReader reader) throws IOException {
+ return (PayloadSpans) getSpans(reader);
}
public Query rewrite(IndexReader reader) throws IOException {
diff --git a/src/java/org/apache/lucene/search/spans/SpanNotQuery.java b/src/java/org/apache/lucene/search/spans/SpanNotQuery.java
index ce16b62076b..315ca7a918d 100644
--- a/src/java/org/apache/lucene/search/spans/SpanNotQuery.java
+++ b/src/java/org/apache/lucene/search/spans/SpanNotQuery.java
@@ -17,15 +17,15 @@ package org.apache.lucene.search.spans;
* limitations under the License.
*/
-import java.io.IOException;
-
-import java.util.Collection;
-import java.util.Set;
-
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.ToStringUtils;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Set;
+
/** Removes matches which overlap with another SpanQuery. */
public class SpanNotQuery extends SpanQuery {
private SpanQuery include;
@@ -70,8 +70,8 @@ public class SpanNotQuery extends SpanQuery {
public Spans getSpans(final IndexReader reader) throws IOException {
- return new Spans() {
- private Spans includeSpans = include.getSpans(reader);
+ return new PayloadSpans() {
+ private PayloadSpans includeSpans = include.getPayloadSpans(reader);
private boolean moreInclude = true;
private Spans excludeSpans = exclude.getSpans(reader);
@@ -131,13 +131,31 @@ public class SpanNotQuery extends SpanQuery {
public int start() { return includeSpans.start(); }
public int end() { return includeSpans.end(); }
- public String toString() {
+ // TODO: Remove warning after API has been finalized
+ public Collection/**/ getPayload() throws IOException {
+ ArrayList result = null;
+ if (includeSpans.isPayloadAvailable()) {
+ result = new ArrayList(includeSpans.getPayload());
+ }
+ return result;
+ }
+
+ // TODO: Remove warning after API has been finalized
+ public boolean isPayloadAvailable() {
+ return includeSpans.isPayloadAvailable();
+ }
+
+ public String toString() {
return "spans(" + SpanNotQuery.this.toString() + ")";
}
};
}
+ public PayloadSpans getPayloadSpans(IndexReader reader) throws IOException {
+ return (PayloadSpans) getSpans(reader);
+ }
+
public Query rewrite(IndexReader reader) throws IOException {
SpanNotQuery clone = null;
diff --git a/src/java/org/apache/lucene/search/spans/SpanOrQuery.java b/src/java/org/apache/lucene/search/spans/SpanOrQuery.java
index 399e71c47ab..39be65b1f87 100644
--- a/src/java/org/apache/lucene/search/spans/SpanOrQuery.java
+++ b/src/java/org/apache/lucene/search/spans/SpanOrQuery.java
@@ -154,19 +154,22 @@ public class SpanOrQuery extends SpanQuery {
}
}
+ public PayloadSpans getPayloadSpans(final IndexReader reader) throws IOException {
+ return (PayloadSpans)getSpans(reader);
+ }
public Spans getSpans(final IndexReader reader) throws IOException {
if (clauses.size() == 1) // optimize 1-clause case
- return ((SpanQuery)clauses.get(0)).getSpans(reader);
+ return ((SpanQuery)clauses.get(0)).getPayloadSpans(reader);
- return new Spans() {
+ return new PayloadSpans() {
private SpanQueue queue = null;
private boolean initSpanQueue(int target) throws IOException {
queue = new SpanQueue(clauses.size());
Iterator i = clauses.iterator();
while (i.hasNext()) {
- Spans spans = ((SpanQuery)i.next()).getSpans(reader);
+ PayloadSpans spans = ((SpanQuery)i.next()).getPayloadSpans(reader);
if ( ((target == -1) && spans.next())
|| ((target != -1) && spans.skipTo(target))) {
queue.put(spans);
@@ -193,7 +196,7 @@ public class SpanOrQuery extends SpanQuery {
return queue.size() != 0;
}
- private Spans top() { return (Spans)queue.top(); }
+ private PayloadSpans top() { return (PayloadSpans)queue.top(); }
public boolean skipTo(int target) throws IOException {
if (queue == null) {
@@ -215,7 +218,23 @@ public class SpanOrQuery extends SpanQuery {
public int start() { return top().start(); }
public int end() { return top().end(); }
- public String toString() {
+ // TODO: Remove warning after API has been finalized
+ public Collection/**/ getPayload() throws IOException {
+ ArrayList result = null;
+ PayloadSpans theTop = top();
+ if (theTop != null && theTop.isPayloadAvailable()) {
+ result = new ArrayList(theTop.getPayload());
+ }
+ return result;
+ }
+
+ // TODO: Remove warning after API has been finalized
+ public boolean isPayloadAvailable() {
+ PayloadSpans top = top();
+ return top != null && top.isPayloadAvailable();
+ }
+
+ public String toString() {
return "spans("+SpanOrQuery.this+")@"+
((queue == null)?"START"
:(queue.size()>0?(doc()+":"+start()+"-"+end()):"END"));
diff --git a/src/java/org/apache/lucene/search/spans/SpanQuery.java b/src/java/org/apache/lucene/search/spans/SpanQuery.java
index a2caa71059e..8a316848b5c 100644
--- a/src/java/org/apache/lucene/search/spans/SpanQuery.java
+++ b/src/java/org/apache/lucene/search/spans/SpanQuery.java
@@ -17,15 +17,14 @@ package org.apache.lucene.search.spans;
* limitations under the License.
*/
-import java.io.IOException;
-
-import java.util.Collection;
-import java.util.Set;
-
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Query;
-import org.apache.lucene.search.Weight;
import org.apache.lucene.search.Searcher;
+import org.apache.lucene.search.Weight;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Set;
/** Base class for span-based queries. */
public abstract class SpanQuery extends Query {
@@ -33,6 +32,22 @@ public abstract class SpanQuery extends Query {
* to search for spans. */
public abstract Spans getSpans(IndexReader reader) throws IOException;
+ /**
+ * Returns the matches for this query in an index, including access to any {@link org.apache.lucene.index.Payload}s at those
+ * positions. Implementing classes that want access to the payloads will need to implement this.
+ * @param reader The {@link org.apache.lucene.index.IndexReader} to use to get spans/payloads
+ * @return null in this base implementation; subclasses that support payloads override this method
+ * @throws IOException if there is an error accessing the payload
+ *
+ *
+ * WARNING: The status of the Payloads feature is experimental.
+ * The APIs introduced here might change in the future and will not be
+ * supported anymore in such a case.
+ */
+ public PayloadSpans getPayloadSpans(IndexReader reader) throws IOException{
+ return null;
+ };
+
/** Returns the name of the field matched by this query.*/
public abstract String getField();
diff --git a/src/java/org/apache/lucene/search/spans/SpanTermQuery.java b/src/java/org/apache/lucene/search/spans/SpanTermQuery.java
index e37d2d44890..2d60ffd2a3d 100644
--- a/src/java/org/apache/lucene/search/spans/SpanTermQuery.java
+++ b/src/java/org/apache/lucene/search/spans/SpanTermQuery.java
@@ -79,4 +79,9 @@ public class SpanTermQuery extends SpanQuery {
return new TermSpans(reader.termPositions(term), term);
}
+
+ public PayloadSpans getPayloadSpans(IndexReader reader) throws IOException {
+ return (PayloadSpans) getSpans(reader);
+ }
+
}
diff --git a/src/java/org/apache/lucene/search/spans/TermSpans.java b/src/java/org/apache/lucene/search/spans/TermSpans.java
index 40611581ec2..d2c81e4670e 100644
--- a/src/java/org/apache/lucene/search/spans/TermSpans.java
+++ b/src/java/org/apache/lucene/search/spans/TermSpans.java
@@ -20,12 +20,14 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermPositions;
import java.io.IOException;
+import java.util.Collections;
+import java.util.Collection;
/**
* Expert:
* Public for extension only
*/
-public class TermSpans implements Spans {
+public class TermSpans implements PayloadSpans {
protected TermPositions positions;
protected Term term;
protected int doc;
@@ -89,6 +91,18 @@ public class TermSpans implements Spans {
return position + 1;
}
+ // TODO: Remove warning after API has been finalized
+ public Collection/**/ getPayload() throws IOException {
+ byte [] bytes = new byte[positions.getPayloadLength()];
+ bytes = positions.getPayload(bytes, 0);
+ return Collections.singletonList(bytes);
+ }
+
+ // TODO: Remove warning after API has been finalized
+ public boolean isPayloadAvailable() {
+ return positions.isPayloadAvailable();
+ }
+
public String toString() {
return "spans(" + term.toString() + ")@" +
(doc == -1 ? "START" : (doc == Integer.MAX_VALUE) ? "END" : doc + "-" + position);
diff --git a/src/test/org/apache/lucene/search/payloads/PayloadHelper.java b/src/test/org/apache/lucene/search/payloads/PayloadHelper.java
new file mode 100644
index 00000000000..2ebb44f557e
--- /dev/null
+++ b/src/test/org/apache/lucene/search/payloads/PayloadHelper.java
@@ -0,0 +1,104 @@
+package org.apache.lucene.search.payloads;
+
+
+import org.apache.lucene.analysis.*;
+import org.apache.lucene.index.Payload;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.util.English;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Similarity;
+
+import java.io.Reader;
+import java.io.IOException;
+
+/**
+ *
+ *
+ **/
+public class PayloadHelper {
+
+ private byte[] payloadField = new byte[]{1};
+ private byte[] payloadMultiField1 = new byte[]{2};
+ private byte[] payloadMultiField2 = new byte[]{4};
+ public static final String NO_PAYLOAD_FIELD = "noPayloadField";
+ public static final String MULTI_FIELD = "multiField";
+ public static final String FIELD = "field";
+
+ public class PayloadAnalyzer extends Analyzer {
+
+
+
+ public TokenStream tokenStream(String fieldName, Reader reader) {
+ TokenStream result = new LowerCaseTokenizer(reader);
+ result = new PayloadFilter(result, fieldName);
+ return result;
+ }
+ }
+
+ public class PayloadFilter extends TokenFilter {
+ String fieldName;
+ int numSeen = 0;
+
+ public PayloadFilter(TokenStream input, String fieldName) {
+ super(input);
+ this.fieldName = fieldName;
+ }
+
+ public Token next() throws IOException {
+ Token result = input.next();
+ if (result != null) {
+ if (fieldName.equals(FIELD))
+ {
+ result.setPayload(new Payload(payloadField));
+ }
+ else if (fieldName.equals(MULTI_FIELD))
+ {
+ if (numSeen % 2 == 0)
+ {
+ result.setPayload(new Payload(payloadMultiField1));
+ }
+ else
+ {
+ result.setPayload(new Payload(payloadMultiField2));
+ }
+ numSeen++;
+ }
+
+ }
+ return result;
+ }
+ }
+
+ /**
+ * Sets up a RAMDirectory, and adds documents (using English.intToEnglish()) with three fields: field, multiField
+ * and noPayloadField, and analyzes them using the PayloadAnalyzer
+ * @param similarity The Similarity class to use in the Searcher
+ * @param numDocs The num docs to add
+ * @return An IndexSearcher
+ * @throws IOException
+ */
+ public IndexSearcher setUp(Similarity similarity, int numDocs) throws IOException {
+ RAMDirectory directory = new RAMDirectory();
+ PayloadAnalyzer analyzer = new PayloadAnalyzer();
+ IndexWriter writer
+ = new IndexWriter(directory, analyzer, true);
+ writer.setSimilarity(similarity);
+ //writer.infoStream = System.out;
+ for (int i = 0; i < numDocs; i++) {
+ Document doc = new Document();
+ doc.add(new Field(FIELD, English.intToEnglish(i), Field.Store.YES, Field.Index.TOKENIZED));
+ doc.add(new Field(MULTI_FIELD, English.intToEnglish(i) + " " + English.intToEnglish(i), Field.Store.YES, Field.Index.TOKENIZED));
+ doc.add(new Field(NO_PAYLOAD_FIELD, English.intToEnglish(i), Field.Store.YES, Field.Index.TOKENIZED));
+ writer.addDocument(doc);
+ }
+ //writer.optimize();
+ writer.close();
+
+ IndexSearcher searcher = new IndexSearcher(directory);
+ searcher.setSimilarity(similarity);
+ return searcher;
+ }
+}
diff --git a/src/test/org/apache/lucene/search/payloads/TestBoostingTermQuery.java b/src/test/org/apache/lucene/search/payloads/TestBoostingTermQuery.java
index b0b78a4e307..b46f0865365 100644
--- a/src/test/org/apache/lucene/search/payloads/TestBoostingTermQuery.java
+++ b/src/test/org/apache/lucene/search/payloads/TestBoostingTermQuery.java
@@ -102,7 +102,7 @@ public class TestBoostingTermQuery extends LuceneTestCase {
//writer.infoStream = System.out;
for (int i = 0; i < 1000; i++) {
Document doc = new Document();
- Field noPayloadField = new Field("noPayLoad", English.intToEnglish(i), Field.Store.YES, Field.Index.TOKENIZED);
+ Field noPayloadField = new Field(PayloadHelper.NO_PAYLOAD_FIELD, English.intToEnglish(i), Field.Store.YES, Field.Index.TOKENIZED);
//noPayloadField.setBoost(0);
doc.add(noPayloadField);
doc.add(new Field("field", English.intToEnglish(i), Field.Store.YES, Field.Index.TOKENIZED));
@@ -130,7 +130,7 @@ public class TestBoostingTermQuery extends LuceneTestCase {
ScoreDoc doc = hits.scoreDocs[i];
assertTrue(doc.score + " does not equal: " + 1, doc.score == 1);
}
- CheckHits.checkExplanations(query, "field", searcher, true);
+ CheckHits.checkExplanations(query, PayloadHelper.FIELD, searcher, true);
Spans spans = query.getSpans(searcher.getIndexReader());
assertTrue("spans is null and it shouldn't be", spans != null);
assertTrue("spans is not an instanceof " + TermSpans.class, spans instanceof TermSpans);
@@ -143,7 +143,7 @@ public class TestBoostingTermQuery extends LuceneTestCase {
}
public void testMultipleMatchesPerDoc() throws Exception {
- BoostingTermQuery query = new BoostingTermQuery(new Term("multiField", "seventy"));
+ BoostingTermQuery query = new BoostingTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"));
TopDocs hits = searcher.search(query, null, 100);
assertTrue("hits is null and it shouldn't be", hits != null);
assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100);
@@ -180,7 +180,7 @@ public class TestBoostingTermQuery extends LuceneTestCase {
}
public void testNoMatch() throws Exception {
- BoostingTermQuery query = new BoostingTermQuery(new Term("field", "junk"));
+ BoostingTermQuery query = new BoostingTermQuery(new Term(PayloadHelper.FIELD, "junk"));
TopDocs hits = searcher.search(query, null, 100);
assertTrue("hits is null and it shouldn't be", hits != null);
assertTrue("hits Size: " + hits.totalHits + " is not: " + 0, hits.totalHits == 0);
@@ -188,8 +188,8 @@ public class TestBoostingTermQuery extends LuceneTestCase {
}
public void testNoPayload() throws Exception {
- BoostingTermQuery q1 = new BoostingTermQuery(new Term("noPayLoad", "zero"));
- BoostingTermQuery q2 = new BoostingTermQuery(new Term("noPayLoad", "foo"));
+ BoostingTermQuery q1 = new BoostingTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "zero"));
+ BoostingTermQuery q2 = new BoostingTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "foo"));
BooleanClause c1 = new BooleanClause(q1, BooleanClause.Occur.MUST);
BooleanClause c2 = new BooleanClause(q2, BooleanClause.Occur.MUST_NOT);
BooleanQuery query = new BooleanQuery();
@@ -200,7 +200,7 @@ public class TestBoostingTermQuery extends LuceneTestCase {
assertTrue("hits Size: " + hits.totalHits + " is not: " + 1, hits.totalHits == 1);
int[] results = new int[1];
results[0] = 0;//hits.scoreDocs[0].doc;
- CheckHits.checkHitCollector(query, "noPayLoad", searcher, results);
+ CheckHits.checkHitCollector(query, PayloadHelper.NO_PAYLOAD_FIELD, searcher, results);
}
// must be static for weight serialization tests
diff --git a/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java b/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java
new file mode 100644
index 00000000000..d46c9d0466e
--- /dev/null
+++ b/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java
@@ -0,0 +1,377 @@
+package org.apache.lucene.search.spans;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Set;
+
+import junit.framework.TestCase;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.LowerCaseTokenizer;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Payload;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.DefaultSimilarity;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Similarity;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.payloads.PayloadHelper;
+import org.apache.lucene.search.payloads.PayloadSpanUtil;
+import org.apache.lucene.store.RAMDirectory;
+
+public class TestPayloadSpans extends TestCase {
+ private final static boolean DEBUG = false;
+ private IndexSearcher searcher;
+ private Similarity similarity = new DefaultSimilarity();
+ protected IndexReader indexReader;
+
+ public TestPayloadSpans(String s) {
+ super(s);
+ }
+
+ protected void setUp() throws IOException {
+ PayloadHelper helper = new PayloadHelper();
+ searcher = helper.setUp(similarity, 1000);
+ indexReader = searcher.getIndexReader();
+ }
+
+ protected void tearDown() {
+
+ }
+
+ public void testSpanTermQuery() throws Exception {
+ SpanTermQuery stq;
+ PayloadSpans spans;
+ stq = new SpanTermQuery(new Term(PayloadHelper.FIELD, "seventy"));
+ spans = stq.getPayloadSpans(indexReader);
+ assertTrue("spans is null and it shouldn't be", spans != null);
+ checkSpans(spans, 100, 1, 1, 1);
+
+ stq = new SpanTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "seventy"));
+ spans = stq.getPayloadSpans(indexReader);
+ assertTrue("spans is null and it shouldn't be", spans != null);
+ checkSpans(spans, 100, 0, 0, 0);
+ }
+
+ public void testSpanFirst() throws IOException {
+
+ SpanQuery match;
+ SpanFirstQuery sfq;
+ match = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one"));
+ sfq = new SpanFirstQuery(match, 2);
+ PayloadSpans spans = sfq.getPayloadSpans(indexReader);
+ checkSpans(spans, 109, 1, 1, 1);
+ //Test more complicated subclause
+ SpanQuery[] clauses = new SpanQuery[2];
+ clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one"));
+ clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "hundred"));
+ match = new SpanNearQuery(clauses, 0, true);
+ sfq = new SpanFirstQuery(match, 2);
+ checkSpans(sfq.getPayloadSpans(indexReader), 100, 2, 1, 1);
+
+ match = new SpanNearQuery(clauses, 0, false);
+ sfq = new SpanFirstQuery(match, 2);
+ checkSpans(sfq.getPayloadSpans(indexReader), 100, 2, 1, 1);
+
+ }
+
+ public void testNestedSpans() throws Exception {
+ SpanTermQuery stq;
+ PayloadSpans spans;
+ IndexSearcher searcher = getSearcher();
+ stq = new SpanTermQuery(new Term(PayloadHelper.FIELD, "mark"));
+ spans = stq.getPayloadSpans(searcher.getIndexReader());
+ assertTrue("spans is null and it shouldn't be", spans != null);
+ checkSpans(spans, 0, null);
+
+
+ SpanQuery[] clauses = new SpanQuery[3];
+ clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "rr"));
+ clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "yy"));
+ clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "xx"));
+ SpanNearQuery spanNearQuery = new SpanNearQuery(clauses, 12, false);
+
+ spans = spanNearQuery.getPayloadSpans(searcher.getIndexReader());
+ assertTrue("spans is null and it shouldn't be", spans != null);
+ checkSpans(spans, 2, new int[]{3,3});
+
+
+ clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "xx"));
+ clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "rr"));
+ clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "yy"));
+
+ spanNearQuery = new SpanNearQuery(clauses, 6, true);
+
+
+ spans = spanNearQuery.getPayloadSpans(searcher.getIndexReader());
+ assertTrue("spans is null and it shouldn't be", spans != null);
+ checkSpans(spans, 1, new int[]{3});
+
+ clauses = new SpanQuery[2];
+
+ clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "xx"));
+ clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "rr"));
+
+ spanNearQuery = new SpanNearQuery(clauses, 6, true);
+
+
+ SpanQuery[] clauses2 = new SpanQuery[2];
+
+ clauses2[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "yy"));
+ clauses2[1] = spanNearQuery;
+
+ SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses2, 6, false);
+
+ spans = nestedSpanNearQuery.getPayloadSpans(searcher.getIndexReader());
+ assertTrue("spans is null and it shouldn't be", spans != null);
+ checkSpans(spans, 2, new int[]{3,3});
+ }
+
+ public void testFirstClauseWithoutPayload() throws Exception {
+ PayloadSpans spans;
+ IndexSearcher searcher = getSearcher();
+
+ SpanQuery[] clauses = new SpanQuery[3];
+ clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "nopayload"));
+ clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "qq"));
+ clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "ss"));
+
+ SpanNearQuery spanNearQuery = new SpanNearQuery(clauses, 6, true);
+
+ SpanQuery[] clauses2 = new SpanQuery[2];
+
+ clauses2[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "pp"));
+ clauses2[1] = spanNearQuery;
+
+ SpanNearQuery snq = new SpanNearQuery(clauses2, 6, false);
+
+ SpanQuery[] clauses3 = new SpanQuery[2];
+
+ clauses3[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "np"));
+ clauses3[1] = snq;
+
+ SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses3, 6, false);
+
+ spans = nestedSpanNearQuery.getPayloadSpans(searcher.getIndexReader());
+ assertTrue("spans is null and it shouldn't be", spans != null);
+ checkSpans(spans, 1, new int[]{3});
+ }
+
+ public void testHeavilyNestedSpanQuery() throws Exception {
+ PayloadSpans spans;
+ IndexSearcher searcher = getSearcher();
+
+ SpanQuery[] clauses = new SpanQuery[3];
+ clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one"));
+ clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "two"));
+ clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "three"));
+
+ SpanNearQuery spanNearQuery = new SpanNearQuery(clauses, 5, true);
+
+ clauses = new SpanQuery[3];
+ clauses[0] = spanNearQuery;
+ clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "five"));
+ clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "six"));
+
+ SpanNearQuery spanNearQuery2 = new SpanNearQuery(clauses, 6, true);
+
+ SpanQuery[] clauses2 = new SpanQuery[2];
+ clauses2[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "eleven"));
+ clauses2[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "ten"));
+ SpanNearQuery spanNearQuery3 = new SpanNearQuery(clauses2, 2, false);
+
+ SpanQuery[] clauses3 = new SpanQuery[3];
+ clauses3[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "nine"));
+ clauses3[1] = spanNearQuery2;
+ clauses3[2] = spanNearQuery3;
+
+ SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses3, 6, false);
+
+ spans = nestedSpanNearQuery.getPayloadSpans(searcher.getIndexReader());
+ assertTrue("spans is null and it shouldn't be", spans != null);
+ checkSpans(spans, 2, new int[]{8, 8});
+ }
+
+ public void testPayloadSpanUtil() throws Exception {
+ RAMDirectory directory = new RAMDirectory();
+ PayloadAnalyzer analyzer = new PayloadAnalyzer();
+ String[] docs = new String[]{};
+ IndexWriter writer = new IndexWriter(directory, analyzer, true);
+ writer.setSimilarity(similarity);
+ Document doc = new Document();
+ doc.add(new Field(PayloadHelper.FIELD,"xx rr yy mm pp", Field.Store.YES, Field.Index.TOKENIZED));
+ writer.addDocument(doc);
+
+ writer.close();
+
+ IndexSearcher searcher = new IndexSearcher(directory);
+
+ IndexReader reader = searcher.getIndexReader();
+ PayloadSpanUtil psu = new PayloadSpanUtil(reader);
+
+ Collection payloads = psu.getPayloadsForQuery(new TermQuery(new Term(PayloadHelper.FIELD, "rr")));
+ if(DEBUG)
+ System.out.println("Num payloads:" + payloads.size());
+ Iterator it = payloads.iterator();
+ while(it.hasNext()) {
+ byte[] bytes = (byte[]) it.next();
+ if(DEBUG)
+ System.out.println(new String(bytes));
+ }
+
+ }
+
+ private void checkSpans(PayloadSpans spans, int expectedNumSpans, int expectedNumPayloads,
+ int expectedPayloadLength, int expectedFirstByte) throws IOException {
+ assertTrue("spans is null and it shouldn't be", spans != null);
+ //each position match should have a span associated with it, since there is just one underlying term query, there should
+ //only be one entry in the span
+ int seen = 0;
+ while (spans.next() == true)
+ {
+ //if we expect payloads, then isPayloadAvailable should be true
+ if (expectedNumPayloads > 0) {
+ assertTrue("isPayloadAvailable is not returning the correct value: " + spans.isPayloadAvailable()
+ + " and it should be: " + (expectedNumPayloads > 0),
+ spans.isPayloadAvailable() == true);
+ } else {
+ assertTrue("isPayloadAvailable should be false", spans.isPayloadAvailable() == false);
+ }
+ //See payload helper, for the PayloadHelper.FIELD field, there is a single byte payload at every token
+ if (spans.isPayloadAvailable()) {
+ Collection payload = spans.getPayload();
+ assertTrue("payload Size: " + payload.size() + " is not: " + expectedNumPayloads, payload.size() == expectedNumPayloads);
+ for (Iterator iterator = payload.iterator(); iterator.hasNext();) {
+ byte[] thePayload = (byte[]) iterator.next();
+ assertTrue("payload[0] Size: " + thePayload.length + " is not: " + expectedPayloadLength,
+ thePayload.length == expectedPayloadLength);
+ assertTrue(thePayload[0] + " does not equal: " + expectedFirstByte, thePayload[0] == expectedFirstByte);
+
+ }
+
+ }
+ seen++;
+ }
+ assertTrue(seen + " does not equal: " + expectedNumSpans, seen == expectedNumSpans);
+ }
+
+ private IndexSearcher getSearcher() throws Exception {
+ RAMDirectory directory = new RAMDirectory();
+ PayloadAnalyzer analyzer = new PayloadAnalyzer();
+ String[] docs = new String[]{"xx rr yy mm pp","xx yy mm rr pp", "nopayload qq ss pp np", "one two three four five six seven eight nine ten eleven", "nine one two three four five six seven eight eleven ten"};
+ IndexWriter writer = new IndexWriter(directory, analyzer, true);
+
+ writer.setSimilarity(similarity);
+
+ Document doc = null;
+ for(int i = 0; i < docs.length; i++) {
+ doc = new Document();
+ String docText = docs[i];
+ doc.add(new Field(PayloadHelper.FIELD,docText, Field.Store.YES, Field.Index.TOKENIZED));
+ writer.addDocument(doc);
+ }
+
+ writer.close();
+
+ IndexSearcher searcher = new IndexSearcher(directory);
+ return searcher;
+ }
+
+ private void checkSpans(PayloadSpans spans, int numSpans, int[] numPayloads) throws IOException {
+ int cnt = 0;
+
+ while (spans.next() == true) {
+ if(DEBUG)
+ System.out.println("\nSpans Dump --");
+ if (spans.isPayloadAvailable()) {
+ Collection payload = spans.getPayload();
+ if(DEBUG)
+ System.out.println("payloads for span:" + payload.size());
+ Iterator it = payload.iterator();
+ while(it.hasNext()) {
+ byte[] bytes = (byte[]) it.next();
+ if(DEBUG)
+ System.out.println("doc:" + spans.doc() + " s:" + spans.start() + " e:" + spans.end() + " "
+ + new String(bytes));
+ }
+
+ assertEquals(numPayloads[cnt],payload.size());
+ } else {
+ assertFalse("Expected spans:" + numPayloads[cnt] + " found: 0",numPayloads.length > 0 && numPayloads[cnt] > 0 );
+ }
+ cnt++;
+ }
+
+ assertEquals(numSpans, cnt);
+ }
+
+ class PayloadAnalyzer extends Analyzer {
+
+ public TokenStream tokenStream(String fieldName, Reader reader) {
+ TokenStream result = new LowerCaseTokenizer(reader);
+ result = new PayloadFilter(result, fieldName);
+ return result;
+ }
+ }
+
+ class PayloadFilter extends TokenFilter {
+ String fieldName;
+ int numSeen = 0;
+ Set entities = new HashSet();
+ Set nopayload = new HashSet();
+ int pos;
+
+ public PayloadFilter(TokenStream input, String fieldName) {
+ super(input);
+ this.fieldName = fieldName;
+ pos = 0;
+ entities.add("xx");
+ entities.add("one");
+ nopayload.add("nopayload");
+ nopayload.add("np");
+
+ }
+
+ public Token next() throws IOException {
+ Token result = input.next();
+ if (result != null) {
+ String token = new String(result.termBuffer(), 0, result.termLength());
+
+ if (!nopayload.contains(token)) {
+ if (entities.contains(token)) {
+ result.setPayload(new Payload((token + ":Entity:"+ pos ).getBytes()));
+ } else {
+ result.setPayload(new Payload((token + ":Noise:" + pos ).getBytes()));
+ }
+ }
+ pos += result.getPositionIncrement();
+ }
+ return result;
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/test/org/apache/lucene/search/spans/TestSpans.java b/src/test/org/apache/lucene/search/spans/TestSpans.java
index fd4f8d27df4..254de90f912 100644
--- a/src/test/org/apache/lucene/search/spans/TestSpans.java
+++ b/src/test/org/apache/lucene/search/spans/TestSpans.java
@@ -46,7 +46,6 @@ public class TestSpans extends LuceneTestCase {
}
writer.close();
searcher = new IndexSearcher(directory);
-//System.out.println("set up " + getName());
}
private String[] docFields = {
@@ -192,6 +191,105 @@ public class TestSpans extends LuceneTestCase {
}
+ /** Verifies the exact (doc, start, end) match sequence of unordered SpanNearQuery, flat and then nested. */
+ public void testSpanNearUnOrdered() throws Exception {
+   //See http://www.gossamer-threads.com/lists/lucene/java-dev/52270 for discussion about this test
+   // Flat case: "u1" near "u2" with slop 0, not in order; exactly five matches are expected.
+   SpanNearQuery snq = new SpanNearQuery(
+     new SpanQuery[] {
+       makeSpanTermQuery("u1"),
+       makeSpanTermQuery("u2") },
+     0,
+     false);
+   Spans spans = snq.getSpans(searcher.getIndexReader());
+   assertTrue("Does not have next and it should", spans.next());
+   assertEquals("doc", 4, spans.doc());
+   assertEquals("start", 1, spans.start());
+   assertEquals("end", 3, spans.end());
+
+   assertTrue("Does not have next and it should", spans.next());
+   assertEquals("doc", 5, spans.doc());
+   assertEquals("start", 2, spans.start());
+   assertEquals("end", 4, spans.end());
+
+   assertTrue("Does not have next and it should", spans.next());
+   assertEquals("doc", 8, spans.doc());
+   assertEquals("start", 2, spans.start());
+   assertEquals("end", 4, spans.end());
+
+   assertTrue("Does not have next and it should", spans.next());
+   assertEquals("doc", 9, spans.doc());
+   assertEquals("start", 0, spans.start());
+   assertEquals("end", 2, spans.end());
+
+   assertTrue("Does not have next and it should", spans.next());
+   assertEquals("doc", 10, spans.doc());
+   assertEquals("start", 0, spans.start());
+   assertEquals("end", 2, spans.end());
+   assertFalse("Has next and it shouldn't: " + spans.doc(), spans.next());
+
+   // Nested case: the unordered u1/u2 near query combined with another "u2" term, slop 1, not in order.
+   SpanNearQuery u1u2 = new SpanNearQuery(new SpanQuery[]{makeSpanTermQuery("u1"),
+     makeSpanTermQuery("u2")}, 0, false);
+   snq = new SpanNearQuery(
+     new SpanQuery[] {
+       u1u2,
+       makeSpanTermQuery("u2")
+     },
+     1,
+     false);
+   spans = snq.getSpans(searcher.getIndexReader());
+   assertTrue("Does not have next and it should", spans.next());
+   assertEquals("doc", 4, spans.doc());
+   assertEquals("start", 0, spans.start());
+   assertEquals("end", 3, spans.end());
+
+   assertTrue("Does not have next and it should", spans.next());
+   //unordered spans can be subsets
+   assertEquals("doc", 4, spans.doc());
+   assertEquals("start", 1, spans.start());
+   assertEquals("end", 3, spans.end());
+
+   assertTrue("Does not have next and it should", spans.next());
+   assertEquals("doc", 5, spans.doc());
+   assertEquals("start", 0, spans.start());
+   assertEquals("end", 4, spans.end());
+
+   assertTrue("Does not have next and it should", spans.next());
+   assertEquals("doc", 5, spans.doc());
+   assertEquals("start", 2, spans.start());
+   assertEquals("end", 4, spans.end());
+
+   assertTrue("Does not have next and it should", spans.next());
+   assertEquals("doc", 8, spans.doc());
+   assertEquals("start", 0, spans.start());
+   assertEquals("end", 4, spans.end());
+
+   assertTrue("Does not have next and it should", spans.next());
+   assertEquals("doc", 8, spans.doc());
+   assertEquals("start", 2, spans.start());
+   assertEquals("end", 4, spans.end());
+
+   assertTrue("Does not have next and it should", spans.next());
+   assertEquals("doc", 9, spans.doc());
+   assertEquals("start", 0, spans.start());
+   assertEquals("end", 2, spans.end());
+
+   assertTrue("Does not have next and it should", spans.next());
+   assertEquals("doc", 9, spans.doc());
+   assertEquals("start", 0, spans.start());
+   assertEquals("end", 4, spans.end());
+
+   assertTrue("Does not have next and it should", spans.next());
+   assertEquals("doc", 10, spans.doc());
+   assertEquals("start", 0, spans.start());
+   assertEquals("end", 2, spans.end());
+
+   assertFalse("Has next and it shouldn't", spans.next());
+ }
+
+
+
private Spans orSpans(String[] terms) throws Exception {
SpanQuery[] sqa = new SpanQuery[terms.length];
for (int i = 0; i < terms.length; i++) {