LUCENE-6489: Move Payload queries to queries module and PayloadSpanUtil to sandbox

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1703392 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Alan Woodward 2015-09-16 12:43:31 +00:00
parent 1b01528b95
commit b570fb0352
25 changed files with 263 additions and 133 deletions

View File

@ -90,6 +90,9 @@ API Changes
* LUCENE-6716: SpanPayloadCheckQuery now takes a List<BytesRef> rather than
a Collection<byte[]>. (Alan Woodward)
* LUCENE-6489: The various span payload queries have been moved to the queries
submodule, and PayloadSpanUtil is now in sandbox. (Alan Woodward)
Optimizations
* LUCENE-6708: TopFieldCollector does not compute the score several times on the

View File

@ -24,9 +24,7 @@ import org.apache.lucene.util.BytesRef;
* The payload of a Token.
* <p>
* The payload is stored in the index at each position, and can
* be used to influence scoring when using Payload-based queries
* in the {@link org.apache.lucene.search.payloads} and
* {@link org.apache.lucene.search.spans} packages.
* be used to influence scoring when using Payload-based queries.
* <p>
* NOTE: because the payload will be stored at each position, it's usually
* best to use the minimum number of bytes necessary. Some codec implementations

View File

@ -33,7 +33,7 @@
* <h2>Search Basics</h2>
* <p>
* Lucene offers a wide variety of {@link org.apache.lucene.search.Query} implementations, most of which are in
* this package, its subpackages ({@link org.apache.lucene.search.spans spans}, {@link org.apache.lucene.search.payloads payloads}),
* this package, its subpackage ({@link org.apache.lucene.search.spans spans}),
* or the <a href="{@docRoot}/../queries/overview-summary.html">queries module</a>. These implementations can be combined in a wide
* variety of ways to provide complex querying capabilities along with information about where matches took place in the document
* collection. The <a href="#query">Query Classes</a> section below highlights some of the more important Query classes. For details

View File

@ -19,8 +19,8 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.io.StringReader;
import java.nio.charset.StandardCharsets;
import java.util.Collection;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockPayloadAnalyzer;
@ -38,9 +38,8 @@ import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.payloads.PayloadSpanCollector;
import org.apache.lucene.search.payloads.PayloadSpanUtil;
import org.apache.lucene.search.spans.MultiSpansWrapper;
import org.apache.lucene.search.spans.SpanCollector;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
@ -201,6 +200,22 @@ public class TestPositionIncrement extends LuceneTestCase {
store.close();
}
/** Test-only {@link SpanCollector} that gathers the payload at every matched position. */
static class PayloadSpanCollector implements SpanCollector {
// Payloads seen since the last reset(); deep-copied because the BytesRef
// returned by PostingsEnum.getPayload() may be reused on subsequent calls
// (hence BytesRef.deepCopyOf) — TODO confirm against the PostingsEnum contract.
List<BytesRef> payloads = new ArrayList<>();
@Override
public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException {
if (postings.getPayload() != null)
payloads.add(BytesRef.deepCopyOf(postings.getPayload()));
}
@Override
public void reset() {
// Invoked before each new span is collected; discard the previous span's payloads.
payloads.clear();
}
}
public void testPayloadsPos0() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, new MockPayloadAnalyzer());
@ -248,12 +263,11 @@ public class TestPositionIncrement extends LuceneTestCase {
}
collector.reset();
pspans.collect(collector);
Collection<byte[]> payloads = collector.getPayloads();
sawZero |= pspans.startPosition() == 0;
for (byte[] bytes : payloads) {
for (BytesRef payload : collector.payloads) {
count++;
if (VERBOSE) {
System.out.println(" payload: " + new String(bytes, StandardCharsets.UTF_8));
System.out.println(" payload: " + Term.toString(payload));
}
}
}
@ -276,17 +290,6 @@ public class TestPositionIncrement extends LuceneTestCase {
assertEquals(4, count);
assertTrue(sawZero);
sawZero = false;
PayloadSpanUtil psu = new PayloadSpanUtil(is.getTopReaderContext());
Collection<byte[]> pls = psu.getPayloadsForQuery(snq);
count = pls.size();
for (byte[] bytes : pls) {
String s = new String(bytes, StandardCharsets.UTF_8);
//System.out.println(s);
sawZero |= s.equals("pos: 0");
}
assertEquals(8, count);
assertTrue(sawZero);
writer.close();
is.getIndexReader().close();
dir.close();

View File

@ -83,7 +83,7 @@ import org.apache.lucene.search.join.QueryBitSetProducer;
import org.apache.lucene.search.join.ScoreMode;
import org.apache.lucene.search.join.ToChildBlockJoinQuery;
import org.apache.lucene.search.join.ToParentBlockJoinQuery;
import org.apache.lucene.search.payloads.SpanPayloadCheckQuery;
import org.apache.lucene.queries.payloads.SpanPayloadCheckQuery;
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanNotQuery;

View File

@ -1,4 +1,4 @@
package org.apache.lucene.search.payloads;
package org.apache.lucene.queries.payloads;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more

View File

@ -1,4 +1,4 @@
package org.apache.lucene.search.payloads;
package org.apache.lucene.queries.payloads;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more

View File

@ -1,4 +1,4 @@
package org.apache.lucene.search.payloads;
package org.apache.lucene.queries.payloads;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more

View File

@ -1,4 +1,4 @@
package org.apache.lucene.search.payloads;
package org.apache.lucene.queries.payloads;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@ -22,7 +22,7 @@ import org.apache.lucene.search.Explanation;
* An abstract class that defines a way for PayloadScoreQuery instances to transform
* the cumulative effects of payload scores for a document.
*
* @see org.apache.lucene.search.payloads.PayloadScoreQuery for more information
* @see org.apache.lucene.queries.payloads.PayloadScoreQuery for more information
*
* @lucene.experimental This class and its derivations are experimental and subject to
* change

View File

@ -1,4 +1,4 @@
package org.apache.lucene.search.payloads;
package org.apache.lucene.queries.payloads;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more

View File

@ -1,4 +1,4 @@
package org.apache.lucene.search.payloads;
package org.apache.lucene.queries.payloads;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with

View File

@ -20,8 +20,8 @@
* <p>
* The following Query implementations are provided:
* <ol>
* <li>{@link org.apache.lucene.search.payloads.PayloadScoreQuery PayloadScoreQuery} -- For all terms matched by
* <li>{@link org.apache.lucene.queries.payloads.PayloadScoreQuery PayloadScoreQuery} -- For all terms matched by
* a SpanQuery, boost the score based on the value of the payload located at those terms.</li>
* </ol>
*/
package org.apache.lucene.search.payloads;
package org.apache.lucene.queries.payloads;

View File

@ -1,4 +1,4 @@
package org.apache.lucene.search.payloads;
package org.apache.lucene.queries.payloads;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more

View File

@ -1,4 +1,4 @@
package org.apache.lucene.search.payloads;
package org.apache.lucene.queries.payloads;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more

View File

@ -1,4 +1,4 @@
package org.apache.lucene.search.payloads;
package org.apache.lucene.queries.payloads;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more

View File

@ -1,4 +1,4 @@
package org.apache.lucene.search.payloads;
package org.apache.lucene.queries.payloads;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more

View File

@ -1,4 +1,4 @@
package org.apache.lucene.search.payloads;
package org.apache.lucene.queries.payloads;
/*
* Copyright 2004 The Apache Software Foundation
@ -16,6 +16,13 @@ package org.apache.lucene.search.payloads;
* limitations under the License.
*/
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
@ -32,11 +39,11 @@ import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.spans.MultiSpansWrapper;
import org.apache.lucene.search.spans.SpanCollector;
import org.apache.lucene.search.spans.SpanFirstQuery;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanNotQuery;
@ -48,13 +55,6 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import java.io.IOException;
import java.io.StringReader;
import java.nio.charset.StandardCharsets;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
public class TestPayloadSpans extends LuceneTestCase {
private IndexSearcher searcher;
private Similarity similarity = new ClassicSimilarity();
@ -74,15 +74,15 @@ public class TestPayloadSpans extends LuceneTestCase {
SpanTermQuery stq;
Spans spans;
stq = new SpanTermQuery(new Term(PayloadHelper.FIELD, "seventy"));
PayloadSpanCollector collector = new PayloadSpanCollector();
spans = MultiSpansWrapper.wrap(indexReader, stq, SpanWeight.Postings.PAYLOADS);
assertTrue("spans is null and it shouldn't be", spans != null);
checkSpans(spans, collector, 100, 1, 1, 1);
checkSpans(spans, 100, 1, 1, 1);
stq = new SpanTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "seventy"));
spans = MultiSpansWrapper.wrap(indexReader, stq, SpanWeight.Postings.PAYLOADS);
assertTrue("spans is null and it shouldn't be", spans != null);
checkSpans(spans, collector, 100, 0, 0, 0);
checkSpans(spans, 100, 0, 0, 0);
}
public void testSpanFirst() throws IOException {
@ -91,20 +91,19 @@ public class TestPayloadSpans extends LuceneTestCase {
SpanFirstQuery sfq;
match = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one"));
sfq = new SpanFirstQuery(match, 2);
PayloadSpanCollector collector = new PayloadSpanCollector();
Spans spans = MultiSpansWrapper.wrap(indexReader, sfq, SpanWeight.Postings.PAYLOADS);
checkSpans(spans, collector, 109, 1, 1, 1);
checkSpans(spans, 109, 1, 1, 1);
//Test more complicated subclause
SpanQuery[] clauses = new SpanQuery[2];
clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one"));
clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "hundred"));
match = new SpanNearQuery(clauses, 0, true);
sfq = new SpanFirstQuery(match, 2);
checkSpans(MultiSpansWrapper.wrap(indexReader, sfq, SpanWeight.Postings.PAYLOADS), collector, 100, 2, 1, 1);
checkSpans(MultiSpansWrapper.wrap(indexReader, sfq, SpanWeight.Postings.PAYLOADS), 100, 2, 1, 1);
match = new SpanNearQuery(clauses, 0, false);
sfq = new SpanFirstQuery(match, 2);
checkSpans(MultiSpansWrapper.wrap(indexReader, sfq, SpanWeight.Postings.PAYLOADS), collector, 100, 2, 1, 1);
checkSpans(MultiSpansWrapper.wrap(indexReader, sfq, SpanWeight.Postings.PAYLOADS), 100, 2, 1, 1);
}
@ -127,8 +126,7 @@ public class TestPayloadSpans extends LuceneTestCase {
IndexReader reader = writer.getReader();
writer.close();
PayloadSpanCollector collector = new PayloadSpanCollector();
checkSpans(MultiSpansWrapper.wrap(reader, snq, SpanWeight.Postings.PAYLOADS), collector, 1, new int[]{2});
checkSpans(MultiSpansWrapper.wrap(reader, snq, SpanWeight.Postings.PAYLOADS), 1, new int[]{2});
reader.close();
directory.close();
}
@ -137,7 +135,6 @@ public class TestPayloadSpans extends LuceneTestCase {
SpanTermQuery stq;
Spans spans;
IndexSearcher searcher = getSearcher();
PayloadSpanCollector collector = new PayloadSpanCollector();
stq = new SpanTermQuery(new Term(PayloadHelper.FIELD, "mark"));
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), stq, SpanWeight.Postings.PAYLOADS);
@ -151,7 +148,7 @@ public class TestPayloadSpans extends LuceneTestCase {
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), spanNearQuery, SpanWeight.Postings.PAYLOADS);
assertTrue("spans is null and it shouldn't be", spans != null);
checkSpans(spans, collector, 2, new int[]{3,3});
checkSpans(spans, 2, new int[]{3,3});
clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "xx"));
@ -163,7 +160,7 @@ public class TestPayloadSpans extends LuceneTestCase {
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), spanNearQuery, SpanWeight.Postings.PAYLOADS);
assertTrue("spans is null and it shouldn't be", spans != null);
checkSpans(spans, collector, 1, new int[]{3});
checkSpans(spans, 1, new int[]{3});
clauses = new SpanQuery[2];
@ -184,7 +181,7 @@ public class TestPayloadSpans extends LuceneTestCase {
// yy within 6 of xx within 6 of rr
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery, SpanWeight.Postings.PAYLOADS);
assertTrue("spans is null and it shouldn't be", spans != null);
checkSpans(spans, collector, 2, new int[]{3,3});
checkSpans(spans, 2, new int[]{3,3});
closeIndexReader.close();
directory.close();
}
@ -212,12 +209,11 @@ public class TestPayloadSpans extends LuceneTestCase {
clauses3[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "np"));
clauses3[1] = snq;
PayloadSpanCollector collector = new PayloadSpanCollector();
SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses3, 6, false);
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery, SpanWeight.Postings.PAYLOADS);
assertTrue("spans is null and it shouldn't be", spans != null);
checkSpans(spans, collector, 1, new int[]{3});
checkSpans(spans, 1, new int[]{3});
closeIndexReader.close();
directory.close();
}
@ -252,10 +248,9 @@ public class TestPayloadSpans extends LuceneTestCase {
SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses3, 6, false);
PayloadSpanCollector collector = new PayloadSpanCollector();
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery, SpanWeight.Postings.PAYLOADS);
assertTrue("spans is null and it shouldn't be", spans != null);
checkSpans(spans, collector, 2, new int[]{8, 8});
checkSpans(spans, 2, new int[]{8, 8});
closeIndexReader.close();
directory.close();
}
@ -277,7 +272,7 @@ public class TestPayloadSpans extends LuceneTestCase {
SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
SpanQuery[] sqs = { stq1, stq2 };
SpanNearQuery snq = new SpanNearQuery(sqs, 1, true);
PayloadSpanCollector collector = new PayloadSpanCollector();
VerifyingCollector collector = new VerifyingCollector();
Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq, SpanWeight.Postings.PAYLOADS);
TopDocs topDocs = is.search(snq, 1);
@ -287,9 +282,8 @@ public class TestPayloadSpans extends LuceneTestCase {
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
collector.reset();
spans.collect(collector);
Collection<byte[]> payloads = collector.getPayloads();
for (final byte [] payload : payloads) {
payloadSet.add(new String(payload, StandardCharsets.UTF_8));
for (final BytesRef payload : collector.payloads) {
payloadSet.add(Term.toString(payload));
}
}
}
@ -317,7 +311,7 @@ public class TestPayloadSpans extends LuceneTestCase {
SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
SpanQuery[] sqs = { stq1, stq2 };
SpanNearQuery snq = new SpanNearQuery(sqs, 0, true);
PayloadSpanCollector collector = new PayloadSpanCollector();
VerifyingCollector collector = new VerifyingCollector();
Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq, SpanWeight.Postings.PAYLOADS);
TopDocs topDocs = is.search(snq, 1);
@ -327,10 +321,8 @@ public class TestPayloadSpans extends LuceneTestCase {
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
collector.reset();
spans.collect(collector);
Collection<byte[]> payloads = collector.getPayloads();
for (final byte [] payload : payloads) {
payloadSet.add(new String(payload, StandardCharsets.UTF_8));
for (final BytesRef payload: collector.payloads) {
payloadSet.add(Term.toString(payload));
}
}
}
@ -358,20 +350,18 @@ public class TestPayloadSpans extends LuceneTestCase {
SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
SpanQuery[] sqs = { stq1, stq2 };
SpanNearQuery snq = new SpanNearQuery(sqs, 0, true);
PayloadSpanCollector collector = new PayloadSpanCollector();
Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq, SpanWeight.Postings.PAYLOADS);
TopDocs topDocs = is.search(snq, 1);
Set<String> payloadSet = new HashSet<>();
VerifyingCollector collector = new VerifyingCollector();
for (int i = 0; i < topDocs.scoreDocs.length; i++) {
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
collector.reset();
spans.collect(collector);
Collection<byte[]> payloads = collector.getPayloads();
for (final byte [] payload : payloads) {
payloadSet.add(new String(payload, StandardCharsets.UTF_8));
for (final BytesRef payload : collector.payloads) {
payloadSet.add(Term.toString(payload));
}
}
}
@ -387,57 +377,51 @@ public class TestPayloadSpans extends LuceneTestCase {
reader.close();
directory.close();
}
public void testPayloadSpanUtil() throws Exception {
Directory directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
newIndexWriterConfig(new PayloadAnalyzer()).setSimilarity(similarity));
Document doc = new Document();
doc.add(newTextField(PayloadHelper.FIELD, "xx rr yy mm pp", Field.Store.YES));
writer.addDocument(doc);
IndexReader reader = writer.getReader();
writer.close();
IndexSearcher searcher = newSearcher(reader);
static class VerifyingCollector implements SpanCollector {
PayloadSpanUtil psu = new PayloadSpanUtil(searcher.getTopReaderContext());
Collection<byte[]> payloads = psu.getPayloadsForQuery(new TermQuery(new Term(PayloadHelper.FIELD, "rr")));
if(VERBOSE) {
System.out.println("Num payloads:" + payloads.size());
for (final byte [] bytes : payloads) {
System.out.println(new String(bytes, StandardCharsets.UTF_8));
List<BytesRef> payloads = new ArrayList<>();
@Override
public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException {
if (postings.getPayload() != null) {
payloads.add(BytesRef.deepCopyOf(postings.getPayload()));
}
}
@Override
public void reset() {
payloads.clear();
}
public void verify(int expectedLength, int expectedFirstByte) {
for (BytesRef payload : payloads) {
assertEquals("Incorrect payload length", expectedLength, payload.length);
assertEquals("Incorrect first byte", expectedFirstByte, payload.bytes[0]);
}
}
reader.close();
directory.close();
}
private void checkSpans(Spans spans, PayloadSpanCollector collector, int expectedNumSpans, int expectedNumPayloads,
private void checkSpans(Spans spans, int expectedNumSpans, int expectedNumPayloads,
int expectedPayloadLength, int expectedFirstByte) throws IOException {
assertTrue("spans is null and it shouldn't be", spans != null);
//each position match should have a span associated with it, since there is just one underlying term query, there should
//only be one entry in the span
VerifyingCollector collector = new VerifyingCollector();
int seen = 0;
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
collector.reset();
spans.collect(collector);
Collection<byte[]> payload = collector.getPayloads();
assertEquals("payload size", expectedNumPayloads, payload.size());
for (final byte [] thePayload : payload) {
assertEquals("payload length", expectedPayloadLength, thePayload.length);
assertEquals("payload first byte", expectedFirstByte, thePayload[0]);
}
collector.verify(expectedPayloadLength, expectedFirstByte);
assertEquals("expectedNumPayloads", expectedNumPayloads, collector.payloads.size());
seen++;
}
}
assertEquals("expectedNumSpans", expectedNumSpans, seen);
}
private IndexSearcher getSearcher() throws Exception {
directory = newDirectory();
String[] docs = new String[]{"xx rr yy mm pp","xx yy mm rr pp", "nopayload qq ss pp np", "one two three four five six seven eight nine ten eleven", "nine one two three four five six seven eight eleven ten"};
@ -459,25 +443,16 @@ public class TestPayloadSpans extends LuceneTestCase {
return searcher;
}
private void checkSpans(Spans spans, PayloadSpanCollector collector, int numSpans, int[] numPayloads) throws IOException {
private void checkSpans(Spans spans, int numSpans, int[] numPayloads) throws IOException {
int cnt = 0;
VerifyingCollector collector = new VerifyingCollector();
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
if(VERBOSE)
System.out.println("\nSpans Dump --");
collector.reset();
spans.collect(collector);
Collection<byte[]> payload = collector.getPayloads();
if(VERBOSE) {
System.out.println("payloads for span:" + payload.size());
for (final byte [] bytes : payload) {
System.out.println("doc:" + spans.docID() + " s:" + spans.startPosition() + " e:" + spans.endPosition() + " "
+ new String(bytes, StandardCharsets.UTF_8));
}
}
assertEquals("payload size", numPayloads[cnt], payload.size());
assertEquals("payload size", numPayloads[cnt], collector.payloads.size());
cnt++;
}

View File

@ -1,4 +1,4 @@
package org.apache.lucene.search.payloads;
package org.apache.lucene.queries.payloads;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with

View File

@ -3,8 +3,8 @@ package org.apache.lucene.queryparser.xml.builders;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.xml.DOMUtils;
import org.apache.lucene.queryparser.xml.ParserException;
import org.apache.lucene.search.payloads.AveragePayloadFunction;
import org.apache.lucene.search.payloads.PayloadScoreQuery;
import org.apache.lucene.queries.payloads.AveragePayloadFunction;
import org.apache.lucene.queries.payloads.PayloadScoreQuery;
import org.apache.lucene.search.spans.SpanBoostQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;

View File

@ -1,4 +1,4 @@
package org.apache.lucene.search.payloads;
package org.apache.lucene.payloads;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more

View File

@ -1,4 +1,4 @@
package org.apache.lucene.search.payloads;
package org.apache.lucene.payloads;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -17,6 +17,12 @@ package org.apache.lucene.search.payloads;
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReaderContext;
@ -36,12 +42,6 @@ import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.SpanWeight;
import org.apache.lucene.search.spans.Spans;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
/**
* Experimental class to get set of payloads for most standard Lucene queries.
* Operates like Highlighter - IndexReader should only contain doc of interest,

View File

@ -0,0 +1,21 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Experimental classes for interacting with payloads
*/
package org.apache.lucene.payloads;

View File

@ -0,0 +1,130 @@
package org.apache.lucene.payloads;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
/**
 * Tests for {@code PayloadSpanUtil} (moved to the sandbox module by LUCENE-6489):
 * indexes a single document with a payload-producing analyzer and verifies that
 * payloads can be retrieved for a plain (non-span) {@link TermQuery}.
 */
public class TestPayloadSpanUtil extends LuceneTestCase {
// Single field name used by the test index.
public static final String FIELD = "f";
public void testPayloadSpanUtil() throws Exception {
Directory directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
newIndexWriterConfig(new PayloadAnalyzer()).setSimilarity(new ClassicSimilarity()));
Document doc = new Document();
// Five tokens; PayloadFilter attaches a payload to each (none are in its no-payload set).
doc.add(newTextField(FIELD, "xx rr yy mm pp", Field.Store.YES));
writer.addDocument(doc);
IndexReader reader = writer.getReader();
writer.close();
IndexSearcher searcher = newSearcher(reader);
// PayloadSpanUtil rewrites arbitrary queries to span queries to expose payloads.
PayloadSpanUtil psu = new PayloadSpanUtil(searcher.getTopReaderContext());
Collection<byte[]> payloads = psu.getPayloadsForQuery(new TermQuery(new Term(FIELD, "rr")));
if(VERBOSE) {
System.out.println("Num payloads:" + payloads.size());
for (final byte [] bytes : payloads) {
System.out.println(new String(bytes, StandardCharsets.UTF_8));
}
}
reader.close();
directory.close();
}
/** Analyzer wiring a {@link MockTokenizer} through {@link PayloadFilter} so every token gets a payload. */
final class PayloadAnalyzer extends Analyzer {
@Override
public TokenStreamComponents createComponents(String fieldName) {
Tokenizer result = new MockTokenizer(MockTokenizer.SIMPLE, true);
return new TokenStreamComponents(result, new PayloadFilter(result));
}
}
/**
 * Filter that tags each token with a payload of the form
 * {@code term:Entity:pos} (for terms in {@link #entities}) or {@code term:Noise:pos},
 * skipping terms listed in {@link #nopayload}.
 */
final class PayloadFilter extends TokenFilter {
// Terms tagged as ":Entity:" in their payload.
Set<String> entities = new HashSet<>();
// Terms that receive no payload at all.
Set<String> nopayload = new HashSet<>();
// Running token position, advanced by each token's position increment.
int pos;
PayloadAttribute payloadAtt;
CharTermAttribute termAtt;
PositionIncrementAttribute posIncrAtt;
public PayloadFilter(TokenStream input) {
super(input);
pos = 0;
entities.add("xx");
entities.add("one");
nopayload.add("nopayload");
nopayload.add("np");
termAtt = addAttribute(CharTermAttribute.class);
posIncrAtt = addAttribute(PositionIncrementAttribute.class);
payloadAtt = addAttribute(PayloadAttribute.class);
}
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
String token = termAtt.toString();
if (!nopayload.contains(token)) {
if (entities.contains(token)) {
payloadAtt.setPayload(new BytesRef(token + ":Entity:"+ pos ));
} else {
payloadAtt.setPayload(new BytesRef(token + ":Noise:" + pos ));
}
}
// Advance position AFTER tagging, so the payload records the token's own position.
pos += posIncrAtt.getPositionIncrement();
return true;
}
return false;
}
@Override
public void reset() throws IOException {
super.reset();
// Restart position tracking for stream reuse.
this.pos = 0;
}
}
}

View File

@ -17,14 +17,14 @@ package org.apache.lucene.search.spans;
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.search.IndexSearcher;
import java.io.IOException;
/**
*
* A wrapper to perform span operations on a non-leaf reader context

View File

@ -28,7 +28,7 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.payloads.SpanPayloadCheckQuery;
import org.apache.lucene.queries.payloads.SpanPayloadCheckQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.SolrTestCaseJ4;