LUCENE-6706: Remove deprecated PayloadTermQuery and PayloadNearQuery

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1693927 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Alan Woodward 2015-08-03 16:14:59 +00:00
parent 83520603c1
commit 0398139b1a
7 changed files with 21 additions and 894 deletions

View File

@ -19,10 +19,10 @@ package org.apache.lucene.search.payloads;
import org.apache.lucene.search.Explanation;
/**
* An abstract class that defines a way for Payload*Query instances to transform
* An abstract class that defines a way for PayloadScoreQuery instances to transform
* the cumulative effects of payload scores for a document.
*
* @see org.apache.lucene.search.payloads.PayloadTermQuery for more information
* @see org.apache.lucene.search.payloads.PayloadScoreQuery for more information
*
* @lucene.experimental This class and its derivations are experimental and subject to
* change

View File

@ -1,239 +0,0 @@
package org.apache.lucene.search.payloads;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanScorer;
import org.apache.lucene.search.spans.SpanWeight;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.ToStringUtils;
/**
* This class is very similar to
* {@link org.apache.lucene.search.spans.SpanNearQuery} except that it factors
* in the value of the payloads located at each of the positions where the
* {@link org.apache.lucene.search.spans.TermSpans} occurs.
* <p>
* NOTE: In order to take advantage of this with the default scoring implementation
* ({@link DefaultSimilarity}), you must override {@link DefaultSimilarity#scorePayload(int, int, int, BytesRef)},
* which returns 1 by default.
* <p>
* Payload scores are aggregated using a pluggable {@link PayloadFunction}.
*
* @see org.apache.lucene.search.similarities.Similarity.SimScorer#computePayloadFactor(int, int, int, BytesRef)
*
* @deprecated use {@link PayloadScoreQuery} to wrap {@link SpanNearQuery}
*/
@Deprecated
public class PayloadNearQuery extends SpanNearQuery {

  /** Field shared by all clauses; payload scoring is keyed to this field. */
  protected String fieldName;
  /** Aggregates the individual payload scores into a single document factor. */
  protected PayloadFunction function;

  /** Creates a query that averages payload scores via {@link AveragePayloadFunction}. */
  public PayloadNearQuery(SpanQuery[] clauses, int slop, boolean inOrder) {
    this(clauses, slop, inOrder, new AveragePayloadFunction());
  }

  /**
   * Creates a query with an explicit payload aggregation function.
   *
   * @param clauses the sub-span queries; all must share the same non-null field
   * @param slop maximum number of intervening positions allowed between clauses
   * @param inOrder true if the clauses must match in order
   * @param function non-null aggregation of per-position payload scores
   * @throws NullPointerException if the field of the first clause or the function is null
   */
  public PayloadNearQuery(SpanQuery[] clauses, int slop, boolean inOrder,
      PayloadFunction function) {
    super(clauses, slop, inOrder);
    this.fieldName = Objects.requireNonNull(clauses[0].getField(), "all clauses must have same non null field");
    this.function = Objects.requireNonNull(function);
  }

  @Override
  public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    List<SpanWeight> subWeights = new ArrayList<>();
    for (SpanQuery q : clauses) {
      // Sub-weights never score on their own; scoring happens in the payload scorer.
      subWeights.add(q.createWeight(searcher, false));
    }
    return new PayloadNearSpanWeight(subWeights, searcher, needsScores ? getTermContexts(subWeights) : null);
  }

  @Override
  public PayloadNearQuery clone() {
    int sz = clauses.size();
    SpanQuery[] newClauses = new SpanQuery[sz];
    for (int i = 0; i < sz; i++) {
      newClauses[i] = (SpanQuery) clauses.get(i).clone();
    }
    PayloadNearQuery boostingNearQuery = new PayloadNearQuery(newClauses, slop,
        inOrder, function);
    // Preserve the boost on the copy; the superclass clone contract requires it.
    boostingNearQuery.setBoost(getBoost());
    return boostingNearQuery;
  }

  @Override
  public String toString(String field) {
    StringBuilder buffer = new StringBuilder();
    buffer.append("payloadNear([");
    Iterator<SpanQuery> i = clauses.iterator();
    while (i.hasNext()) {
      SpanQuery clause = i.next();
      buffer.append(clause.toString(field));
      if (i.hasNext()) {
        buffer.append(", ");
      }
    }
    buffer.append("], ");
    buffer.append(slop);
    buffer.append(", ");
    buffer.append(inOrder);
    buffer.append(")");
    buffer.append(ToStringUtils.boost(getBoost()));
    return buffer.toString();
  }

  @Override
  public int hashCode() {
    final int prime = 31;
    int result = super.hashCode();
    result = prime * result + fieldName.hashCode();
    result = prime * result + function.hashCode();
    return result;
  }

  @Override
  public boolean equals(Object obj) {
    if (! super.equals(obj)) {
      return false;
    }
    PayloadNearQuery other = (PayloadNearQuery) obj;
    return fieldName.equals(other.fieldName)
        && function.equals(other.function);
  }

  /** Weight that produces payload-aware span scorers and payload-aware explanations. */
  public class PayloadNearSpanWeight extends SpanNearWeight {

    public PayloadNearSpanWeight(List<SpanWeight> subWeights, IndexSearcher searcher, Map<Term, TermContext> terms)
        throws IOException {
      super(subWeights, searcher, terms);
    }

    @Override
    public Scorer scorer(LeafReaderContext context) throws IOException {
      // Request payloads explicitly; plain span iteration would not load them.
      Spans spans = super.getSpans(context, Postings.PAYLOADS);
      Similarity.SimScorer simScorer = simWeight == null ? null : similarity.simScorer(simWeight, context);
      return (spans == null) ? null : new PayloadNearSpanScorer(spans, this, simScorer);
    }

    @Override
    public Explanation explain(LeafReaderContext context, int doc) throws IOException {
      PayloadNearSpanScorer scorer = (PayloadNearSpanScorer) scorer(context);
      if (scorer != null) {
        int newDoc = scorer.advance(doc);
        if (newDoc == doc) {
          float freq = scorer.freq();
          Explanation freqExplanation = Explanation.match(freq, "phraseFreq=" + freq);
          SimScorer docScorer = similarity.simScorer(simWeight, context);
          Explanation scoreExplanation = docScorer.explain(doc, freqExplanation);
          Explanation expl = Explanation.match(
              scoreExplanation.getValue(),
              "weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:",
              scoreExplanation);
          String field = ((SpanQuery)getQuery()).getField();
          // now the payloads part
          Explanation payloadExpl = function.explain(doc, field, scorer.payloadsSeen, scorer.payloadScore);
          // combined: final score is the span score multiplied by the payload factor
          return Explanation.match(
              expl.getValue() * payloadExpl.getValue(),
              "PayloadNearQuery, product of:",
              expl, payloadExpl);
        }
      }
      return Explanation.noMatch("no matching term");
    }
  }

  /** Scorer that multiplies the span score by the aggregated payload score. */
  public class PayloadNearSpanScorer extends SpanScorer {
    Spans spans;
    protected float payloadScore; // running aggregate maintained by processPayloads
    private int payloadsSeen;     // number of payloads folded into payloadScore so far
    private final PayloadSpanCollector collector = new PayloadSpanCollector();

    protected PayloadNearSpanScorer(Spans spans, SpanWeight weight, Similarity.SimScorer docScorer) throws IOException {
      super(spans, weight, docScorer);
      this.spans = spans;
    }

    // TODO change the whole spans api to use bytesRef, or nuke spans
    BytesRef scratch = new BytesRef();

    /**
     * By default, uses the {@link PayloadFunction} to score the payloads, but
     * can be overridden to do other things.
     *
     * @param payLoads The payloads
     * @param start The start position of the span being scored
     * @param end The end position of the span being scored
     *
     * @see Spans
     */
    protected void processPayloads(Collection<byte[]> payLoads, int start, int end) {
      for (final byte[] thePayload : payLoads) {
        // Reuse the scratch BytesRef to avoid an allocation per payload.
        scratch.bytes = thePayload;
        scratch.offset = 0;
        scratch.length = thePayload.length;
        payloadScore = function.currentScore(docID(), fieldName, start, end,
            payloadsSeen, payloadScore, docScorer.computePayloadFactor(docID(),
                spans.startPosition(), spans.endPosition(), scratch));
        ++payloadsSeen;
      }
    }

    @Override
    protected void doStartCurrentDoc() throws IOException {
      // Reset the per-document payload aggregate before collecting new spans.
      payloadScore = 0;
      payloadsSeen = 0;
    }

    @Override
    protected void doCurrentSpans() throws IOException {
      collector.reset();
      spans.collect(collector);
      processPayloads(collector.getPayloads(), spans.startPosition(), spans.endPosition());
    }

    @Override
    public float scoreCurrentDoc() throws IOException {
      // Span score times the function's document-level payload score.
      return super.scoreCurrentDoc()
          * function.docScore(docID(), fieldName, payloadsSeen, payloadScore);
    }
  }
}

View File

@ -1,230 +0,0 @@
package org.apache.lucene.search.payloads;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Collections;
import java.util.Map;
import java.util.Objects;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.search.spans.SpanCollector;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanScorer;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.SpanWeight;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.BytesRef;
/**
* This class is very similar to
* {@link org.apache.lucene.search.spans.SpanTermQuery} except that it factors
* in the value of the payload located at each of the positions where the
* {@link org.apache.lucene.index.Term} occurs.
* <p>
* NOTE: In order to take advantage of this with the default scoring implementation
* ({@link DefaultSimilarity}), you must override {@link DefaultSimilarity#scorePayload(int, int, int, BytesRef)},
* which returns 1 by default.
* <p>
* Payload scores are aggregated using a pluggable {@link PayloadFunction}.
* @see org.apache.lucene.search.similarities.Similarity.SimScorer#computePayloadFactor(int, int, int, BytesRef)
*
* @deprecated use {@link PayloadScoreQuery} to wrap {@link SpanTermQuery}
**/
@Deprecated
public class PayloadTermQuery extends SpanTermQuery {

  /** Aggregates the individual payload scores into a single document factor. */
  protected PayloadFunction function;
  /** When false, only the payload score contributes; the span score is ignored. */
  private boolean includeSpanScore;

  /** Creates a query whose score includes both the span score and the payload score. */
  public PayloadTermQuery(Term term, PayloadFunction function) {
    this(term, function, true);
  }

  /**
   * Creates a query with explicit control over span-score inclusion.
   *
   * @param term the term to match
   * @param function non-null aggregation of per-position payload scores
   * @param includeSpanScore if true, multiply the span score into the result
   * @throws NullPointerException if function is null
   */
  public PayloadTermQuery(Term term, PayloadFunction function,
      boolean includeSpanScore) {
    super(term);
    this.function = Objects.requireNonNull(function);
    this.includeSpanScore = includeSpanScore;
  }

  @Override
  public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    TermContext context = TermContext.build(searcher.getTopReaderContext(), term);
    return new PayloadTermWeight(context, searcher, needsScores ? Collections.singletonMap(term, context) : null);
  }

  /** Collects the payload of the most recently visited position, if any. */
  private static class PayloadTermCollector implements SpanCollector {

    BytesRef payload;

    @Override
    public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException {
      payload = postings.getPayload();
    }

    @Override
    public void reset() {
      payload = null;
    }
  }

  /** Weight that produces payload-aware span scorers and payload-aware explanations. */
  private class PayloadTermWeight extends SpanTermWeight {

    public PayloadTermWeight(TermContext context, IndexSearcher searcher, Map<Term, TermContext> terms)
        throws IOException {
      super(context, searcher, terms);
    }

    @Override
    public PayloadTermSpanScorer scorer(LeafReaderContext context) throws IOException {
      // Request payloads explicitly; plain span iteration would not load them.
      Spans spans = super.getSpans(context, Postings.PAYLOADS);
      Similarity.SimScorer simScorer = simWeight == null ? null : similarity.simScorer(simWeight, context);
      return (spans == null) ? null : new PayloadTermSpanScorer(spans, this, simScorer);
    }

    /** Scorer combining (optionally) the span score with the aggregated payload score. */
    protected class PayloadTermSpanScorer extends SpanScorer {
      protected BytesRef payload;
      protected float payloadScore; // running aggregate maintained by processPayload
      protected int payloadsSeen;   // number of payloads folded into payloadScore so far
      private final PayloadTermCollector payloadCollector = new PayloadTermCollector();

      public PayloadTermSpanScorer(Spans spans, SpanWeight weight, Similarity.SimScorer docScorer) throws IOException {
        super(spans, weight, docScorer);
      }

      @Override
      protected void doStartCurrentDoc() throws IOException {
        // Reset the per-document payload aggregate before collecting new spans.
        payloadScore = 0;
        payloadsSeen = 0;
      }

      @Override
      protected void doCurrentSpans() throws IOException {
        payloadCollector.reset();
        spans.collect(payloadCollector);
        processPayload();
      }

      /** Folds the current position's payload (factor 1 when absent) into the aggregate. */
      protected void processPayload() throws IOException {
        float payloadFactor = payloadCollector.payload == null ? 1F :
            docScorer.computePayloadFactor(docID(), spans.startPosition(), spans.endPosition(), payloadCollector.payload);
        payloadScore = function.currentScore(docID(), term.field(), spans.startPosition(), spans.endPosition(),
            payloadsSeen, payloadScore, payloadFactor);
        payloadsSeen++;
      }

      /**
       *
       * @return {@link #getSpanScore()} * {@link #getPayloadScore()}
       * @throws IOException if there is a low-level I/O error
       */
      @Override
      public float scoreCurrentDoc() throws IOException {
        return includeSpanScore ? getSpanScore() * getPayloadScore()
            : getPayloadScore();
      }

      /**
       * Returns the SpanScorer score only.
       * <p>
       * Should not be overridden without good cause!
       *
       * @return the score for just the Span part w/o the payload
       * @throws IOException if there is a low-level I/O error
       *
       * @see #score()
       */
      protected float getSpanScore() throws IOException {
        return super.scoreCurrentDoc();
      }

      /**
       * The score for the payload
       *
       * @return The score, as calculated by
       *         {@link PayloadFunction#docScore(int, String, int, float)}
       */
      protected float getPayloadScore() {
        return function.docScore(docID(), term.field(), payloadsSeen, payloadScore);
      }
    }

    @Override
    public Explanation explain(LeafReaderContext context, int doc) throws IOException {
      PayloadTermSpanScorer scorer = scorer(context);
      if (scorer != null) {
        int newDoc = scorer.advance(doc);
        if (newDoc == doc) {
          float freq = scorer.sloppyFreq();
          Explanation freqExplanation = Explanation.match(freq, "phraseFreq=" + freq);
          SimScorer docScorer = similarity.simScorer(simWeight, context);
          Explanation scoreExplanation = docScorer.explain(doc, freqExplanation);
          Explanation expl = Explanation.match(
              scoreExplanation.getValue(),
              "weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:",
              scoreExplanation);
          // now the payloads part
          // QUESTION: Is there a way to avoid this skipTo call? We need to know
          // whether to load the payload or not
          // GSI: I suppose we could toString the payload, but I don't think that
          // would be a good idea
          String field = ((SpanQuery)getQuery()).getField();
          Explanation payloadExpl = function.explain(doc, field, scorer.payloadsSeen, scorer.payloadScore);
          // combined
          if (includeSpanScore) {
            return Explanation.match(
                expl.getValue() * payloadExpl.getValue(),
                "btq, product of:", expl, payloadExpl);
          } else {
            return Explanation.match(payloadExpl.getValue(), "btq(includeSpanScore=false), result of:", payloadExpl);
          }
        }
      }
      return Explanation.noMatch("no matching term");
    }
  }

  @Override
  public int hashCode() {
    final int prime = 31;
    int result = super.hashCode();
    result = prime * result + function.hashCode();
    result = prime * result + (includeSpanScore ? 1231 : 1237);
    return result;
  }

  @Override
  public boolean equals(Object obj) {
    if (!super.equals(obj)) {
      return false;
    }
    PayloadTermQuery other = (PayloadTermQuery) obj;
    return (includeSpanScore == other.includeSpanScore)
        && function.equals(other.function);
  }
}

View File

@ -20,9 +20,8 @@
* <p>
* The following Query implementations are provided:
* <ol>
* <li>{@link org.apache.lucene.search.payloads.PayloadTermQuery PayloadTermQuery} -- Boost a term's score based on the value of the payload located at that term.</li>
* <li>{@link org.apache.lucene.search.payloads.PayloadNearQuery PayloadNearQuery} -- A {@link org.apache.lucene.search.spans.SpanNearQuery SpanNearQuery} that factors in the value of the payloads located
* at each of the positions where the spans occur.</li>
* <li>{@link org.apache.lucene.search.payloads.PayloadScoreQuery PayloadScoreQuery} -- For all terms matched by
* a SpanQuery, boost the score based on the value of the payload located at those terms.</li>
* </ol>
*/
package org.apache.lucene.search.payloads;

View File

@ -48,26 +48,22 @@ public class TestPayloadExplanations extends BaseExplanationTestCase {
});
}
/** macro for payloadtermquery */
private SpanQuery pt(String s, PayloadFunction fn, boolean includeSpanScore) {
return new PayloadTermQuery(new Term(FIELD,s), fn, includeSpanScore);
/** macro for payloadscorequery */
private SpanQuery pt(String s, PayloadFunction fn) {
return new PayloadScoreQuery(new SpanTermQuery(new Term(FIELD,s)), fn);
}
/* simple PayloadTermQueries */
public void testPT1() throws Exception {
for (PayloadFunction fn : functions) {
qtest(pt("w1", fn, false), new int[] {0,1,2,3});
qtest(pt("w1", fn, true), new int[] {0,1,2,3});
qtest(pt("w1", fn), new int[] {0,1,2,3});
}
}
public void testPT2() throws Exception {
for (PayloadFunction fn : functions) {
SpanQuery q = pt("w1", fn, false);
q.setBoost(1000);
qtest(q, new int[] {0,1,2,3});
q = pt("w1", fn, true);
SpanQuery q = pt("w1", fn);
q.setBoost(1000);
qtest(q, new int[] {0,1,2,3});
}
@ -75,17 +71,13 @@ public class TestPayloadExplanations extends BaseExplanationTestCase {
public void testPT4() throws Exception {
for (PayloadFunction fn : functions) {
qtest(pt("xx", fn, false), new int[] {2,3});
qtest(pt("xx", fn, true), new int[] {2,3});
qtest(pt("xx", fn), new int[] {2,3});
}
}
public void testPT5() throws Exception {
for (PayloadFunction fn : functions) {
SpanQuery q = pt("xx", fn, false);
q.setBoost(1000);
qtest(q, new int[] {2,3});
q = pt("xx", fn, true);
SpanQuery q = pt("xx", fn);
q.setBoost(1000);
qtest(q, new int[] {2,3});
}

View File

@ -1,351 +0,0 @@
package org.apache.lucene.search.payloads;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.QueryUtils;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.English;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import java.io.IOException;
/**
 * Tests {@link PayloadNearQuery} scoring: documents are indexed with alternating
 * payloads of 2 and 4, and {@link BoostingSimilarity} neutralizes every other
 * scoring factor so hit scores reflect only the payload aggregation function.
 */
public class TestPayloadNearQuery extends LuceneTestCase {
  private static IndexSearcher searcher;
  private static IndexReader reader;
  private static Directory directory;
  private static BoostingSimilarity similarity = new BoostingSimilarity();
  private static byte[] payload2 = new byte[]{2};
  private static byte[] payload4 = new byte[]{4};

  /** Analyzer that attaches alternating 2/4 payloads to each token. */
  private static class PayloadAnalyzer extends Analyzer {
    @Override
    public TokenStreamComponents createComponents(String fieldName) {
      Tokenizer result = new MockTokenizer(MockTokenizer.SIMPLE, true);
      return new TokenStreamComponents(result, new PayloadFilter(result, fieldName));
    }
  }

  /** Filter that sets payload 2 on even-numbered tokens and 4 on odd-numbered ones. */
  private static class PayloadFilter extends TokenFilter {
    private final String fieldName;
    private int numSeen = 0;
    private final PayloadAttribute payAtt;

    public PayloadFilter(TokenStream input, String fieldName) {
      super(input);
      this.fieldName = fieldName;
      payAtt = addAttribute(PayloadAttribute.class);
    }

    @Override
    public boolean incrementToken() throws IOException {
      boolean result = false;
      if (input.incrementToken()) {
        if (numSeen % 2 == 0) {
          payAtt.setPayload(new BytesRef(payload2));
        } else {
          payAtt.setPayload(new BytesRef(payload4));
        }
        numSeen++;
        result = true;
      }
      return result;
    }

    @Override
    public void reset() throws IOException {
      super.reset();
      this.numSeen = 0;
    }
  }

  /** Builds an exact (slop 0) PayloadNearQuery over the whitespace-split words of {@code phrase}. */
  private PayloadNearQuery newPhraseQuery (String fieldName, String phrase, boolean inOrder, PayloadFunction function ) {
    String[] words = phrase.split("[\\s]+");
    SpanQuery clauses[] = new SpanQuery[words.length];
    for (int i=0;i<clauses.length;i++) {
      clauses[i] = new SpanTermQuery(new Term(fieldName, words[i]));
    }
    return new PayloadNearQuery(clauses, 0, inOrder, function);
  }

  @BeforeClass
  public static void beforeClass() throws Exception {
    directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
        newIndexWriterConfig(new PayloadAnalyzer())
            .setSimilarity(similarity));
    //writer.infoStream = System.out;
    for (int i = 0; i < 1000; i++) {
      Document doc = new Document();
      doc.add(newTextField("field", English.intToEnglish(i), Field.Store.YES));
      String txt = English.intToEnglish(i) +' '+English.intToEnglish(i+1);
      doc.add(newTextField("field2", txt, Field.Store.YES));
      writer.addDocument(doc);
    }
    reader = writer.getReader();
    writer.close();
    searcher = newSearcher(reader);
    searcher.setSimilarity(similarity);
  }

  @AfterClass
  public static void afterClass() throws Exception {
    searcher = null;
    reader.close();
    reader = null;
    directory.close();
    directory = null;
  }

  public void test() throws IOException {
    PayloadNearQuery query;
    TopDocs hits;
    query = newPhraseQuery("field", "twenty two", true, new AveragePayloadFunction());
    QueryUtils.check(query);
    // all 10 hits should have score = 3 because adjacent terms have payloads of 2,4
    // and all the similarity factors are set to 1
    hits = searcher.search(query, 100);
    assertTrue("hits is null and it shouldn't be", hits != null);
    assertTrue("should be 10 hits", hits.totalHits == 10);
    for (int j = 0; j < hits.scoreDocs.length; j++) {
      ScoreDoc doc = hits.scoreDocs[j];
      assertTrue(doc.score + " does not equal: " + 3, doc.score == 3);
    }
    for (int i=1;i<10;i++) {
      query = newPhraseQuery("field", English.intToEnglish(i)+" hundred", true, new AveragePayloadFunction());
      if (VERBOSE) {
        System.out.println("TEST: run query=" + query);
      }
      // all should have score = 3 because adjacent terms have payloads of 2,4
      // and all the similarity factors are set to 1
      hits = searcher.search(query, 100);
      assertTrue("hits is null and it shouldn't be", hits != null);
      assertEquals("should be 100 hits", 100, hits.totalHits);
      for (int j = 0; j < hits.scoreDocs.length; j++) {
        ScoreDoc doc = hits.scoreDocs[j];
        // System.out.println("Doc: " + doc.toString());
        // System.out.println("Explain: " + searcher.explain(query, doc.doc));
        assertTrue(doc.score + " does not equal: " + 3, doc.score == 3);
      }
    }
  }

  public void testPayloadNear() throws IOException {
    PayloadNearQuery query;
    // reuse the shared helper instead of rebuilding the same two sub-queries inline
    query = new PayloadNearQuery(getClauses(), 10, false);
    //System.out.println(query.toString());
    assertEquals(12, searcher.search(query, 100).totalHits);
    /*
    System.out.println(hits.totalHits);
    for (int j = 0; j < hits.scoreDocs.length; j++) {
      ScoreDoc doc = hits.scoreDocs[j];
      System.out.println("doc: "+doc.doc+", score: "+doc.score);
    }
    */
  }

  public void testAverageFunction() throws IOException {
    PayloadNearQuery query;
    TopDocs hits;
    query = newPhraseQuery("field", "twenty two", true, new AveragePayloadFunction());
    QueryUtils.check(query);
    // all 10 hits should have score = 3 because adjacent terms have payloads of 2,4
    // and all the similarity factors are set to 1
    hits = searcher.search(query, 100);
    assertTrue("hits is null and it shouldn't be", hits != null);
    assertTrue("should be 10 hits", hits.totalHits == 10);
    for (int j = 0; j < hits.scoreDocs.length; j++) {
      ScoreDoc doc = hits.scoreDocs[j];
      assertTrue(doc.score + " does not equal: " + 3, doc.score == 3);
      Explanation explain = searcher.explain(query, hits.scoreDocs[j].doc);
      String exp = explain.toString();
      assertTrue(exp, exp.indexOf("AveragePayloadFunction") > -1);
      assertTrue(hits.scoreDocs[j].score + " explain value does not equal: " + 3, explain.getValue() == 3f);
    }
  }

  public void testMaxFunction() throws IOException {
    PayloadNearQuery query;
    TopDocs hits;
    query = newPhraseQuery("field", "twenty two", true, new MaxPayloadFunction());
    QueryUtils.check(query);
    // all 10 hits should have score = 4 (max payload value)
    hits = searcher.search(query, 100);
    assertTrue("hits is null and it shouldn't be", hits != null);
    assertTrue("should be 10 hits", hits.totalHits == 10);
    for (int j = 0; j < hits.scoreDocs.length; j++) {
      ScoreDoc doc = hits.scoreDocs[j];
      assertTrue(doc.score + " does not equal: " + 4, doc.score == 4);
      Explanation explain = searcher.explain(query, hits.scoreDocs[j].doc);
      String exp = explain.toString();
      assertTrue(exp, exp.indexOf("MaxPayloadFunction") > -1);
      assertTrue(hits.scoreDocs[j].score + " explain value does not equal: " + 4, explain.getValue() == 4f);
    }
  }

  public void testMinFunction() throws IOException {
    PayloadNearQuery query;
    TopDocs hits;
    query = newPhraseQuery("field", "twenty two", true, new MinPayloadFunction());
    QueryUtils.check(query);
    // all 10 hits should have score = 2 (min payload value)
    hits = searcher.search(query, 100);
    assertTrue("hits is null and it shouldn't be", hits != null);
    assertTrue("should be 10 hits", hits.totalHits == 10);
    for (int j = 0; j < hits.scoreDocs.length; j++) {
      ScoreDoc doc = hits.scoreDocs[j];
      assertTrue(doc.score + " does not equal: " + 2, doc.score == 2);
      Explanation explain = searcher.explain(query, hits.scoreDocs[j].doc);
      String exp = explain.toString();
      assertTrue(exp, exp.indexOf("MinPayloadFunction") > -1);
      assertTrue(hits.scoreDocs[j].score + " explain value does not equal: " + 2, explain.getValue() == 2f);
    }
  }

  /** Two near sub-queries over field2 ("twenty two", "twenty three") shared by several tests. */
  private SpanQuery[] getClauses() {
    SpanNearQuery q1, q2;
    q1 = spanNearQuery("field2", "twenty two");
    q2 = spanNearQuery("field2", "twenty three");
    SpanQuery[] clauses = new SpanQuery[2];
    clauses[0] = q1;
    clauses[1] = q2;
    return clauses;
  }

  /** Builds a wide (slop 10000) unordered SpanNearQuery of payload term queries over {@code words}. */
  private SpanNearQuery spanNearQuery(String fieldName, String words) {
    String[] wordList = words.split("[\\s]+");
    SpanQuery clauses[] = new SpanQuery[wordList.length];
    for (int i=0;i<clauses.length;i++) {
      clauses[i] = new PayloadTermQuery(new Term(fieldName, wordList[i]), new AveragePayloadFunction());
    }
    return new SpanNearQuery(clauses, 10000, false);
  }

  public void testLongerSpan() throws IOException {
    PayloadNearQuery query;
    TopDocs hits;
    query = newPhraseQuery("field", "nine hundred ninety nine", true, new AveragePayloadFunction());
    hits = searcher.search(query, 100);
    assertTrue("hits is null and it shouldn't be", hits != null);
    ScoreDoc doc = hits.scoreDocs[0];
    // System.out.println("Doc: " + doc.toString());
    // System.out.println("Explain: " + searcher.explain(query, doc.doc));
    assertTrue("there should only be one hit", hits.totalHits == 1);
    // should have score = 3 because adjacent terms have payloads of 2,4
    assertTrue(doc.score + " does not equal: " + 3, doc.score == 3);
  }

  public void testComplexNested() throws IOException {
    PayloadNearQuery query;
    TopDocs hits;
    // combine ordered and unordered spans with some nesting to make sure all payloads are counted
    SpanQuery q1 = newPhraseQuery("field", "nine hundred", true, new AveragePayloadFunction());
    SpanQuery q2 = newPhraseQuery("field", "ninety nine", true, new AveragePayloadFunction());
    SpanQuery q3 = newPhraseQuery("field", "nine ninety", false, new AveragePayloadFunction());
    SpanQuery q4 = newPhraseQuery("field", "hundred nine", false, new AveragePayloadFunction());
    SpanQuery[]clauses = new SpanQuery[] {new PayloadNearQuery(new SpanQuery[] {q1,q2}, 0, true), new PayloadNearQuery(new SpanQuery[] {q3,q4}, 0, false)};
    query = new PayloadNearQuery(clauses, 0, false);
    hits = searcher.search(query, 100);
    assertTrue("hits is null and it shouldn't be", hits != null);
    // should be only 1 hit - doc 999
    assertTrue("should only be one hit", hits.scoreDocs.length == 1);
    // the score should be 3 - the average of all the underlying payloads
    ScoreDoc doc = hits.scoreDocs[0];
    // System.out.println("Doc: " + doc.toString());
    // System.out.println("Explain: " + searcher.explain(query, doc.doc));
    assertTrue(doc.score + " does not equal: " + 3, doc.score == 3);
  }

  /** Similarity that returns the raw payload byte and 1.0 for every other factor. */
  static class BoostingSimilarity extends DefaultSimilarity {

    @Override
    public float queryNorm(float sumOfSquaredWeights) {
      return 1.0f;
    }

    @Override
    public float coord(int overlap, int maxOverlap) {
      return 1.0f;
    }

    @Override
    public float scorePayload(int docId, int start, int end, BytesRef payload) {
      //we know it is size 4 here, so ignore the offset/length
      return payload.bytes[payload.offset];
    }

    //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    //Make everything else 1 so we see the effect of the payload
    //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    @Override
    public float lengthNorm(FieldInvertState state) {
      return state.getBoost();
    }

    @Override
    public float sloppyFreq(int distance) {
      return 1.0f;
    }

    @Override
    public float tf(float freq) {
      return 1.0f;
    }

    // idf used for phrase queries
    @Override
    public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics[] termStats) {
      return Explanation.match(1.0f, "Inexplicable");
    }
  }
}

View File

@ -16,6 +16,8 @@ package org.apache.lucene.search.payloads;
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
@ -24,7 +26,6 @@ import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
@ -39,6 +40,7 @@ import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.spans.MultiSpansWrapper;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.store.Directory;
@ -48,8 +50,6 @@ import org.apache.lucene.util.LuceneTestCase;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import java.io.IOException;
/**
*
@ -149,7 +149,7 @@ public class TestPayloadTermQuery extends LuceneTestCase {
}
public void test() throws IOException {
PayloadTermQuery query = new PayloadTermQuery(new Term("field", "seventy"),
SpanQuery query = new PayloadScoreQuery(new SpanTermQuery(new Term("field", "seventy")),
new MaxPayloadFunction());
TopDocs hits = searcher.search(query, 100);
assertTrue("hits is null and it shouldn't be", hits != null);
@ -175,7 +175,7 @@ public class TestPayloadTermQuery extends LuceneTestCase {
}
public void testQuery() {
PayloadTermQuery boostingFuncTermQuery = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"),
SpanQuery boostingFuncTermQuery = new PayloadScoreQuery(new SpanTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy")),
new MaxPayloadFunction());
QueryUtils.check(boostingFuncTermQuery);
@ -183,14 +183,14 @@ public class TestPayloadTermQuery extends LuceneTestCase {
assertTrue(boostingFuncTermQuery.equals(spanTermQuery) == spanTermQuery.equals(boostingFuncTermQuery));
PayloadTermQuery boostingFuncTermQuery2 = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"),
SpanQuery boostingFuncTermQuery2 = new PayloadScoreQuery(new SpanTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy")),
new AveragePayloadFunction());
QueryUtils.checkUnequal(boostingFuncTermQuery, boostingFuncTermQuery2);
}
public void testMultipleMatchesPerDoc() throws Exception {
PayloadTermQuery query = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"),
SpanQuery query = new PayloadScoreQuery(new SpanTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy")),
new MaxPayloadFunction());
TopDocs hits = searcher.search(query, 100);
assertTrue("hits is null and it shouldn't be", hits != null);
@ -228,52 +228,8 @@ public class TestPayloadTermQuery extends LuceneTestCase {
assertTrue(count + " does not equal: " + 200, count == 200);
}
//Set includeSpanScore to false, in which case just the payload score comes through.
public void testIgnoreSpanScorer() throws Exception {
PayloadTermQuery query = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"),
new MaxPayloadFunction(), false);
IndexReader reader = DirectoryReader.open(directory);
IndexSearcher theSearcher = newSearcher(reader);
theSearcher.setSimilarity(new FullSimilarity());
TopDocs hits = searcher.search(query, 100);
assertTrue("hits is null and it shouldn't be", hits != null);
assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100);
//they should all have the exact same score, because they all contain seventy once, and we set
//all the other similarity factors to be 1
//System.out.println("Hash: " + seventyHash + " Twice Hash: " + 2*seventyHash);
assertTrue(hits.getMaxScore() + " does not equal: " + 4.0, hits.getMaxScore() == 4.0);
//there should be exactly 10 items that score a 4, all the rest should score a 2
//The 10 items are: 70 + i*100 where i in [0-9]
int numTens = 0;
for (int i = 0; i < hits.scoreDocs.length; i++) {
ScoreDoc doc = hits.scoreDocs[i];
if (doc.doc % 10 == 0) {
numTens++;
assertTrue(doc.score + " does not equal: " + 4.0, doc.score == 4.0);
} else {
assertTrue(doc.score + " does not equal: " + 2, doc.score == 2);
}
}
assertTrue(numTens + " does not equal: " + 10, numTens == 10);
CheckHits.checkExplanations(query, "field", searcher, true);
Spans spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), query);
assertTrue("spans is null and it shouldn't be", spans != null);
//should be two matches per document
int count = 0;
//100 hits times 2 matches per hit, we should have 200 in count
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
count++;
}
}
reader.close();
}
public void testNoMatch() throws Exception {
PayloadTermQuery query = new PayloadTermQuery(new Term(PayloadHelper.FIELD, "junk"),
SpanQuery query = new PayloadScoreQuery(new SpanTermQuery(new Term(PayloadHelper.FIELD, "junk")),
new MaxPayloadFunction());
TopDocs hits = searcher.search(query, 100);
assertTrue("hits is null and it shouldn't be", hits != null);
@ -282,9 +238,9 @@ public class TestPayloadTermQuery extends LuceneTestCase {
}
public void testNoPayload() throws Exception {
PayloadTermQuery q1 = new PayloadTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "zero"),
SpanQuery q1 = new PayloadScoreQuery(new SpanTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "zero")),
new MaxPayloadFunction());
PayloadTermQuery q2 = new PayloadTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "foo"),
SpanQuery q2 = new PayloadScoreQuery(new SpanTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "foo")),
new MaxPayloadFunction());
BooleanClause c1 = new BooleanClause(q1, BooleanClause.Occur.MUST);
BooleanClause c2 = new BooleanClause(q2, BooleanClause.Occur.MUST_NOT);