LUCENE-736: Sloppy phrase query with repeating terms matches wrong docs.

For example query "B C B"~2 matches the doc "A B C D E".
Search time cost of this fix is 4%, for sloppy phrase search. 
This fix also partially brings back the the tests checkSkipTo() in QueryUtils, 
(which was disabled by LUCENE-730), but this is now invoked 
only for non Boolean scorers.



git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@531733 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Doron Cohen 2007-04-24 05:32:47 +00:00
parent bb6d2cc9c9
commit e06188c0d6
8 changed files with 453 additions and 69 deletions

View File

@ -98,6 +98,10 @@ Bug fixes
the instance of IndexWriter (but, not the index itself) by
referencing already deleted segments. This bug was only present
in 2.2 (trunk), ie was never released. (Mike McCandless)
13. LUCENE-736: Sloppy phrase query with repeating terms matches wrong docs.
For example query "B C B"~2 matches the doc "A B C D E". (Doron Cohen)
New features

View File

@ -22,27 +22,30 @@ import org.apache.lucene.index.*;
final class ExactPhraseScorer extends PhraseScorer {
ExactPhraseScorer(Weight weight, TermPositions[] tps, int[] positions, Similarity similarity,
ExactPhraseScorer(Weight weight, TermPositions[] tps, int[] offsets, Similarity similarity,
byte[] norms) {
super(weight, tps, positions, similarity, norms);
super(weight, tps, offsets, similarity, norms);
}
protected final float phraseFreq() throws IOException {
// sort list with pq
pq.clear();
for (PhrasePositions pp = first; pp != null; pp = pp.next) {
pp.firstPosition();
pq.put(pp); // build pq from list
}
pqToList(); // rebuild list from pq
// for counting how many times the exact phrase is found in current document,
// just count how many times all PhrasePosition's have exactly the same position.
int freq = 0;
do { // find position w/ all terms
while (first.position < last.position) { // scan forward in first
do {
if (!first.nextPosition())
return (float)freq;
} while (first.position < last.position);
firstToLast();
do {
if (!first.nextPosition())
return (float)freq;
} while (first.position < last.position);
firstToLast();
}
freq++; // all equal: a match
} while (last.nextPosition());

View File

@ -20,6 +20,9 @@ package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.index.*;
/**
* Position of a term in a document that takes into account the term offset within the phrase.
*/
final class PhrasePositions {
int doc; // current doc
int position; // position in doc
@ -27,6 +30,7 @@ final class PhrasePositions {
int offset; // position in phrase
TermPositions tp; // stream of positions
PhrasePositions next; // used to make lists
boolean repeats; // there's other pp for same term (e.g. query="1st word 2nd word"~1)
PhrasePositions(TermPositions t, int o) {
tp = t;
@ -61,6 +65,12 @@ final class PhrasePositions {
nextPosition();
}
/**
* Go to next location of this term current document, and set
* <code>position</code> as <code>location - offset</code>, so that a
* matching exact phrase is easily identified when all PhrasePositions
* have exactly the same <code>position</code>.
*/
final boolean nextPosition() throws IOException {
if (count-- > 0) { // read subsequent pos's
position = tp.nextPosition() - offset;

View File

@ -28,7 +28,12 @@ final class PhraseQueue extends PriorityQueue {
PhrasePositions pp1 = (PhrasePositions)o1;
PhrasePositions pp2 = (PhrasePositions)o2;
if (pp1.doc == pp2.doc)
return pp1.position < pp2.position;
if (pp1.position == pp2.position)
// same doc and pp.position, so decide by actual term positions.
// rely on: pp.position == tp.position - offset.
return pp1.offset < pp2.offset;
else
return pp1.position < pp2.position;
else
return pp1.doc < pp2.doc;
}

View File

@ -21,6 +21,16 @@ import java.io.IOException;
import org.apache.lucene.index.*;
/** Expert: Scoring functionality for phrase queries.
* <br>A document is considered matching if it contains the phrase-query terms
* at "valid" positons. What "valid positions" are
* depends on the type of the phrase query: for an exact phrase query terms are required
* to appear in adjacent locations, while for a sloppy phrase query some distance between
* the terms is allowed. The abstract method {@link #phraseFreq()} of extending classes
* is invoked for each document containing all the phrase query terms, in order to
* compute the frequency of the phrase query in that document. A non zero frequency
* means a match.
*/
abstract class PhraseScorer extends Scorer {
private Weight weight;
protected byte[] norms;
@ -31,19 +41,23 @@ abstract class PhraseScorer extends Scorer {
protected PhraseQueue pq;
protected PhrasePositions first, last;
private float freq;
private float freq; //prhase frequency in current doc as computed by phraseFreq().
PhraseScorer(Weight weight, TermPositions[] tps, int[] positions, Similarity similarity,
PhraseScorer(Weight weight, TermPositions[] tps, int[] offsets, Similarity similarity,
byte[] norms) {
super(similarity);
this.norms = norms;
this.weight = weight;
this.value = weight.getValue();
// convert tps to a list
// convert tps to a list of phrase positions.
// note: phrase-position differs from term-position in that its position
// reflects the phrase offset: pp.pos = tp.pos - offset.
// this allows to easily identify a matching (exact) phrase
// when all PhrasePositions have exactly the same position.
for (int i = 0; i < tps.length; i++) {
PhrasePositions pp = new PhrasePositions(tps[i], positions[i]);
PhrasePositions pp = new PhrasePositions(tps[i], offsets[i]);
if (last != null) { // add next to end of list
last.next = pp;
} else
@ -94,6 +108,7 @@ abstract class PhraseScorer extends Scorer {
}
public boolean skipTo(int target) throws IOException {
firstTime = false;
for (PhrasePositions pp = first; more && pp != null; pp = pp.next) {
more = pp.skipTo(target);
}
@ -102,6 +117,13 @@ abstract class PhraseScorer extends Scorer {
return doNext();
}
/**
* For a document containing all the phrase query terms, compute the
* frequency of the phrase in that document.
* A non zero frequency means a match.
* <br>Note, that containing all phrase terms does not guarantee a match - they have to be found in matching locations.
* @return frequency of the phrase in current doc, 0 if not found.
*/
protected abstract float phraseFreq() throws IOException;
private void init() throws IOException {

View File

@ -20,38 +20,58 @@ package org.apache.lucene.search;
import org.apache.lucene.index.TermPositions;
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
final class SloppyPhraseScorer extends PhraseScorer {
private int slop;
private PhrasePositions repeats[];
private boolean checkedRepeats;
SloppyPhraseScorer(Weight weight, TermPositions[] tps, int[] positions, Similarity similarity,
SloppyPhraseScorer(Weight weight, TermPositions[] tps, int[] offsets, Similarity similarity,
int slop, byte[] norms) {
super(weight, tps, positions, similarity, norms);
super(weight, tps, offsets, similarity, norms);
this.slop = slop;
}
/**
* Score a candidate doc for all slop-valid position-combinations (matches)
* encountered while traversing/hopping the PhrasePositions.
* <br> The score contribution of a match depends on the distance:
* <br> - highest score for distance=0 (exact match).
* <br> - score gets lower as distance gets higher.
* <br>Example: for query "a b"~2, a document "x a b a y" can be scored twice:
* once for "a b" (distance=0), and once for "b a" (distance=2).
* <br>Pssibly not all valid combinations are encountered, because for efficiency
* we always propagate the least PhrasePosition. This allows to base on
* PriorityQueue and move forward faster.
* As result, for example, document "a b c b a"
* would score differently for queries "a b c"~4 and "c b a"~4, although
* they really are equivalent.
* Similarly, for doc "a b c b a f g", query "c b"~2
* would get same score as "g f"~2, although "c b"~2 could be matched twice.
* We may want to fix this in the future (currently not, for performance reasons).
*/
protected final float phraseFreq() throws IOException {
pq.clear();
int end = 0;
for (PhrasePositions pp = first; pp != null; pp = pp.next) {
pp.firstPosition();
if (pp.position > end)
end = pp.position;
pq.put(pp); // build pq from list
}
int end = initPhrasePositions();
float freq = 0.0f;
boolean done = false;
do {
boolean done = (end<0);
while (!done) {
PhrasePositions pp = (PhrasePositions) pq.pop();
int start = pp.position;
int next = ((PhrasePositions) pq.top()).position;
for (int pos = start; pos <= next; pos = pp.position) {
start = pos; // advance pp to min window
boolean tpsDiffer = true;
for (int pos = start; pos <= next || !tpsDiffer; pos = pp.position) {
if (pos<=next && tpsDiffer)
start = pos; // advance pp to min window
if (!pp.nextPosition()) {
done = true; // ran out of a term -- done
done = true; // ran out of a term -- done
break;
}
tpsDiffer = !pp.repeats || termPositionsDiffer(pp);
}
int matchLength = end - start;
@ -61,8 +81,113 @@ final class SloppyPhraseScorer extends PhraseScorer {
if (pp.position > end)
end = pp.position;
pq.put(pp); // restore pq
} while (!done);
}
return freq;
}
/**
* Init PhrasePositions in place.
* There is a one time initializatin for this scorer:
* <br>- Put in repeats[] each pp that has another pp with same position in the doc.
* <br>- Also mark each such pp by pp.repeats = true.
* <br>Later can consult with repeats[] in termPositionsDiffer(pp), making that check efficient.
* In particular, this allows to score queries with no repetiotions with no overhead due to this computation.
* <br>- Example 1 - query with no repetitions: "ho my"~2
* <br>- Example 2 - query with repetitions: "ho my my"~2
* <br>- Example 3 - query with repetitions: "my ho my"~2
* <br>Init per doc w/repeats in query, includes propagating some repeating pp's to avoid false phrase detection.
* @return end (max position), or -1 if any term ran out (i.e. done)
* @throws IOException
*/
private int initPhrasePositions() throws IOException {
int end = 0;
// no repeats at all (most common case is also the simplest one)
if (checkedRepeats && repeats==null) {
// build queue from list
pq.clear();
for (PhrasePositions pp = first; pp != null; pp = pp.next) {
pp.firstPosition();
if (pp.position > end)
end = pp.position;
pq.put(pp); // build pq from list
}
return end;
}
// position the pp's
for (PhrasePositions pp = first; pp != null; pp = pp.next)
pp.firstPosition();
// one time initializatin for this scorer
if (!checkedRepeats) {
checkedRepeats = true;
// check for repeats
HashMap m = null;
for (PhrasePositions pp = first; pp != null; pp = pp.next) {
int tpPos = pp.position + pp.offset;
for (PhrasePositions pp2 = pp.next; pp2 != null; pp2 = pp2.next) {
int tpPos2 = pp2.position + pp2.offset;
if (tpPos2 == tpPos) {
if (m == null)
m = new HashMap();
pp.repeats = true;
pp2.repeats = true;
m.put(pp,null);
m.put(pp2,null);
}
}
}
if (m!=null)
repeats = (PhrasePositions[]) m.keySet().toArray(new PhrasePositions[0]);
}
// with repeats must advance some repeating pp's so they all start with differing tp's
if (repeats!=null) {
// must propagate higher offsets first (otherwise might miss matches).
Arrays.sort(repeats, new Comparator() {
public int compare(Object x, Object y) {
return ((PhrasePositions) y).offset - ((PhrasePositions) x).offset;
}});
// now advance them
for (int i = 0; i < repeats.length; i++) {
PhrasePositions pp = repeats[i];
while (!termPositionsDiffer(pp)) {
if (!pp.nextPosition())
return -1; // ran out of a term -- done
}
}
}
// build queue from list
pq.clear();
for (PhrasePositions pp = first; pp != null; pp = pp.next) {
if (pp.position > end)
end = pp.position;
pq.put(pp); // build pq from list
}
return end;
}
// disalow two pp's to have the same tp position, so that same word twice
// in query would go elswhere in the matched doc
private boolean termPositionsDiffer(PhrasePositions pp) {
// efficiency note: a more efficient implemention could keep a map between repeating
// pp's, so that if pp1a, pp1b, pp1c are repeats term1, and pp2a, pp2b are repeats
// of term2, pp2a would only be checked against pp2b but not against pp1a, pp1b, pp1c.
// However this would complicate code, for a rather rare case, so choice is to compromise here.
int tpPos = pp.position + pp.offset;
for (int i = 0; i < repeats.length; i++) {
PhrasePositions pp2 = repeats[i];
if (pp2 == pp)
continue;
int tpPos2 = pp2.position + pp2.offset;
if (tpPos2 == tpPos)
return false;
}
return true;
}
}

View File

@ -68,18 +68,15 @@ public class QueryUtils {
/** various query sanity checks on a searcher */
public static void check(Query q1, Searcher s) {
// Disabled because this started failing after LUCENE-730 patch was applied
// try {
try {
check(q1);
/* disabled for use of BooleanScorer in BooleanScorer2.
if (s!=null && s instanceof IndexSearcher) {
IndexSearcher is = (IndexSearcher)s;
// checkSkipTo(q1,is);
checkSkipTo(q1,is);
}
} catch (IOException e) {
throw new RuntimeException(e);
}
*/
}
/** alternate scorer skipTo(),skipTo(),next(),next(),skipTo(),skipTo(), etc
@ -87,42 +84,71 @@ public class QueryUtils {
*/
public static void checkSkipTo(final Query q, final IndexSearcher s) throws IOException {
//System.out.println("Checking "+q);
final Weight w = q.weight(s);
final Scorer scorer = w.scorer(s.getIndexReader());
// FUTURE: ensure scorer.doc()==-1
if (BooleanQuery.getUseScorer14()) return; // 1.4 doesn't support skipTo
final int[] which = new int[1];
final int[] sdoc = new int[] {-1};
final float maxDiff = 1e-5f;
s.search(q,new HitCollector() {
public void collect(int doc, float score) {
try {
boolean more = (which[0]++&0x02)==0 ? scorer.skipTo(sdoc[0]+1) : scorer.next();
sdoc[0] = scorer.doc();
float scorerScore = scorer.score();
float scoreDiff = Math.abs(score-scorerScore);
scoreDiff=0; // TODO: remove this go get LUCENE-697 failures
if (more==false || doc != sdoc[0] || scoreDiff>maxDiff) {
throw new RuntimeException("ERROR matching docs:"
+"\n\tscorer.more=" + more + " doc="+sdoc[0] + " scorerScore="+scorerScore
+" scoreDiff="+scoreDiff + " maxDiff="+maxDiff
+"\n\thitCollector.doc=" + doc + " score="+score
+"\n\t Scorer=" + scorer
+"\n\t Query=" + q
+"\n\t Searcher=" + s
);
}
} catch (IOException e) {
throw new RuntimeException(e);
}
}
});
final int skip_op = 0;
final int next_op = 1;
final int orders [][] = {
{skip_op},
{next_op},
{skip_op, next_op},
{next_op, skip_op},
{skip_op, skip_op, next_op, next_op},
{next_op, next_op, skip_op, skip_op},
{skip_op, skip_op, skip_op, next_op, next_op},
};
for (int k = 0; k < orders.length; k++) {
final int order[] = orders[k];
//System.out.print("Order:");for (int i = 0; i < order.length; i++) System.out.print(order[i]==skip_op ? " skip()":" next()"); System.out.println();
final int opidx[] = {0};
// make sure next call to scorer is false.
TestCase.assertFalse((which[0]++&0x02)==0 ? scorer.skipTo(sdoc[0]+1) : scorer.next());
final Weight w = q.weight(s);
final Scorer scorer = w.scorer(s.getIndexReader());
if (scorer instanceof BooleanScorer || scorer instanceof BooleanScorer2) {
return; // TODO change this if BooleanScorers would once again guarantee docs in order.
}
// FUTURE: ensure scorer.doc()==-1
final int[] sdoc = new int[] {-1};
final float maxDiff = 1e-5f;
s.search(q,new HitCollector() {
public void collect(int doc, float score) {
try {
int op = order[(opidx[0]++)%order.length];
//System.out.println(op==skip_op ? "skip("+(sdoc[0]+1)+")":"next()");
boolean more = op==skip_op ? scorer.skipTo(sdoc[0]+1) : scorer.next();
sdoc[0] = scorer.doc();
float scorerScore = scorer.score();
float scoreDiff = Math.abs(score-scorerScore);
if (more==false || doc != sdoc[0] || scoreDiff>maxDiff) {
StringBuffer sbord = new StringBuffer();
for (int i = 0; i < order.length; i++)
sbord.append(order[i]==skip_op ? " skip()":" next()");
throw new RuntimeException("ERROR matching docs:"
+"\n\tscorer.more=" + more + " doc="+sdoc[0] + " scorerScore="+scorerScore
+" scoreDiff="+scoreDiff + " maxDiff="+maxDiff
+"\n\thitCollector.doc=" + doc + " score="+score
+"\n\t Scorer=" + scorer
+"\n\t Query=" + q
+"\n\t Searcher=" + s
+"\n\t Order=" + sbord
);
}
} catch (IOException e) {
throw new RuntimeException(e);
}
}
});
// make sure next call to scorer is false.
int op = order[(opidx[0]++)%order.length];
//System.out.println(op==skip_op ? "last: skip()":"last: next()");
boolean more = op==skip_op ? scorer.skipTo(sdoc[0]+1) : scorer.next();
TestCase.assertFalse(more);
}
}
}

View File

@ -35,6 +35,10 @@ import java.io.Reader;
* @author Erik Hatcher
*/
public class TestPhraseQuery extends TestCase {
/** threshold for comparing floats */
public static final float SCORE_COMP_THRESH = 1e-6f;
private IndexSearcher searcher;
private PhraseQuery query;
private RAMDirectory directory;
@ -57,8 +61,17 @@ public class TestPhraseQuery extends TestCase {
doc.add(new Field("repeated", "this is a repeated field - first part", Field.Store.YES, Field.Index.TOKENIZED));
Fieldable repeatedField = new Field("repeated", "second part of a repeated field", Field.Store.YES, Field.Index.TOKENIZED);
doc.add(repeatedField);
doc.add(new Field("palindrome", "one two three two one", Field.Store.YES, Field.Index.TOKENIZED));
writer.addDocument(doc);
doc = new Document();
doc.add(new Field("nonexist", "phrase exist notexist exist found", Field.Store.YES, Field.Index.TOKENIZED));
writer.addDocument(doc);
doc = new Document();
doc.add(new Field("nonexist", "phrase exist notexist exist found", Field.Store.YES, Field.Index.TOKENIZED));
writer.addDocument(doc);
writer.optimize();
writer.close();
@ -341,12 +354,188 @@ public class TestPhraseQuery extends TestCase {
query.add(new Term("repeated", "part"));
query.add(new Term("repeated", "second"));
query.add(new Term("repeated", "part"));
query.setSlop(99);
query.setSlop(100);
Hits hits = searcher.search(query);
assertEquals(0, hits.length());
assertEquals("slop of 100 just right", 1, hits.length());
QueryUtils.check(query,searcher);
query.setSlop(99);
hits = searcher.search(query);
assertEquals("slop of 99 not enough", 0, hits.length());
QueryUtils.check(query,searcher);
}
// work on two docs like this: "phrase exist notexist exist found"
public void testNonExistingPhrase() throws IOException {
// phrase without repetitions that exists in 2 docs
query.add(new Term("nonexist", "phrase"));
query.add(new Term("nonexist", "notexist"));
query.add(new Term("nonexist", "found"));
query.setSlop(2); // would be found this way
Hits hits = searcher.search(query);
assertEquals("phrase without repetitions exists in 2 docs", 2, hits.length());
QueryUtils.check(query,searcher);
// phrase with repetitions that exists in 2 docs
query = new PhraseQuery();
query.add(new Term("nonexist", "phrase"));
query.add(new Term("nonexist", "exist"));
query.add(new Term("nonexist", "exist"));
query.setSlop(1); // would be found
hits = searcher.search(query);
assertEquals("phrase with repetitions exists in two docs", 2, hits.length());
QueryUtils.check(query,searcher);
// phrase I with repetitions that does not exist in any doc
query = new PhraseQuery();
query.add(new Term("nonexist", "phrase"));
query.add(new Term("nonexist", "notexist"));
query.add(new Term("nonexist", "phrase"));
query.setSlop(1000); // would not be found no matter how high the slop is
hits = searcher.search(query);
assertEquals("nonexisting phrase with repetitions does not exist in any doc", 0, hits.length());
QueryUtils.check(query,searcher);
// phrase II with repetitions that does not exist in any doc
query = new PhraseQuery();
query.add(new Term("nonexist", "phrase"));
query.add(new Term("nonexist", "exist"));
query.add(new Term("nonexist", "exist"));
query.add(new Term("nonexist", "exist"));
query.setSlop(1000); // would not be found no matter how high the slop is
hits = searcher.search(query);
assertEquals("nonexisting phrase with repetitions does not exist in any doc", 0, hits.length());
QueryUtils.check(query,searcher);
}
/**
* Working on a 2 fields like this:
* Field("field", "one two three four five")
* Field("palindrome", "one two three two one")
* Phrase of size 2 occuriong twice, once in order and once in reverse,
* because doc is a palyndrome, is counted twice.
* Also, in this case order in query does not matter.
* Also, when an exact match is found, both sloppy scorer and exact scorer scores the same.
*/
public void testPalyndrome2() throws Exception {
// search on non palyndrome, find phrase with no slop, using exact phrase scorer
query.setSlop(0); // to use exact phrase scorer
query.add(new Term("field", "two"));
query.add(new Term("field", "three"));
Hits hits = searcher.search(query);
assertEquals("phrase found with exact phrase scorer", 1, hits.length());
float score0 = hits.score(0);
//System.out.println("(exact) field: two three: "+score0);
QueryUtils.check(query,searcher);
// search on non palyndrome, find phrase with slop 2, though no slop required here.
query.setSlop(2); // to use sloppy scorer
hits = searcher.search(query);
assertEquals("just sloppy enough", 1, hits.length());
float score1 = hits.score(0);
//System.out.println("(sloppy) field: two three: "+score1);
assertEquals("exact scorer and sloppy scorer score the same when slop does not matter",score0, score1, SCORE_COMP_THRESH);
QueryUtils.check(query,searcher);
// search ordered in palyndrome, find it twice
query = new PhraseQuery();
query.setSlop(2); // must be at least two for both ordered and reversed to match
query.add(new Term("palindrome", "two"));
query.add(new Term("palindrome", "three"));
hits = searcher.search(query);
assertEquals("just sloppy enough", 1, hits.length());
float score2 = hits.score(0);
//System.out.println("palindrome: two three: "+score2);
QueryUtils.check(query,searcher);
//commented out for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq().
//assertTrue("ordered scores higher in palindrome",score1+SCORE_COMP_THRESH<score2);
// search reveresed in palyndrome, find it twice
query = new PhraseQuery();
query.setSlop(2); // must be at least two for both ordered and reversed to match
query.add(new Term("palindrome", "three"));
query.add(new Term("palindrome", "two"));
hits = searcher.search(query);
assertEquals("just sloppy enough", 1, hits.length());
float score3 = hits.score(0);
//System.out.println("palindrome: three two: "+score3);
QueryUtils.check(query,searcher);
//commented out for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq().
//assertTrue("reversed scores higher in palindrome",score1+SCORE_COMP_THRESH<score3);
//assertEquals("ordered or reversed does not matter",score2, score3, SCORE_COMP_THRESH);
}
/**
* Working on a 2 fields like this:
* Field("field", "one two three four five")
* Field("palindrome", "one two three two one")
* Phrase of size 3 occuriong twice, once in order and once in reverse,
* because doc is a palyndrome, is counted twice.
* Also, in this case order in query does not matter.
* Also, when an exact match is found, both sloppy scorer and exact scorer scores the same.
*/
public void testPalyndrome3() throws Exception {
// search on non palyndrome, find phrase with no slop, using exact phrase scorer
query.setSlop(0); // to use exact phrase scorer
query.add(new Term("field", "one"));
query.add(new Term("field", "two"));
query.add(new Term("field", "three"));
Hits hits = searcher.search(query);
assertEquals("phrase found with exact phrase scorer", 1, hits.length());
float score0 = hits.score(0);
//System.out.println("(exact) field: one two three: "+score0);
QueryUtils.check(query,searcher);
// search on non palyndrome, find phrase with slop 3, though no slop required here.
query.setSlop(4); // to use sloppy scorer
hits = searcher.search(query);
assertEquals("just sloppy enough", 1, hits.length());
float score1 = hits.score(0);
//System.out.println("(sloppy) field: one two three: "+score1);
assertEquals("exact scorer and sloppy scorer score the same when slop does not matter",score0, score1, SCORE_COMP_THRESH);
QueryUtils.check(query,searcher);
// search ordered in palyndrome, find it twice
query = new PhraseQuery();
query.setSlop(4); // must be at least four for both ordered and reversed to match
query.add(new Term("palindrome", "one"));
query.add(new Term("palindrome", "two"));
query.add(new Term("palindrome", "three"));
hits = searcher.search(query);
assertEquals("just sloppy enough", 1, hits.length());
float score2 = hits.score(0);
//System.out.println("palindrome: one two three: "+score2);
QueryUtils.check(query,searcher);
//commented out for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq().
//assertTrue("ordered scores higher in palindrome",score1+SCORE_COMP_THRESH<score2);
// search reveresed in palyndrome, find it twice
query = new PhraseQuery();
query.setSlop(4); // must be at least four for both ordered and reversed to match
query.add(new Term("palindrome", "three"));
query.add(new Term("palindrome", "two"));
query.add(new Term("palindrome", "one"));
hits = searcher.search(query);
assertEquals("just sloppy enough", 1, hits.length());
float score3 = hits.score(0);
//System.out.println("palindrome: three two one: "+score3);
QueryUtils.check(query,searcher);
//commented out for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq().
//assertTrue("reversed scores higher in palindrome",score1+SCORE_COMP_THRESH<score3);
//assertEquals("ordered or reversed does not matter",score2, score3, SCORE_COMP_THRESH);
}
}