LUCENE-3054: PhraseQuery can in some cases cause a stack overflow in SorterTemplate.quickSort(). This fix also adds an optimization to PhraseQuery, since a term with a lower doc freq will also have fewer positions

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1098633 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Uwe Schindler 2011-05-02 15:47:38 +00:00
parent 11bed874c2
commit 679cfee191
5 changed files with 73 additions and 5 deletions

View File

@ -491,6 +491,11 @@ Bug fixes
very special use cases of the TokenStream-API, most users would not
have recognized it. (Uwe Schindler, Robert Muir)
* LUCENE-3054: PhraseQuery can in some cases stack overflow in
SorterTemplate.quickSort(). This fix also adds an optimization to
PhraseQuery, since a term with a lower doc freq will also have fewer positions.
(Uwe Schindler, Robert Muir, Otis Gospodnetic)
======================= Lucene 3.1.0 =======================
Changes in backwards compatibility policy

View File

@ -214,7 +214,7 @@ public class MultiPhraseQuery extends Query {
docFreq = reader.docFreq(term.field(), term.bytes());
}
postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, positions.get(pos).intValue());
postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, positions.get(pos).intValue(), terms[0]);
}
// sort by increasing docFreq order

View File

@ -124,16 +124,48 @@ public class PhraseQuery extends Query {
final DocsAndPositionsEnum postings;
final int docFreq;
final int position;
final Term term;
public PostingsAndFreq(DocsAndPositionsEnum postings, int docFreq, int position) {
/**
 * Creates an entry that bundles a postings enumeration with the values
 * used to order and compare it.
 *
 * @param postings the positions enumeration stored in this entry
 * @param docFreq  the document frequency stored in this entry
 * @param position the position stored in this entry
 * @param term     the term stored in this entry; used as the final
 *                 tie-breaker when ordering entries
 */
public PostingsAndFreq(DocsAndPositionsEnum postings, int docFreq, int position, Term term) {
  // Plain field initialisation; the assignments are independent of each other.
  this.term = term;
  this.position = position;
  this.docFreq = docFreq;
  this.postings = postings;
}
/**
 * Orders entries by ascending {@code docFreq}, then by ascending
 * {@code position}, and finally by {@code term}.
 *
 * <p>The int fields are compared explicitly rather than by subtraction:
 * a difference such as {@code position - other.position} can overflow when
 * the operands have opposite signs, which would yield a result with the
 * wrong sign and break the ordering contract.
 *
 * <p>{@code term} is dereferenced without a null check, so it must be
 * non-null whenever two entries tie on both {@code docFreq} and
 * {@code position}.
 *
 * @param other the entry to compare against
 * @return a negative value, zero, or a positive value if this entry sorts
 *         before, equal to, or after {@code other}
 */
public int compareTo(PostingsAndFreq other) {
  if (docFreq != other.docFreq) {
    return docFreq < other.docFreq ? -1 : 1;
  }
  if (position != other.position) {
    return position < other.position ? -1 : 1;
  }
  // Same docFreq and position: fall back to the term ordering.
  return term.compareTo(other.term);
}
/**
 * Computes a hash from {@code docFreq}, {@code position} and {@code term}
 * (a null term contributes 0). The {@code postings} field is not part of
 * the hash.
 */
@Override
public int hashCode() {
  int hash = 31 + docFreq;
  hash = 31 * hash + position;
  return 31 * hash + (term != null ? term.hashCode() : 0);
}
/**
 * Two entries are equal when they are of exactly the same class and agree
 * on {@code docFreq}, {@code position} and {@code term} (two null terms
 * count as equal). The {@code postings} field is not compared.
 */
@Override
public boolean equals(Object obj) {
  if (obj == this) {
    return true;
  }
  if (obj == null || obj.getClass() != getClass()) {
    return false;
  }
  final PostingsAndFreq that = (PostingsAndFreq) obj;
  if (docFreq != that.docFreq || position != that.position) {
    return false;
  }
  return term == null ? that.term == null : term.equals(that.term);
}
}
private class PhraseWeight extends Weight {
@ -197,7 +229,7 @@ public class PhraseQuery extends Query {
return null;
}
}
postingsFreqs[i] = new PostingsAndFreq(postingsEnum, reader.docFreq(t.field(), t.bytes()), positions.get(i).intValue());
postingsFreqs[i] = new PostingsAndFreq(postingsEnum, reader.docFreq(t.field(), t.bytes()), positions.get(i).intValue(), t);
}
// sort by increasing docFreq order

View File

@ -30,6 +30,7 @@ package org.apache.lucene.util;
public abstract class SorterTemplate {
private static final int MERGESORT_THRESHOLD = 12;
private static final int MERGE_TO_QUICKSORT_THRESHOLD = 40;
private static final int QUICKSORT_THRESHOLD = 7;
/** Implement this method, that swaps slots {@code i} and {@code j} in your data */
@ -63,6 +64,10 @@ public abstract class SorterTemplate {
/** Sorts the range {@code lo}..{@code hi} via an in-place, but unstable, QuickSort algorithm.
 * For small ranges falls back to {@link #insertionSort(int,int)}.
 * The recursion depth is limited to {@code MERGE_TO_QUICKSORT_THRESHOLD} levels;
 * once that budget is used up, the remaining sub-ranges are sorted with
 * {@link #mergeSort(int,int)} instead of recursing further. */
public final void quickSort(int lo, int hi) {
quickSort(lo, hi, MERGE_TO_QUICKSORT_THRESHOLD);
}
private void quickSort(int lo, int hi, int maxDepth) {
final int diff = hi - lo;
if (diff <= QUICKSORT_THRESHOLD) {
insertionSort(lo, hi);
@ -101,8 +106,16 @@ public abstract class SorterTemplate {
}
}
quickSort(lo, left);
quickSort(left + 1, hi);
// fall back to merge sort when recursion depth gets too big
if (maxDepth == 0) {
// for testing: new Exception("Hit recursion depth limit").printStackTrace();
mergeSort(lo, left);
mergeSort(left + 1, hi);
} else {
--maxDepth;
quickSort(lo, left, maxDepth);
quickSort(left + 1, hi, maxDepth);
}
}
/** Sorts via stable in-place MergeSort algorithm

View File

@ -144,6 +144,24 @@ public class TestArrayUtil extends LuceneTestCase {
}
}
/**
 * Builds an array of random length between 1 and {@code maxSize} (inclusive)
 * whose elements are each randomly 0 or 1, so the array contains only two
 * distinct values.
 */
private Integer[] createSparseRandomArray(int maxSize) {
  final int length = 1 + random.nextInt(maxSize);
  final Integer[] values = new Integer[length];
  int idx = 0;
  while (idx < length) {
    values[idx++] = Integer.valueOf(random.nextInt(2));
  }
  return values;
}
// Regression test for LUCENE-3054: without the merge sort fallback, quickSort
// fails with a stack overflow in most cases on this kind of input.
public void testQuickToMergeSortFallback() {
  final int iterations = 500 * RANDOM_MULTIPLIER;
  for (int round = 0; round < iterations; round++) {
    final Integer[] actual = createSparseRandomArray(40000);
    final Integer[] expected = actual.clone();
    ArrayUtil.quickSort(actual);
    // Arrays.sort on the cloned input provides the reference ordering.
    Arrays.sort(expected);
    assertArrayEquals(expected, actual);
  }
}
public void testMergeSort() {
for (int i = 0, c = 500 * RANDOM_MULTIPLIER; i < c; i++) {
Integer[] a1 = createRandomArray(1000), a2 = a1.clone();