mirror of https://github.com/apache/lucene.git
LUCENE-3054: PhraseQuery can in some cases stack overflow in SorterTemplate.quickSort(). This fix also adds an optimization to PhraseQuery as term with lower doc freq will also have less positions
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1098633 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
11bed874c2
commit
679cfee191
|
@ -491,6 +491,11 @@ Bug fixes
|
|||
very special use cases of the TokenStream-API, most users would not
|
||||
have recognized it. (Uwe Schindler, Robert Muir)
|
||||
|
||||
* LUCENE-3054: PhraseQuery can in some cases stack overflow in
|
||||
SorterTemplate.quickSort(). This fix also adds an optimization to
|
||||
PhraseQuery as term with lower doc freq will also have less positions.
|
||||
(Uwe Schindler, Robert Muir, Otis Gospodnetic)
|
||||
|
||||
======================= Lucene 3.1.0 =======================
|
||||
|
||||
Changes in backwards compatibility policy
|
||||
|
|
|
@ -214,7 +214,7 @@ public class MultiPhraseQuery extends Query {
|
|||
docFreq = reader.docFreq(term.field(), term.bytes());
|
||||
}
|
||||
|
||||
postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, positions.get(pos).intValue());
|
||||
postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, positions.get(pos).intValue(), terms[0]);
|
||||
}
|
||||
|
||||
// sort by increasing docFreq order
|
||||
|
|
|
@ -124,16 +124,48 @@ public class PhraseQuery extends Query {
|
|||
final DocsAndPositionsEnum postings;
|
||||
final int docFreq;
|
||||
final int position;
|
||||
final Term term;
|
||||
|
||||
public PostingsAndFreq(DocsAndPositionsEnum postings, int docFreq, int position) {
|
||||
public PostingsAndFreq(DocsAndPositionsEnum postings, int docFreq, int position, Term term) {
|
||||
this.postings = postings;
|
||||
this.docFreq = docFreq;
|
||||
this.position = position;
|
||||
this.term = term;
|
||||
}
|
||||
|
||||
public int compareTo(PostingsAndFreq other) {
|
||||
if (docFreq == other.docFreq) {
|
||||
if (position == other.position) {
|
||||
return term.compareTo(other.term);
|
||||
}
|
||||
return position - other.position;
|
||||
}
|
||||
return docFreq - other.docFreq;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
final int prime = 31;
|
||||
int result = 1;
|
||||
result = prime * result + docFreq;
|
||||
result = prime * result + position;
|
||||
result = prime * result + ((term == null) ? 0 : term.hashCode());
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
if (this == obj) return true;
|
||||
if (obj == null) return false;
|
||||
if (getClass() != obj.getClass()) return false;
|
||||
PostingsAndFreq other = (PostingsAndFreq) obj;
|
||||
if (docFreq != other.docFreq) return false;
|
||||
if (position != other.position) return false;
|
||||
if (term == null) {
|
||||
if (other.term != null) return false;
|
||||
} else if (!term.equals(other.term)) return false;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
private class PhraseWeight extends Weight {
|
||||
|
@ -197,7 +229,7 @@ public class PhraseQuery extends Query {
|
|||
return null;
|
||||
}
|
||||
}
|
||||
postingsFreqs[i] = new PostingsAndFreq(postingsEnum, reader.docFreq(t.field(), t.bytes()), positions.get(i).intValue());
|
||||
postingsFreqs[i] = new PostingsAndFreq(postingsEnum, reader.docFreq(t.field(), t.bytes()), positions.get(i).intValue(), t);
|
||||
}
|
||||
|
||||
// sort by increasing docFreq order
|
||||
|
|
|
@ -30,6 +30,7 @@ package org.apache.lucene.util;
|
|||
public abstract class SorterTemplate {
|
||||
|
||||
private static final int MERGESORT_THRESHOLD = 12;
|
||||
private static final int MERGE_TO_QUICKSORT_THRESHOLD = 40;
|
||||
private static final int QUICKSORT_THRESHOLD = 7;
|
||||
|
||||
/** Implement this method, that swaps slots {@code i} and {@code j} in your data */
|
||||
|
@ -63,6 +64,10 @@ public abstract class SorterTemplate {
|
|||
/** Sorts via in-place, but unstable, QuickSort algorithm.
|
||||
* For small collections falls back to {@link #insertionSort(int,int)}. */
|
||||
public final void quickSort(int lo, int hi) {
|
||||
quickSort(lo, hi, MERGE_TO_QUICKSORT_THRESHOLD);
|
||||
}
|
||||
|
||||
private void quickSort(int lo, int hi, int maxDepth) {
|
||||
final int diff = hi - lo;
|
||||
if (diff <= QUICKSORT_THRESHOLD) {
|
||||
insertionSort(lo, hi);
|
||||
|
@ -101,8 +106,16 @@ public abstract class SorterTemplate {
|
|||
}
|
||||
}
|
||||
|
||||
quickSort(lo, left);
|
||||
quickSort(left + 1, hi);
|
||||
// fall back to merge sort when recursion depth gets too big
|
||||
if (maxDepth == 0) {
|
||||
// for testing: new Exception("Hit recursion depth limit").printStackTrace();
|
||||
mergeSort(lo, left);
|
||||
mergeSort(left + 1, hi);
|
||||
} else {
|
||||
--maxDepth;
|
||||
quickSort(lo, left, maxDepth);
|
||||
quickSort(left + 1, hi, maxDepth);
|
||||
}
|
||||
}
|
||||
|
||||
/** Sorts via stable in-place MergeSort algorithm
|
||||
|
|
|
@ -144,6 +144,24 @@ public class TestArrayUtil extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
private Integer[] createSparseRandomArray(int maxSize) {
|
||||
final Integer[] a = new Integer[random.nextInt(maxSize) + 1];
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
a[i] = Integer.valueOf(random.nextInt(2));
|
||||
}
|
||||
return a;
|
||||
}
|
||||
|
||||
// This is a test for LUCENE-3054 (which fails without the merge sort fall back with stack overflow in most cases)
|
||||
public void testQuickToMergeSortFallback() {
|
||||
for (int i = 0, c = 500 * RANDOM_MULTIPLIER; i < c; i++) {
|
||||
Integer[] a1 = createSparseRandomArray(40000), a2 = a1.clone();
|
||||
ArrayUtil.quickSort(a1);
|
||||
Arrays.sort(a2);
|
||||
assertArrayEquals(a2, a1);
|
||||
}
|
||||
}
|
||||
|
||||
public void testMergeSort() {
|
||||
for (int i = 0, c = 500 * RANDOM_MULTIPLIER; i < c; i++) {
|
||||
Integer[] a1 = createRandomArray(1000), a2 = a1.clone();
|
||||
|
|
Loading…
Reference in New Issue