From 679cfee191306585c15bbba93c5b9f1b0e770a51 Mon Sep 17 00:00:00 2001 From: Uwe Schindler Date: Mon, 2 May 2011 15:47:38 +0000 Subject: [PATCH] LUCENE-3054: PhraseQuery can in some cases stack overflow in SorterTemplate.quickSort(). This fix also adds an optimization to PhraseQuery as term with lower doc freq will also have less positions git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1098633 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 5 +++ .../lucene/search/MultiPhraseQuery.java | 2 +- .../org/apache/lucene/search/PhraseQuery.java | 36 +++++++++++++++++-- .../apache/lucene/util/SorterTemplate.java | 17 +++++++-- .../org/apache/lucene/util/TestArrayUtil.java | 18 ++++++++++ 5 files changed, 73 insertions(+), 5 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 83380041f13..ef4b3568ea5 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -491,6 +491,11 @@ Bug fixes very special use cases of the TokenStream-API, most users would not have recognized it. (Uwe Schindler, Robert Muir) +* LUCENE-3054: PhraseQuery can in some cases stack overflow in + SorterTemplate.quickSort(). This fix also adds an optimization to + PhraseQuery as term with lower doc freq will also have less positions. + (Uwe Schindler, Robert Muir, Otis Gospodnetic) + ======================= Lucene 3.1.0 ======================= Changes in backwards compatibility policy diff --git a/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java b/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java index 7cb6994ccaa..8e18b520263 100644 --- a/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java +++ b/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java @@ -214,7 +214,7 @@ public class MultiPhraseQuery extends Query { docFreq = reader.docFreq(term.field(), term.bytes()); } - postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, positions.get(pos).intValue()); + postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, positions.get(pos).intValue(), terms[0]); } // sort by increasing docFreq order diff --git a/lucene/src/java/org/apache/lucene/search/PhraseQuery.java b/lucene/src/java/org/apache/lucene/search/PhraseQuery.java index 2c8d977fa82..a23bdbe69f6 100644 --- a/lucene/src/java/org/apache/lucene/search/PhraseQuery.java +++ b/lucene/src/java/org/apache/lucene/search/PhraseQuery.java @@ -124,16 +124,48 @@ public class PhraseQuery extends Query { final DocsAndPositionsEnum postings; final int docFreq; final int position; + final Term term; - public PostingsAndFreq(DocsAndPositionsEnum postings, int docFreq, int position) { + public PostingsAndFreq(DocsAndPositionsEnum postings, int docFreq, int position, Term term) { this.postings = postings; this.docFreq = docFreq; this.position = position; + this.term = term; } public int compareTo(PostingsAndFreq other) { + if (docFreq == other.docFreq) { + if (position == other.position) { + return term.compareTo(other.term); + } + return position - other.position; + } return docFreq - other.docFreq; } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + docFreq; + result = prime * result + position; + result = prime * result + ((term == null) ? 0 : term.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) return true; + if (obj == null) return false; + if (getClass() != obj.getClass()) return false; + PostingsAndFreq other = (PostingsAndFreq) obj; + if (docFreq != other.docFreq) return false; + if (position != other.position) return false; + if (term == null) { + if (other.term != null) return false; + } else if (!term.equals(other.term)) return false; + return true; + } } private class PhraseWeight extends Weight { @@ -197,7 +229,7 @@ public class PhraseQuery extends Query { return null; } } - postingsFreqs[i] = new PostingsAndFreq(postingsEnum, reader.docFreq(t.field(), t.bytes()), positions.get(i).intValue()); + postingsFreqs[i] = new PostingsAndFreq(postingsEnum, reader.docFreq(t.field(), t.bytes()), positions.get(i).intValue(), t); } // sort by increasing docFreq order diff --git a/lucene/src/java/org/apache/lucene/util/SorterTemplate.java b/lucene/src/java/org/apache/lucene/util/SorterTemplate.java index b0e558c1c20..8ff57532c92 100644 --- a/lucene/src/java/org/apache/lucene/util/SorterTemplate.java +++ b/lucene/src/java/org/apache/lucene/util/SorterTemplate.java @@ -30,6 +30,7 @@ package org.apache.lucene.util; public abstract class SorterTemplate { private static final int MERGESORT_THRESHOLD = 12; + private static final int MERGE_TO_QUICKSORT_THRESHOLD = 40; private static final int QUICKSORT_THRESHOLD = 7; /** Implement this method, that swaps slots {@code i} and {@code j} in your data */ @@ -63,6 +64,10 @@ public abstract class SorterTemplate { /** Sorts via in-place, but unstable, QuickSort algorithm. * For small collections falls back to {@link #insertionSort(int,int)}. */ public final void quickSort(int lo, int hi) { + quickSort(lo, hi, MERGE_TO_QUICKSORT_THRESHOLD); + } + + private void quickSort(int lo, int hi, int maxDepth) { final int diff = hi - lo; if (diff <= QUICKSORT_THRESHOLD) { insertionSort(lo, hi); @@ -101,8 +106,16 @@ public abstract class SorterTemplate { } } - quickSort(lo, left); - quickSort(left + 1, hi); + // fall back to merge sort when recursion depth gets too big + if (maxDepth == 0) { + // for testing: new Exception("Hit recursion depth limit").printStackTrace(); + mergeSort(lo, left); + mergeSort(left + 1, hi); + } else { + --maxDepth; + quickSort(lo, left, maxDepth); + quickSort(left + 1, hi, maxDepth); + } } /** Sorts via stable in-place MergeSort algorithm diff --git a/lucene/src/test/org/apache/lucene/util/TestArrayUtil.java b/lucene/src/test/org/apache/lucene/util/TestArrayUtil.java index fc9575170e5..61e27f1e28f 100644 --- a/lucene/src/test/org/apache/lucene/util/TestArrayUtil.java +++ b/lucene/src/test/org/apache/lucene/util/TestArrayUtil.java @@ -144,6 +144,24 @@ public class TestArrayUtil extends LuceneTestCase { } } + private Integer[] createSparseRandomArray(int maxSize) { + final Integer[] a = new Integer[random.nextInt(maxSize) + 1]; + for (int i = 0; i < a.length; i++) { + a[i] = Integer.valueOf(random.nextInt(2)); + } + return a; + } + + // This is a test for LUCENE-3054 (which fails without the merge sort fall back with stack overflow in most cases) + public void testQuickToMergeSortFallback() { + for (int i = 0, c = 500 * RANDOM_MULTIPLIER; i < c; i++) { + Integer[] a1 = createSparseRandomArray(40000), a2 = a1.clone(); + ArrayUtil.quickSort(a1); + Arrays.sort(a2); + assertArrayEquals(a2, a1); + } + } + public void testMergeSort() { for (int i = 0, c = 500 * RANDOM_MULTIPLIER; i < c; i++) { Integer[] a1 = createRandomArray(1000), a2 = a1.clone();