From 375afd1342e53b6130e756e46f102563249ca7a9 Mon Sep 17 00:00:00 2001 From: Yonik Seeley Date: Wed, 25 Aug 2010 23:54:19 +0000 Subject: [PATCH] SOLR-2089: Faceting: order term ords before converting to values git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@989406 13f79535-47bb-0310-9956-ffa450edef68 --- solr/CHANGES.txt | 6 + .../apache/solr/request/UnInvertedField.java | 33 ++++- .../java/org/apache/solr/util/PrimUtils.java | 122 ++++++++++++++++++ .../org/apache/solr/util/IntUtilsTest.java | 54 ++++++++ 4 files changed, 212 insertions(+), 3 deletions(-) create mode 100644 solr/src/java/org/apache/solr/util/PrimUtils.java create mode 100644 solr/src/test/org/apache/solr/util/IntUtilsTest.java diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 57909cbb40b..3d2e97ecf94 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -270,6 +270,12 @@ Optimizations for the first facet request is anywhere from 30% to 32x, depending on how many terms are in the field and how many documents match per term. (yonik) +* SOLR-2089: Speed up UnInvertedField faceting (facet.method=fc for + multi-valued fields) when facet.limit is both high, and a high enough + percentage of the number of unique terms in the field. Extreme cases + yield speedups over 3x. (yonik) + + Bug Fixes ---------------------- diff --git a/solr/src/java/org/apache/solr/request/UnInvertedField.java b/solr/src/java/org/apache/solr/request/UnInvertedField.java index bd8be052ccf..708b55be393 100755 --- a/solr/src/java/org/apache/solr/request/UnInvertedField.java +++ b/solr/src/java/org/apache/solr/request/UnInvertedField.java @@ -37,6 +37,7 @@ import org.apache.solr.core.SolrCore; import org.apache.solr.schema.FieldType; import org.apache.solr.schema.TrieField; import org.apache.solr.search.*; +import org.apache.solr.util.PrimUtils; import org.apache.solr.util.BoundedTreeSet; import org.apache.solr.handler.component.StatsValues; import org.apache.solr.handler.component.FieldFacetStats; @@ -584,7 +585,7 @@ public class UnInvertedField { // important if a lot of the counts are repeated (like zero counts would be). // minimize object creation and speed comparison by creating a long that - // encompases both count and term number. + // encompasses both count and term number. // Since smaller values are kept in the TreeSet, make higher counts smaller. // // for equal counts, lower term numbers @@ -597,15 +598,41 @@ public class UnInvertedField { } } // now select the right page from the results + + + final int[] tnums = new int[Math.min(queue.size()-off, lim)]; + final int[] indirect = counts; // reuse the counts array for the index into the tnums array + int tnumCount = 0; + for (Long p : queue) { if (--off>=0) continue; if (--lim<0) break; int c = -(int)(p.longValue() >>> 32); //int tnum = 0x7fffffff - (int)p.longValue(); // use if priority queue int tnum = (int)p.longValue(); - String label = getReadableValue(getTermValue(te, tnum), ft, spare); - res.add(label, c); + indirect[tnumCount] = tnumCount; + tnums[tnumCount++] = tnum; + // String label = ft.indexedToReadable(getTermText(te, tnum)); + // add a null label for now... we'll fill it in later. + res.add(null, c); } + + // now sort the indexes by the term numbers + PrimUtils.sort(0, tnumCount, indirect, new PrimUtils.IntComparator() { + @Override + public int compare(int a, int b) { + return tnums[a] - tnums[b]; + } + }); + + // convert the term numbers to term values and set as the label + for (int i=0; i start && comparator.lessThan(array[j], array[j - 1]); j--) { + temp = array[j]; + array[j] = array[j - 1]; + array[j - 1] = temp; + } + } + return; + } + int middle = (start + end) >>> 1; + if (length > 7) { + int bottom = start; + int top = end - 1; + if (length > 40) { + length >>= 3; + bottom = med3(array, bottom, bottom + length, bottom + + (length<<1), comparator); + middle = med3(array, middle - length, middle, middle + length, comparator); + top = med3(array, top - (length<<1), top - length, top, comparator); + } + middle = med3(array, bottom, middle, top, comparator); + } + int partionValue = array[middle]; + int a, b, c, d; + a = b = start; + c = d = end - 1; + while (true) { + while (b <= c && !comparator.lessThan(partionValue, array[b])) { + if (comparator.equals(array[b], partionValue)) { + temp = array[a]; + array[a++] = array[b]; + array[b] = temp; + } + b++; + } + while (c >= b && !comparator.lessThan(array[c], partionValue)) { + if (comparator.equals(array[c], partionValue)) { + temp = array[c]; + array[c] = array[d]; + array[d--] = temp; + } + c--; + } + if (b > c) { + break; + } + temp = array[b]; + array[b++] = array[c]; + array[c--] = temp; + } + length = a - start < b - a ? a - start : b - a; + int l = start; + int h = b - length; + while (length-- > 0) { + temp = array[l]; + array[l++] = array[h]; + array[h++] = temp; + } + length = d - c < end - 1 - d ? d - c : end - 1 - d; + l = b; + h = end - length; + while (length-- > 0) { + temp = array[l]; + array[l++] = array[h]; + array[h++] = temp; + } + if ((length = b - a) > 0) { + sort(start, start + length, array, comparator); + } + if ((length = d - c) > 0) { + sort(end - length, end, array, comparator); + } + } + + private static int med3(int[] array, int a, int b, int c, IntComparator comparator) { + int x = array[a], y = array[b], z = array[c]; + return comparator.lessThan(x, y) ? (comparator.lessThan(y, z) ? b : (comparator.lessThan(x, z) ? c : a)) + : (comparator.lessThan(z, y) ? b : (comparator.lessThan(z, x) ? c : a)); + } +} diff --git a/solr/src/test/org/apache/solr/util/IntUtilsTest.java b/solr/src/test/org/apache/solr/util/IntUtilsTest.java new file mode 100644 index 00000000000..d5e6233c8fb --- /dev/null +++ b/solr/src/test/org/apache/solr/util/IntUtilsTest.java @@ -0,0 +1,54 @@ +package org.apache.solr.util; + +/** + * Copyright 2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.LuceneTestCase; + +import java.util.Arrays; +import java.util.Random; + +public class IntUtilsTest extends LuceneTestCase { + Random r = newRandom(); + + public void testSort() { + int maxSize = 100; + int maxVal = 100; + int[] a = new int[maxSize]; + int[] b = new int[maxSize]; + + PrimUtils.IntComparator comparator = new PrimUtils.IntComparator() { + @Override + public int compare(int a, int b) { + return b - a; // sort in reverse + } + }; + + for (int iter=0; iter<100; iter++) { + int start = r.nextInt(maxSize+1); + int end = start==maxSize ? maxSize : start + r.nextInt(maxSize-start); + for (int i=start; i