SOLR-2089: Faceting: order term ords before converting to values

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@989406 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2010-08-25 23:54:19 +00:00
parent 4378ffce0b
commit 375afd1342
4 changed files with 212 additions and 3 deletions

View File

@ -270,6 +270,12 @@ Optimizations
for the first facet request is anywhere from 30% to 32x, depending on how many
terms are in the field and how many documents match per term. (yonik)
* SOLR-2089: Speed up UnInvertedField faceting (facet.method=fc for
multi-valued fields) when facet.limit is high and is also a large enough
percentage of the number of unique terms in the field. Extreme cases
yield speedups over 3x. (yonik)
Bug Fixes
----------------------

View File

@ -37,6 +37,7 @@ import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.TrieField;
import org.apache.solr.search.*;
import org.apache.solr.util.PrimUtils;
import org.apache.solr.util.BoundedTreeSet;
import org.apache.solr.handler.component.StatsValues;
import org.apache.solr.handler.component.FieldFacetStats;
@ -584,7 +585,7 @@ public class UnInvertedField {
// important if a lot of the counts are repeated (like zero counts would be).
// minimize object creation and speed comparison by creating a long that
// encompases both count and term number.
// encompasses both count and term number.
// Since smaller values are kept in the TreeSet, make higher counts smaller.
//
// for equal counts, lower term numbers
@ -597,15 +598,41 @@ public class UnInvertedField {
}
}
// now select the right page from the results
final int[] tnums = new int[Math.min(queue.size()-off, lim)];
final int[] indirect = counts; // reuse the counts array for the index into the tnums array
int tnumCount = 0;
for (Long p : queue) {
if (--off>=0) continue;
if (--lim<0) break;
int c = -(int)(p.longValue() >>> 32);
//int tnum = 0x7fffffff - (int)p.longValue(); // use if priority queue
int tnum = (int)p.longValue();
String label = getReadableValue(getTermValue(te, tnum), ft, spare);
res.add(label, c);
indirect[tnumCount] = tnumCount;
tnums[tnumCount++] = tnum;
// String label = ft.indexedToReadable(getTermText(te, tnum));
// add a null label for now... we'll fill it in later.
res.add(null, c);
}
// now sort the indexes by the term numbers
PrimUtils.sort(0, tnumCount, indirect, new PrimUtils.IntComparator() {
@Override
public int compare(int a, int b) {
return tnums[a] - tnums[b];
}
});
// convert the term numbers to term values and set as the label
for (int i=0; i<tnumCount; i++) {
int idx = indirect[i];
int tnum = tnums[idx];
String label = getReadableValue(getTermValue(te, tnum), ft, spare);
res.setName(idx, label);
}
} else {
// add results in index order
int i=startTerm;

View File

@ -0,0 +1,122 @@
package org.apache.solr.util;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/** Helper routines that operate on primitive Java values. */
public class PrimUtils {

  /**
   * Ordering over primitive {@code int} values. Subclasses provide
   * {@link #compare(int, int)}; a negative result is treated as "a is less than b"
   * and zero as "a equals b".
   */
  public static abstract class IntComparator {
    public abstract int compare(int a, int b);

    /** True when {@code compare(a, b)} is negative. */
    public boolean lessThan(int a, int b) {
      return compare(a, b) < 0;
    }

    /** True when {@code compare(a, b)} is zero. */
    public boolean equals(int a, int b) {
      return compare(a, b) == 0;
    }
  }

  /**
   * Sorts {@code array[start .. end)} in place into ascending order as defined by
   * {@code comparator}. Elements outside that range are not touched.
   *
   * @param start      index of the first element to sort (inclusive)
   * @param end        index one past the last element to sort (exclusive)
   * @param array      the values to reorder
   * @param comparator the ordering to sort by
   */
  public static void sort(int start, int end, int[] array, IntComparator comparator) {
    // Derived from Apache Harmony's Arrays.sort(double[]): a three-way partitioning
    // quicksort that hands short ranges to an insertion sort.
    if (end - start < 7) {
      insertionSort(start, end, array, comparator);
      return;
    }

    final int pivot = array[pivotIndex(start, end, array, comparator)];

    // Region layout while partitioning:
    //   [start, eqLeft)   equal to pivot (parked on the left)
    //   [eqLeft, lo)      less than pivot
    //   (hi, eqRight]     greater than pivot
    //   (eqRight, end)    equal to pivot (parked on the right)
    int eqLeft = start;
    int lo = start;
    int hi = end - 1;
    int eqRight = end - 1;
    for (;;) {
      for (; lo <= hi && !comparator.lessThan(pivot, array[lo]); lo++) {
        if (comparator.equals(array[lo], pivot)) {
          swap(array, eqLeft++, lo);
        }
      }
      for (; hi >= lo && !comparator.lessThan(array[hi], pivot); hi--) {
        if (comparator.equals(array[hi], pivot)) {
          swap(array, hi, eqRight--);
        }
      }
      if (lo > hi) {
        break;
      }
      swap(array, lo++, hi--);
    }

    // Move the pivot-equal runs parked at both edges into the middle.
    final int lessCount = lo - eqLeft;
    final int leftMove = Math.min(eqLeft - start, lessCount);
    swapRanges(array, start, lo - leftMove, leftMove);

    final int greaterCount = eqRight - hi;
    final int rightMove = Math.min(greaterCount, end - 1 - eqRight);
    swapRanges(array, lo, end - rightMove, rightMove);

    // Recurse into the strictly-less and strictly-greater partitions.
    if (lessCount > 0) {
      sort(start, start + lessCount, array, comparator);
    }
    if (greaterCount > 0) {
      sort(end - greaterCount, end, array, comparator);
    }
  }

  /** Insertion sort of {@code array[start .. end)}; used for ranges shorter than 7. */
  private static void insertionSort(int start, int end, int[] array, IntComparator comparator) {
    for (int i = start + 1; i < end; i++) {
      int j = i;
      while (j > start && comparator.lessThan(array[j], array[j - 1])) {
        swap(array, j, j - 1);
        j--;
      }
    }
  }

  /**
   * Picks the index of the partitioning element: the midpoint for a range of 7,
   * a median of three for longer ranges, and a median of three medians once the
   * range exceeds 40 elements.
   */
  private static int pivotIndex(int start, int end, int[] array, IntComparator comparator) {
    final int span = end - start;
    int mid = (start + end) >>> 1;
    if (span <= 7) {
      return mid;
    }
    int low = start;
    int high = end - 1;
    if (span > 40) {
      final int step = span >> 3;
      final int twoSteps = step << 1;
      low = med3(array, low, low + step, low + twoSteps, comparator);
      mid = med3(array, mid - step, mid, mid + step, comparator);
      high = med3(array, high - twoSteps, high - step, high, comparator);
    }
    return med3(array, low, mid, high, comparator);
  }

  /** Exchanges {@code array[i]} and {@code array[j]}. */
  private static void swap(int[] array, int i, int j) {
    final int held = array[i];
    array[i] = array[j];
    array[j] = held;
  }

  /** Exchanges {@code count} consecutive elements starting at {@code from} and {@code to}. */
  private static void swapRanges(int[] array, int from, int to, int count) {
    for (int k = 0; k < count; k++) {
      swap(array, from + k, to + k);
    }
  }

  /** Returns whichever of the indexes a, b, c holds the median of the three values. */
  private static int med3(int[] array, int a, int b, int c, IntComparator comparator) {
    final int x = array[a];
    final int y = array[b];
    final int z = array[c];
    if (comparator.lessThan(x, y)) {
      if (comparator.lessThan(y, z)) {
        return b;
      }
      return comparator.lessThan(x, z) ? c : a;
    }
    if (comparator.lessThan(z, y)) {
      return b;
    }
    return comparator.lessThan(z, x) ? c : a;
  }
}

View File

@ -0,0 +1,54 @@
package org.apache.solr.util;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.LuceneTestCase;
import java.util.Arrays;
import java.util.Random;
/**
 * Randomized test for {@link PrimUtils#sort(int, int, int[], PrimUtils.IntComparator)}.
 */
public class IntUtilsTest extends LuceneTestCase {
  Random r = newRandom();

  /**
   * Sorts random sub-ranges of an array in descending order with
   * {@code PrimUtils.sort} and checks each result against the reverse of what
   * {@code Arrays.sort} produces for the same range.
   */
  public void testSort() {
    int maxSize = 100;
    int maxVal = 100;
    int[] actual = new int[maxSize];
    int[] expected = new int[maxSize];

    PrimUtils.IntComparator comparator = new PrimUtils.IntComparator() {
      @Override
      public int compare(int x, int y) {
        return y - x;     // sort in reverse; values are in [0, maxVal) so this cannot overflow
      }
    };

    for (int iter=0; iter<100; iter++) {
      int start = r.nextInt(maxSize+1);
      // Draw end from [start, maxSize] inclusive so that ranges reaching the last
      // slot of the array are exercised too; the bound is always >= 1.
      int end = start + r.nextInt(maxSize - start + 1);
      for (int i=start; i<end; i++) {
        actual[i] = expected[i] = r.nextInt(maxVal);
      }
      PrimUtils.sort(start, end, actual, comparator);
      Arrays.sort(expected, start, end);
      for (int i=start; i<end; i++) {
        // expected is ascending, actual is descending: compare mirrored positions
        assertEquals("iter=" + iter + " start=" + start + " end=" + end + " i=" + i,
                     expected[end-(i-start+1)], actual[i]);
      }
    }
  }
}