mirror of https://github.com/apache/lucene.git
SOLR-2089: Faceting: order term ords before converting to values
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@989406 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4378ffce0b
commit
375afd1342
|
@ -270,6 +270,12 @@ Optimizations
|
|||
for the first facet request is anywhere from 30% to 32x, depending on how many
|
||||
terms are in the field and how many documents match per term. (yonik)
|
||||
|
||||
* SOLR-2089: Speed up UnInvertedField faceting (facet.method=fc for
|
||||
multi-valued fields) when facet.limit is both high, and a high enough
|
||||
percentage of the number of unique terms in the field. Extreme cases
|
||||
yield speedups over 3x. (yonik)
|
||||
|
||||
|
||||
Bug Fixes
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -37,6 +37,7 @@ import org.apache.solr.core.SolrCore;
|
|||
import org.apache.solr.schema.FieldType;
|
||||
import org.apache.solr.schema.TrieField;
|
||||
import org.apache.solr.search.*;
|
||||
import org.apache.solr.util.PrimUtils;
|
||||
import org.apache.solr.util.BoundedTreeSet;
|
||||
import org.apache.solr.handler.component.StatsValues;
|
||||
import org.apache.solr.handler.component.FieldFacetStats;
|
||||
|
@ -584,7 +585,7 @@ public class UnInvertedField {
|
|||
// important if a lot of the counts are repeated (like zero counts would be).
|
||||
|
||||
// minimize object creation and speed comparison by creating a long that
|
||||
// encompases both count and term number.
|
||||
// encompasses both count and term number.
|
||||
// Since smaller values are kept in the TreeSet, make higher counts smaller.
|
||||
//
|
||||
// for equal counts, lower term numbers
|
||||
|
@ -597,15 +598,41 @@ public class UnInvertedField {
|
|||
}
|
||||
}
|
||||
// now select the right page from the results
|
||||
|
||||
|
||||
final int[] tnums = new int[Math.min(queue.size()-off, lim)];
|
||||
final int[] indirect = counts; // reuse the counts array for the index into the tnums array
|
||||
int tnumCount = 0;
|
||||
|
||||
for (Long p : queue) {
|
||||
if (--off>=0) continue;
|
||||
if (--lim<0) break;
|
||||
int c = -(int)(p.longValue() >>> 32);
|
||||
//int tnum = 0x7fffffff - (int)p.longValue(); // use if priority queue
|
||||
int tnum = (int)p.longValue();
|
||||
String label = getReadableValue(getTermValue(te, tnum), ft, spare);
|
||||
res.add(label, c);
|
||||
indirect[tnumCount] = tnumCount;
|
||||
tnums[tnumCount++] = tnum;
|
||||
// String label = ft.indexedToReadable(getTermText(te, tnum));
|
||||
// add a null label for now... we'll fill it in later.
|
||||
res.add(null, c);
|
||||
}
|
||||
|
||||
// now sort the indexes by the term numbers
|
||||
PrimUtils.sort(0, tnumCount, indirect, new PrimUtils.IntComparator() {
|
||||
@Override
|
||||
public int compare(int a, int b) {
|
||||
return tnums[a] - tnums[b];
|
||||
}
|
||||
});
|
||||
|
||||
// convert the term numbers to term values and set as the label
|
||||
for (int i=0; i<tnumCount; i++) {
|
||||
int idx = indirect[i];
|
||||
int tnum = tnums[idx];
|
||||
String label = getReadableValue(getTermValue(te, tnum), ft, spare);
|
||||
res.setName(idx, label);
|
||||
}
|
||||
|
||||
} else {
|
||||
// add results in index order
|
||||
int i=startTerm;
|
||||
|
|
|
@ -0,0 +1,122 @@
|
|||
package org.apache.solr.util;
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/** Utilities for primitive Java data types. */
|
||||
public class PrimUtils {
|
||||
|
||||
public static abstract class IntComparator {
|
||||
public abstract int compare(int a, int b);
|
||||
public boolean lessThan(int a, int b) {
|
||||
return compare(a,b) < 0;
|
||||
}
|
||||
public boolean equals(int a, int b) {
|
||||
return compare(a,b) == 0;
|
||||
}
|
||||
}
|
||||
|
||||
/** Sort the integer array from "start" inclusive to "end" exclusive in ascending order,
|
||||
* using the provided comparator.
|
||||
*/
|
||||
public static void sort(int start, int end, int[] array, IntComparator comparator) {
|
||||
// This code was copied from Apache Harmony's Arrays.sort(double[]) and modified
|
||||
// to use a comparator, in addition to other small efficiency enhancements
|
||||
// like replacing divisions with shifts.
|
||||
|
||||
int temp;
|
||||
int length = end - start;
|
||||
if (length < 7) {
|
||||
for (int i = start + 1; i < end; i++) {
|
||||
for (int j = i; j > start && comparator.lessThan(array[j], array[j - 1]); j--) {
|
||||
temp = array[j];
|
||||
array[j] = array[j - 1];
|
||||
array[j - 1] = temp;
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
int middle = (start + end) >>> 1;
|
||||
if (length > 7) {
|
||||
int bottom = start;
|
||||
int top = end - 1;
|
||||
if (length > 40) {
|
||||
length >>= 3;
|
||||
bottom = med3(array, bottom, bottom + length, bottom
|
||||
+ (length<<1), comparator);
|
||||
middle = med3(array, middle - length, middle, middle + length, comparator);
|
||||
top = med3(array, top - (length<<1), top - length, top, comparator);
|
||||
}
|
||||
middle = med3(array, bottom, middle, top, comparator);
|
||||
}
|
||||
int partionValue = array[middle];
|
||||
int a, b, c, d;
|
||||
a = b = start;
|
||||
c = d = end - 1;
|
||||
while (true) {
|
||||
while (b <= c && !comparator.lessThan(partionValue, array[b])) {
|
||||
if (comparator.equals(array[b], partionValue)) {
|
||||
temp = array[a];
|
||||
array[a++] = array[b];
|
||||
array[b] = temp;
|
||||
}
|
||||
b++;
|
||||
}
|
||||
while (c >= b && !comparator.lessThan(array[c], partionValue)) {
|
||||
if (comparator.equals(array[c], partionValue)) {
|
||||
temp = array[c];
|
||||
array[c] = array[d];
|
||||
array[d--] = temp;
|
||||
}
|
||||
c--;
|
||||
}
|
||||
if (b > c) {
|
||||
break;
|
||||
}
|
||||
temp = array[b];
|
||||
array[b++] = array[c];
|
||||
array[c--] = temp;
|
||||
}
|
||||
length = a - start < b - a ? a - start : b - a;
|
||||
int l = start;
|
||||
int h = b - length;
|
||||
while (length-- > 0) {
|
||||
temp = array[l];
|
||||
array[l++] = array[h];
|
||||
array[h++] = temp;
|
||||
}
|
||||
length = d - c < end - 1 - d ? d - c : end - 1 - d;
|
||||
l = b;
|
||||
h = end - length;
|
||||
while (length-- > 0) {
|
||||
temp = array[l];
|
||||
array[l++] = array[h];
|
||||
array[h++] = temp;
|
||||
}
|
||||
if ((length = b - a) > 0) {
|
||||
sort(start, start + length, array, comparator);
|
||||
}
|
||||
if ((length = d - c) > 0) {
|
||||
sort(end - length, end, array, comparator);
|
||||
}
|
||||
}
|
||||
|
||||
private static int med3(int[] array, int a, int b, int c, IntComparator comparator) {
|
||||
int x = array[a], y = array[b], z = array[c];
|
||||
return comparator.lessThan(x, y) ? (comparator.lessThan(y, z) ? b : (comparator.lessThan(x, z) ? c : a))
|
||||
: (comparator.lessThan(z, y) ? b : (comparator.lessThan(z, x) ? c : a));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,54 @@
|
|||
package org.apache.solr.util;
|
||||
|
||||
/**
|
||||
* Copyright 2004 The Apache Software Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Random;
|
||||
|
||||
public class IntUtilsTest extends LuceneTestCase {
|
||||
Random r = newRandom();
|
||||
|
||||
public void testSort() {
|
||||
int maxSize = 100;
|
||||
int maxVal = 100;
|
||||
int[] a = new int[maxSize];
|
||||
int[] b = new int[maxSize];
|
||||
|
||||
PrimUtils.IntComparator comparator = new PrimUtils.IntComparator() {
|
||||
@Override
|
||||
public int compare(int a, int b) {
|
||||
return b - a; // sort in reverse
|
||||
}
|
||||
};
|
||||
|
||||
for (int iter=0; iter<100; iter++) {
|
||||
int start = r.nextInt(maxSize+1);
|
||||
int end = start==maxSize ? maxSize : start + r.nextInt(maxSize-start);
|
||||
for (int i=start; i<end; i++) {
|
||||
a[i] = b[i] = r.nextInt(maxVal);
|
||||
}
|
||||
PrimUtils.sort(start, end, a, comparator);
|
||||
Arrays.sort(b, start, end);
|
||||
for (int i=start; i<end; i++) {
|
||||
assertEquals(a[i], b[end-(i-start+1)]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue