mirror of https://github.com/apache/lucene.git
SOLR-2089: Faceting: order term ords before converting to values
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@989406 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4378ffce0b
commit
375afd1342
|
@ -270,6 +270,12 @@ Optimizations
|
||||||
for the first facet request is anywhere from 30% to 32x, depending on how many
|
for the first facet request is anywhere from 30% to 32x, depending on how many
|
||||||
terms are in the field and how many documents match per term. (yonik)
|
terms are in the field and how many documents match per term. (yonik)
|
||||||
|
|
||||||
|
* SOLR-2089: Speed up UnInvertedField faceting (facet.method=fc for
|
||||||
|
multi-valued fields) when facet.limit is both high, and a high enough
|
||||||
|
percentage of the number of unique terms in the field. Extreme cases
|
||||||
|
yield speedups over 3x. (yonik)
|
||||||
|
|
||||||
|
|
||||||
Bug Fixes
|
Bug Fixes
|
||||||
----------------------
|
----------------------
|
||||||
|
|
||||||
|
|
|
@ -37,6 +37,7 @@ import org.apache.solr.core.SolrCore;
|
||||||
import org.apache.solr.schema.FieldType;
|
import org.apache.solr.schema.FieldType;
|
||||||
import org.apache.solr.schema.TrieField;
|
import org.apache.solr.schema.TrieField;
|
||||||
import org.apache.solr.search.*;
|
import org.apache.solr.search.*;
|
||||||
|
import org.apache.solr.util.PrimUtils;
|
||||||
import org.apache.solr.util.BoundedTreeSet;
|
import org.apache.solr.util.BoundedTreeSet;
|
||||||
import org.apache.solr.handler.component.StatsValues;
|
import org.apache.solr.handler.component.StatsValues;
|
||||||
import org.apache.solr.handler.component.FieldFacetStats;
|
import org.apache.solr.handler.component.FieldFacetStats;
|
||||||
|
@ -584,7 +585,7 @@ public class UnInvertedField {
|
||||||
// important if a lot of the counts are repeated (like zero counts would be).
|
// important if a lot of the counts are repeated (like zero counts would be).
|
||||||
|
|
||||||
// minimize object creation and speed comparison by creating a long that
|
// minimize object creation and speed comparison by creating a long that
|
||||||
// encompases both count and term number.
|
// encompasses both count and term number.
|
||||||
// Since smaller values are kept in the TreeSet, make higher counts smaller.
|
// Since smaller values are kept in the TreeSet, make higher counts smaller.
|
||||||
//
|
//
|
||||||
// for equal counts, lower term numbers
|
// for equal counts, lower term numbers
|
||||||
|
@ -597,15 +598,41 @@ public class UnInvertedField {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// now select the right page from the results
|
// now select the right page from the results
|
||||||
|
|
||||||
|
|
||||||
|
final int[] tnums = new int[Math.min(queue.size()-off, lim)];
|
||||||
|
final int[] indirect = counts; // reuse the counts array for the index into the tnums array
|
||||||
|
int tnumCount = 0;
|
||||||
|
|
||||||
for (Long p : queue) {
|
for (Long p : queue) {
|
||||||
if (--off>=0) continue;
|
if (--off>=0) continue;
|
||||||
if (--lim<0) break;
|
if (--lim<0) break;
|
||||||
int c = -(int)(p.longValue() >>> 32);
|
int c = -(int)(p.longValue() >>> 32);
|
||||||
//int tnum = 0x7fffffff - (int)p.longValue(); // use if priority queue
|
//int tnum = 0x7fffffff - (int)p.longValue(); // use if priority queue
|
||||||
int tnum = (int)p.longValue();
|
int tnum = (int)p.longValue();
|
||||||
String label = getReadableValue(getTermValue(te, tnum), ft, spare);
|
indirect[tnumCount] = tnumCount;
|
||||||
res.add(label, c);
|
tnums[tnumCount++] = tnum;
|
||||||
|
// String label = ft.indexedToReadable(getTermText(te, tnum));
|
||||||
|
// add a null label for now... we'll fill it in later.
|
||||||
|
res.add(null, c);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// now sort the indexes by the term numbers
|
||||||
|
PrimUtils.sort(0, tnumCount, indirect, new PrimUtils.IntComparator() {
|
||||||
|
@Override
|
||||||
|
public int compare(int a, int b) {
|
||||||
|
return tnums[a] - tnums[b];
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// convert the term numbers to term values and set as the label
|
||||||
|
for (int i=0; i<tnumCount; i++) {
|
||||||
|
int idx = indirect[i];
|
||||||
|
int tnum = tnums[idx];
|
||||||
|
String label = getReadableValue(getTermValue(te, tnum), ft, spare);
|
||||||
|
res.setName(idx, label);
|
||||||
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
// add results in index order
|
// add results in index order
|
||||||
int i=startTerm;
|
int i=startTerm;
|
||||||
|
|
|
@ -0,0 +1,122 @@
|
||||||
|
package org.apache.solr.util;
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** Utilities for primitive Java data types. */
|
||||||
|
public class PrimUtils {
|
||||||
|
|
||||||
|
public static abstract class IntComparator {
|
||||||
|
public abstract int compare(int a, int b);
|
||||||
|
public boolean lessThan(int a, int b) {
|
||||||
|
return compare(a,b) < 0;
|
||||||
|
}
|
||||||
|
public boolean equals(int a, int b) {
|
||||||
|
return compare(a,b) == 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Sort the integer array from "start" inclusive to "end" exclusive in ascending order,
|
||||||
|
* using the provided comparator.
|
||||||
|
*/
|
||||||
|
public static void sort(int start, int end, int[] array, IntComparator comparator) {
|
||||||
|
// This code was copied from Apache Harmony's Arrays.sort(double[]) and modified
|
||||||
|
// to use a comparator, in addition to other small efficiency enhancements
|
||||||
|
// like replacing divisions with shifts.
|
||||||
|
|
||||||
|
int temp;
|
||||||
|
int length = end - start;
|
||||||
|
if (length < 7) {
|
||||||
|
for (int i = start + 1; i < end; i++) {
|
||||||
|
for (int j = i; j > start && comparator.lessThan(array[j], array[j - 1]); j--) {
|
||||||
|
temp = array[j];
|
||||||
|
array[j] = array[j - 1];
|
||||||
|
array[j - 1] = temp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
int middle = (start + end) >>> 1;
|
||||||
|
if (length > 7) {
|
||||||
|
int bottom = start;
|
||||||
|
int top = end - 1;
|
||||||
|
if (length > 40) {
|
||||||
|
length >>= 3;
|
||||||
|
bottom = med3(array, bottom, bottom + length, bottom
|
||||||
|
+ (length<<1), comparator);
|
||||||
|
middle = med3(array, middle - length, middle, middle + length, comparator);
|
||||||
|
top = med3(array, top - (length<<1), top - length, top, comparator);
|
||||||
|
}
|
||||||
|
middle = med3(array, bottom, middle, top, comparator);
|
||||||
|
}
|
||||||
|
int partionValue = array[middle];
|
||||||
|
int a, b, c, d;
|
||||||
|
a = b = start;
|
||||||
|
c = d = end - 1;
|
||||||
|
while (true) {
|
||||||
|
while (b <= c && !comparator.lessThan(partionValue, array[b])) {
|
||||||
|
if (comparator.equals(array[b], partionValue)) {
|
||||||
|
temp = array[a];
|
||||||
|
array[a++] = array[b];
|
||||||
|
array[b] = temp;
|
||||||
|
}
|
||||||
|
b++;
|
||||||
|
}
|
||||||
|
while (c >= b && !comparator.lessThan(array[c], partionValue)) {
|
||||||
|
if (comparator.equals(array[c], partionValue)) {
|
||||||
|
temp = array[c];
|
||||||
|
array[c] = array[d];
|
||||||
|
array[d--] = temp;
|
||||||
|
}
|
||||||
|
c--;
|
||||||
|
}
|
||||||
|
if (b > c) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
temp = array[b];
|
||||||
|
array[b++] = array[c];
|
||||||
|
array[c--] = temp;
|
||||||
|
}
|
||||||
|
length = a - start < b - a ? a - start : b - a;
|
||||||
|
int l = start;
|
||||||
|
int h = b - length;
|
||||||
|
while (length-- > 0) {
|
||||||
|
temp = array[l];
|
||||||
|
array[l++] = array[h];
|
||||||
|
array[h++] = temp;
|
||||||
|
}
|
||||||
|
length = d - c < end - 1 - d ? d - c : end - 1 - d;
|
||||||
|
l = b;
|
||||||
|
h = end - length;
|
||||||
|
while (length-- > 0) {
|
||||||
|
temp = array[l];
|
||||||
|
array[l++] = array[h];
|
||||||
|
array[h++] = temp;
|
||||||
|
}
|
||||||
|
if ((length = b - a) > 0) {
|
||||||
|
sort(start, start + length, array, comparator);
|
||||||
|
}
|
||||||
|
if ((length = d - c) > 0) {
|
||||||
|
sort(end - length, end, array, comparator);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static int med3(int[] array, int a, int b, int c, IntComparator comparator) {
|
||||||
|
int x = array[a], y = array[b], z = array[c];
|
||||||
|
return comparator.lessThan(x, y) ? (comparator.lessThan(y, z) ? b : (comparator.lessThan(x, z) ? c : a))
|
||||||
|
: (comparator.lessThan(z, y) ? b : (comparator.lessThan(z, x) ? c : a));
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,54 @@
|
||||||
|
package org.apache.solr.util;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copyright 2004 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Random;
|
||||||
|
|
||||||
|
public class IntUtilsTest extends LuceneTestCase {
|
||||||
|
Random r = newRandom();
|
||||||
|
|
||||||
|
public void testSort() {
|
||||||
|
int maxSize = 100;
|
||||||
|
int maxVal = 100;
|
||||||
|
int[] a = new int[maxSize];
|
||||||
|
int[] b = new int[maxSize];
|
||||||
|
|
||||||
|
PrimUtils.IntComparator comparator = new PrimUtils.IntComparator() {
|
||||||
|
@Override
|
||||||
|
public int compare(int a, int b) {
|
||||||
|
return b - a; // sort in reverse
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
for (int iter=0; iter<100; iter++) {
|
||||||
|
int start = r.nextInt(maxSize+1);
|
||||||
|
int end = start==maxSize ? maxSize : start + r.nextInt(maxSize-start);
|
||||||
|
for (int i=start; i<end; i++) {
|
||||||
|
a[i] = b[i] = r.nextInt(maxVal);
|
||||||
|
}
|
||||||
|
PrimUtils.sort(start, end, a, comparator);
|
||||||
|
Arrays.sort(b, start, end);
|
||||||
|
for (int i=start; i<end; i++) {
|
||||||
|
assertEquals(a[i], b[end-(i-start+1)]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in New Issue