mirror of https://github.com/apache/lucene.git
SOLR-6968: perf tweak: eliminate use of SPARSE storage option since it has some pathologically bad behavior for some set sizes (particularly when merging shard responses)
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1679241 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
761df41537
commit
3c6f6678b1
|
@ -57,6 +57,7 @@ import org.apache.solr.search.SolrIndexSearcher;
|
|||
import org.apache.solr.search.SyntaxError;
|
||||
|
||||
import net.agkn.hll.HLL;
|
||||
import net.agkn.hll.HLLType;
|
||||
import com.google.common.hash.Hashing;
|
||||
import com.google.common.hash.HashFunction;
|
||||
|
||||
|
@ -727,7 +728,14 @@ public class StatsField {
|
|||
return hasher;
|
||||
}
|
||||
public HLL newHLL() {
|
||||
return new HLL(getLog2m(), getRegwidth());
|
||||
// Although it (in theory) saves memory for "medium" size sets, the SPARSE type seems to have
|
||||
// some nasty impacts on response time as it gets larger - particularly in distrib requests.
|
||||
// Merging large SPARSE HLLs is much, much slower than merging FULL HLLs with the same num docs
|
||||
//
|
||||
// TODO: add more tuning options for this.
|
||||
return new HLL(getLog2m(), getRegwidth(), -1 /* auto explicit threshold */,
|
||||
false /* no sparse representation */, HLLType.EMPTY);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -35,6 +35,7 @@ import org.apache.solr.schema.*;
|
|||
import com.tdunning.math.stats.AVLTreeDigest;
|
||||
|
||||
import net.agkn.hll.HLL;
|
||||
import net.agkn.hll.HLLType;
|
||||
import com.google.common.hash.Hashing;
|
||||
import com.google.common.hash.HashFunction;
|
||||
|
||||
|
@ -139,7 +140,8 @@ abstract class AbstractStatsValues<T> implements StatsValues {
|
|||
* Hash function that must be used by implementations of {@link #hash}
|
||||
*/
|
||||
protected final HashFunction hasher;
|
||||
private final HLL hll;
|
||||
// if null, no HLL logic can be computed; not final because of "union" optimization (see below)
|
||||
private HLL hll;
|
||||
|
||||
// facetField facetValue
|
||||
protected Map<String,Map<String, StatsValues>> facets = new HashMap<>();
|
||||
|
@ -212,7 +214,17 @@ abstract class AbstractStatsValues<T> implements StatsValues {
|
|||
|
||||
if (computeCardinality) {
|
||||
byte[] data = (byte[]) stv.get("cardinality");
|
||||
hll.union(HLL.fromBytes(data));
|
||||
HLL other = HLL.fromBytes(data);
|
||||
if (hll.getType().equals(HLLType.EMPTY)) {
|
||||
// The HLL.union method goes out of its way not to modify the "other" HLL.
|
||||
// Which means in the case of merging into an "EMPTY" HLL (guaranteed to happen at
|
||||
// least once in every coordination of shard requests) it always clones all
|
||||
// of the internal storage -- but since we're going to throw "other" away after
|
||||
// the merge, this just means a short term doubling of RAM that we can skip.
|
||||
hll = other;
|
||||
} else {
|
||||
hll.union(other);
|
||||
}
|
||||
}
|
||||
|
||||
updateTypeSpecificStats(stv);
|
||||
|
|
Loading…
Reference in New Issue