SOLR-6968: perf tweak: eliminate use of SPARSE storage option since it has some pathologically bad behavior for some set sizes (particularly when merging shard responses)

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1679241 13f79535-47bb-0310-9956-ffa450edef68
Chris M. Hostetter 2015-05-13 16:54:46 +00:00
parent 761df41537
commit 3c6f6678b1
2 changed files with 23 additions and 3 deletions


@@ -57,6 +57,7 @@ import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SyntaxError;
import net.agkn.hll.HLL;
import net.agkn.hll.HLLType;
import com.google.common.hash.Hashing;
import com.google.common.hash.HashFunction;
@@ -727,7 +728,14 @@ public class StatsField {
return hasher;
}
public HLL newHLL() {
return new HLL(getLog2m(), getRegwidth());
// Although it (in theory) saves memory for "medium" size sets, the SPARSE type seems to have
// some nasty impacts on response time as it gets larger - particularly in distrib requests.
// Merging large SPARSE HLLs is much, much slower than merging FULL HLLs with the same num docs.
//
// TODO: add more tuning options for this.
return new HLL(getLog2m(), getRegwidth(), -1 /* auto explicit threshold */,
false /* no sparse representation */, HLLType.EMPTY);
}
}
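
For reference, here is a minimal standalone sketch (outside Solr) contrasting the old two-argument construction with the new FULL-only construction. It assumes the same net.agkn.hll and Guava classes the patch imports; the HllConstructionSketch class and the log2m/regwidth values are illustrative only, not the values StatsField derives from its local params, and the comments reflect my reading of the java-hll defaults.

import net.agkn.hll.HLL;
import net.agkn.hll.HLLType;
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;

public class HllConstructionSketch {
  public static void main(String[] args) {
    final int log2m = 13;   // illustrative; StatsField computes these from
    final int regwidth = 6; // the stats.field local params

    // Old behavior: the 2-arg constructor leaves the SPARSE representation
    // enabled, so the HLL may pass through EXPLICIT and SPARSE storage
    // before being promoted to FULL.
    HLL withSparse = new HLL(log2m, regwidth);

    // New behavior in this commit: auto explicit threshold, SPARSE disabled,
    // so the HLL promotes directly to FULL once it outgrows EXPLICIT storage.
    HLL fullOnly = new HLL(log2m, regwidth, -1 /* auto explicit threshold */,
                           false /* no sparse representation */, HLLType.EMPTY);

    HashFunction hasher = Hashing.murmur3_128();
    for (long i = 0; i < 50_000L; i++) {
      long hash = hasher.hashLong(i).asLong();
      withSparse.addRaw(hash);
      fullOnly.addRaw(hash);
    }
    // Both estimate the same cardinality; only the internal storage differs.
    System.out.println(withSparse.cardinality() + " vs " + fullOnly.cardinality());
  }
}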


@@ -35,6 +35,7 @@ import org.apache.solr.schema.*;
import com.tdunning.math.stats.AVLTreeDigest;
import net.agkn.hll.HLL;
import net.agkn.hll.HLLType;
import com.google.common.hash.Hashing;
import com.google.common.hash.HashFunction;
@@ -139,7 +140,8 @@ abstract class AbstractStatsValues<T> implements StatsValues {
* Hash function that must be used by implementations of {@link #hash}
*/
protected final HashFunction hasher;
private final HLL hll;
// if null, no HLL logic can be computed; not final because of "union" optimization (see below)
private HLL hll;
// facetField facetValue
protected Map<String,Map<String, StatsValues>> facets = new HashMap<>();
@@ -212,7 +214,17 @@ abstract class AbstractStatsValues<T> implements StatsValues {
if (computeCardinality) {
byte[] data = (byte[]) stv.get("cardinality");
hll.union(HLL.fromBytes(data));
HLL other = HLL.fromBytes(data);
if (hll.getType().equals(HLLType.EMPTY)) {
// The HLL.union method goes out of its way not to modify the "other" HLL,
// which means in the case of merging into an "EMPTY" HLL (guaranteed to happen at
// least once in every coordination of shard requests) it always clones all
// of the internal storage -- but since we're going to throw "other" away after
// the merge, this just means a short-term doubling of RAM that we can skip.
hll = other;
} else {
hll.union(other);
}
}
updateTypeSpecificStats(stv);
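
Stripped of the Solr plumbing, the merge logic above reduces to the following pattern. The HllMergeSketch class and its log2m/regwidth values are hypothetical, used only to illustrate the adopt-vs-union decision; the real code works on the hll field of AbstractStatsValues.

import net.agkn.hll.HLL;
import net.agkn.hll.HLLType;

public class HllMergeSketch {
  // Accumulator starts EMPTY on the coordinating node, mirroring newHLL() above.
  private HLL accumulator = new HLL(13, 6, -1 /* auto explicit threshold */,
                                    false /* no sparse representation */, HLLType.EMPTY);

  public void mergeShardResponse(byte[] serializedHll) {
    HLL other = HLL.fromBytes(serializedHll);
    if (accumulator.getType().equals(HLLType.EMPTY)) {
      // First shard to report: adopt "other" directly instead of unioning it,
      // since union would clone its storage only for "other" to be discarded.
      accumulator = other;
    } else {
      // Later shards: a normal union into the accumulator.
      accumulator.union(other);
    }
  }

  public long cardinality() {
    return accumulator.cardinality();
  }
}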