SOLR-9981: Performance improvements and bug fixes for the Analytics component

This commit is contained in:
Dennis Gove 2017-06-24 20:22:21 -04:00
parent 3b07e7241e
commit a5dce163eb
18 changed files with 163 additions and 33 deletions

View File

@ -219,6 +219,12 @@ Optimizations
* SOLR-10727: Avoid polluting the filter cache for certain types of faceting (typically ranges) when
the base docset is empty. (David Smiley)
* SOLR-9981: Performance improvements and bug fixes for the Analytics component. The performance fix
stops ALL Lucene segments from being read over and over again for each stats collector; instead, the
LeafReaderContext that refers to the "current" segment is reused. This fix shows an improvement of about
25% in query time for a dataset of ~10M (=9.8M) records. Given the nature of the fix, the improvement
should get better as the dataset increases. Also fixes an NPE during comparison. (Houston Putman)
Other Changes
----------------------
* SOLR-10236: Removed FieldType.getNumericType(). Use getNumberType() instead. (Tomás Fernández Löbbe)

View File

@ -28,7 +28,7 @@ import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.HashMap;
import com.google.common.collect.Iterables;
import org.apache.lucene.index.LeafReaderContext;
@ -98,7 +98,7 @@ public class FacetingAccumulator extends BasicAccumulator implements FacetValueA
List<RangeFacetRequest> rangeFreqs = request.getRangeFacets();
List<QueryFacetRequest> queryFreqs = request.getQueryFacets();
this.fieldFacetExpressions = new TreeMap<>();
this.fieldFacetExpressions = new HashMap<>();
this.rangeFacetExpressions = new LinkedHashMap<>(rangeFreqs.size());
this.queryFacetExpressions = new LinkedHashMap<>(queryFreqs.size());
this.fieldFacetCollectors = new LinkedHashMap<>(fieldFreqs.size());
@ -120,8 +120,8 @@ public class FacetingAccumulator extends BasicAccumulator implements FacetValueA
final SchemaField ff = fr.getField();
final FieldFacetAccumulator facc = FieldFacetAccumulator.create(searcher, this, ff);
facetAccumulators.add(facc);
fieldFacetExpressions.put(freq.getName(), new TreeMap<String, Expression[]>() );
fieldFacetCollectors.put(freq.getName(), new TreeMap<String,StatsCollector[]>());
fieldFacetExpressions.put(freq.getName(), new HashMap<String, Expression[]>() );
fieldFacetCollectors.put(freq.getName(), new HashMap<String,StatsCollector[]>());
}
/**
* For each range and query facet request add a bucket to the corresponding
@ -299,6 +299,22 @@ public class FacetingAccumulator extends BasicAccumulator implements FacetValueA
@Override
public int compare(Entry<String,Expression[]> o1, Entry<String,Expression[]> o2) {
// Handle nulls. Null is treated as an infinitely big number so that in case of ASCENDING sorts,
// Nulls will appear last. In case of DESC sorts, Nulls will appear last.
boolean firstIsNull = false;
if (o1 == null || o1.getValue() == null || o1.getValue()[comparatorExpressionPlace] == null)
firstIsNull = true;
boolean secondIsNull = false;
if (o2 == null || o2.getValue() == null || o2.getValue()[comparatorExpressionPlace] == null)
secondIsNull = true;
if (firstIsNull && secondIsNull)
return 0;
else if (firstIsNull)
return 1;
else if (secondIsNull)
return -1;
return comp.compare(o1.getValue()[comparatorExpressionPlace], o2.getValue()[comparatorExpressionPlace]);
}
}

View File

@ -29,10 +29,19 @@ public abstract class Expression {
public Comparator<Expression> comparator(final FacetSortDirection direction) {
return (a, b) -> {
if( direction == FacetSortDirection.ASCENDING ){
return a.getValue().compareTo(b.getValue());
boolean aIsNull = a.getValue() == null;
boolean bIsNull = b.getValue() == null;
if (aIsNull && bIsNull) return 0;
if( direction == FacetSortDirection.ASCENDING ){ // nulls are last for ASC sort
return aIsNull ? 1
: bIsNull ? -1
: a.getValue().compareTo(b.getValue());
} else {
return b.getValue().compareTo(a.getValue());
return aIsNull ? -1
: bIsNull ? 1
: b.getValue().compareTo(a.getValue());
}
};
}

View File

@ -37,19 +37,32 @@ public class MinMaxStatsCollector implements StatsCollector{
protected MutableValue value;
protected final Set<String> statsList;
protected final ValueSource source;
protected FunctionValues function;
protected ValueFiller valueFiller;
private CollectorState state;
public MinMaxStatsCollector(ValueSource source, Set<String> statsList) {
public MinMaxStatsCollector(ValueSource source, Set<String> statsList, CollectorState state) {
this.source = source;
this.statsList = statsList;
this.state = state;
}
public void setNextReader(LeafReaderContext context) throws IOException {
function = source.getValues(null, context);
valueFiller = function.getValueFiller();
state.setNextReader(source, context);
valueFiller = state.function.getValueFiller();
value = valueFiller.getValue();
}
public static class CollectorState {
FunctionValues function;
LeafReaderContext context = null;
public void setNextReader(ValueSource source, LeafReaderContext context) throws IOException {
if (this.context != context) {
this.context = context;
this.function = source.getValues(null, context);
}
}
}
public void collect(int doc) throws IOException {
valueFiller.fillValue(doc);
@ -101,7 +114,7 @@ public class MinMaxStatsCollector implements StatsCollector{
@Override
public FunctionValues getFunction() {
return function;
return state.function;
}
public String valueSourceString() {

View File

@ -29,14 +29,16 @@ public class NumericStatsCollector extends MinMaxStatsCollector {
protected double sumOfSquares = 0;
protected double mean = 0;
protected double stddev = 0;
protected CollectorState state;
public NumericStatsCollector(ValueSource source, Set<String> statsList) {
super(source, statsList);
public NumericStatsCollector(ValueSource source, Set<String> statsList, CollectorState state) {
super(source, statsList, state);
this.state = state;
}
public void collect(int doc) throws IOException {
super.collect(doc);
double value = function.doubleVal(doc);
double value = state.function.doubleVal(doc);
sum += value;
sumOfSquares += (value * value);
}

View File

@ -33,6 +33,7 @@ import org.apache.lucene.queries.function.valuesource.IntFieldSource;
import org.apache.lucene.queries.function.valuesource.LongFieldSource;
import org.apache.solr.analytics.expression.ExpressionFactory;
import org.apache.solr.analytics.request.ExpressionRequest;
import org.apache.solr.analytics.statistics.MinMaxStatsCollector.CollectorState;
import org.apache.solr.analytics.util.AnalyticsParams;
import org.apache.solr.analytics.util.valuesource.AbsoluteValueDoubleFunction;
import org.apache.solr.analytics.util.valuesource.AddDoubleFunction;
@ -213,25 +214,32 @@ public class StatsCollectorSupplierFactory {
}
}
}
final CollectorState states[] = new CollectorState[statsArr.length];
for (int count = 0; count < statsArr.length; count++) {
states[count] = new CollectorState();
}
// Making the Supplier
return new Supplier<StatsCollector[]>() {
private final CollectorState collectorState[] = states;
public StatsCollector[] get() {
StatsCollector[] collectors = new StatsCollector[statsArr.length];
for (int count = 0; count < statsArr.length; count++) {
if(numericBools[count]){
StatsCollector sc = new NumericStatsCollector(sourceArr[count], statsArr[count]);
StatsCollector sc = new NumericStatsCollector(sourceArr[count], statsArr[count], collectorState[count]);
if(uniqueBools[count]) sc = new UniqueStatsCollector(sc);
if(medianBools[count]) sc = new MedianStatsCollector(sc);
if(percsArr[count]!=null) sc = new PercentileStatsCollector(sc,percsArr[count],percsNames[count]);
collectors[count]=sc;
} else if (dateBools[count]) {
StatsCollector sc = new MinMaxStatsCollector(sourceArr[count], statsArr[count]);
StatsCollector sc = new MinMaxStatsCollector(sourceArr[count], statsArr[count], collectorState[count]);
if(uniqueBools[count]) sc = new UniqueStatsCollector(sc);
if(medianBools[count]) sc = new DateMedianStatsCollector(sc);
if(percsArr[count]!=null) sc = new PercentileStatsCollector(sc,percsArr[count],percsNames[count]);
collectors[count]=sc;
} else {
StatsCollector sc = new MinMaxStatsCollector(sourceArr[count], statsArr[count]);
StatsCollector sc = new MinMaxStatsCollector(sourceArr[count], statsArr[count], collectorState[count]);
if(uniqueBools[count]) sc = new UniqueStatsCollector(sc);
if(medianBools[count]) sc = new MedianStatsCollector(sc);
if(percsArr[count]!=null) sc = new PercentileStatsCollector(sc,percsArr[count],percsNames[count]);

View File

@ -0,0 +1,4 @@
o.ar.s.min=min(double_dd)
o.ar.s.max=max(long_ld)
o.ar.ff=string_sd
o.ar.ff.string_sd.sortstatistic=min

View File

@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<analyticsRequestEnvelope stats="true" olap="true">
<analyticsRequest>
<name>MinMax Request</name>
<statistic>
<expression>min(double(double_dd))</expression>
<name>min</name>
</statistic>
<statistic>
<expression>max(long(long_ld))</expression>
<name>max</name>
</statistic>
</analyticsRequest>
</analyticsRequestEnvelope>

View File

@ -60,7 +60,7 @@ public class NoFacetTest extends AbstractAnalyticsStatsTest {
@BeforeClass
public static void beforeClass() throws Exception {
initCore("solrconfig-basic.xml","schema-analytics.xml");
initCore("solrconfig-analytics.xml","schema-analytics.xml");
h.update("<delete><query>*:*</query></delete>");
defaults.put("int_id", new Integer(0));
defaults.put("long_ld", new Long(0));

View File

@ -48,7 +48,7 @@ public class ExpressionTest extends AbstractAnalyticsStatsTest {
@BeforeClass
public static void beforeClass() throws Exception {
initCore("solrconfig-basic.xml", "schema-analytics.xml");
initCore("solrconfig-analytics.xml", "schema-analytics.xml");
h.update("<delete><query>*:*</query></delete>");
for (int j = 0; j < NUM_LOOPS; ++j) {

View File

@ -312,4 +312,19 @@ public class AbstractAnalyticsFacetTest extends SolrTestCaseJ4 {
IOUtils.closeWhileHandlingException(file, in);
}
}
protected void removeNodes(String xPath, List<Double> string) throws XPathExpressionException {
NodeList missingNodes = getNodes(xPath);
List<Double> result = new ArrayList<Double>();
for (int idx = 0; idx < missingNodes.getLength(); ++idx) {
result.add(Double.parseDouble(missingNodes.item(idx).getTextContent()));
}
string.removeAll(result);
}
protected NodeList getNodes(String xPath) throws XPathExpressionException {
StringBuilder sb = new StringBuilder(xPath);
return (NodeList) xPathFact.newXPath().compile(sb.toString()).evaluate(doc, XPathConstants.NODESET);
}
}

View File

@ -0,0 +1,40 @@
package org.apache.solr.analytics.facet;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.solr.analytics.AbstractAnalyticsStatsTest;
import org.apache.solr.analytics.expression.ExpressionTest;
import org.junit.BeforeClass;
import org.junit.Test;
@SuppressCodecs({"Lucene3x","Lucene40","Lucene41","Lucene42","Appending","Asserting"})
public class FacetSortingTest extends AbstractAnalyticsStatsTest {
private static String fileName = "/analytics/requestFiles/facetSorting.txt";
@BeforeClass
public static void beforeClass() throws Exception {
initCore("solrconfig-analytics.xml", "schema-analytics.xml");
h.update("<delete><query>*:*</query></delete>");
// The data set below is so generated that in bucket corresponding fieldFacet B, double_dd column has null values
// and in bucket C corresponding to fieldFacet C has null values for column long_ld.
// FieldFaceting occurs on string_sd field
assertU(adoc("id", "1001", "string_sd", "A", "double_dd", "" + 3, "long_ld", "" + 1));
assertU(adoc("id", "1002", "string_sd", "A", "double_dd", "" + 25, "long_ld", "" + 2));
assertU(adoc("id", "1003", "string_sd", "B", "long_ld", "" + 3));
assertU(adoc("id", "1004", "string_sd", "B", "long_ld", "" + 4));
assertU(adoc("id", "1005", "string_sd", "C", "double_dd", "" + 17));
assertU(commit());
String response = h.query(request(fileToStringArr(ExpressionTest.class, fileName)));
System.out.println("Response=" + response);
setResponse(response);
}
@Test
public void addTest() throws Exception {
Double minResult = (Double) getStatResult("ar", "min", VAL_TYPE.DOUBLE);
Long maxResult = (Long) getStatResult("ar", "max", VAL_TYPE.LONG);
assertEquals(Double.valueOf(minResult), Double.valueOf(3.0));
assertEquals(Long.valueOf(maxResult),Long.valueOf(4));
}
}

View File

@ -44,7 +44,7 @@ public class FieldFacetExtrasTest extends AbstractAnalyticsFacetTest {
@BeforeClass
public static void beforeClass() throws Exception {
initCore("solrconfig-basic.xml","schema-analytics.xml");
initCore("solrconfig-analytics.xml","schema-analytics.xml");
h.update("<delete><query>*:*</query></delete>");
//INT

View File

@ -24,6 +24,7 @@ import java.util.List;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import org.w3c.dom.Node;
public class FieldFacetTest extends AbstractAnalyticsFacetTest{
@ -87,7 +88,7 @@ public class FieldFacetTest extends AbstractAnalyticsFacetTest{
@BeforeClass
public static void beforeClass() throws Exception {
initCore("solrconfig-basic.xml","schema-analytics.xml");
initCore("solrconfig-analytics.xml","schema-analytics.xml");
h.update("<delete><query>*:*</query></delete>");
defaults.put("int", new Integer(0));
@ -1037,31 +1038,33 @@ public class FieldFacetTest extends AbstractAnalyticsFacetTest{
public void missingFacetTest() throws Exception {
//int MultiDate
String xPath = "/response/lst[@name='stats']/lst[@name='missingf']/lst[@name='fieldFacets']/lst[@name='date_dtdm']/lst[@name='(MISSING)']";
assertNotNull(getRawResponse(), getNode(xPath));
Node missingNodeXPath = getNode(xPath);
assertNotNull(getRawResponse(), missingNodeXPath);
ArrayList<Double> string = getDoubleList("missingf", "fieldFacets", "date_dtdm", "double", "mean");
string.remove(0);
super.removeNodes(xPath, string);
ArrayList<Double> stringTest = calculateNumberStat(multiDateTestStart, "mean");
assertEquals(getRawResponse(), string,stringTest);
//Int String
xPath = "/response/lst[@name='stats']/lst[@name='missingf']/lst[@name='fieldFacets']/lst[@name='string_sd']/lst[@name='(MISSING)']";
assertNotNull(getRawResponse(), getNode(xPath));
missingNodeXPath = getNode(xPath);
String missingNodeXPathStr = xPath;
assertNotNull(getRawResponse(), missingNodeXPath);
xPath = "/response/lst[@name='stats']/lst[@name='missingf']/lst[@name='fieldFacets']/lst[@name='string_sd']/lst[@name='str0']";
assertNull(getRawResponse(), getNode(xPath));
List<Double> intString = getDoubleList("missingf", "fieldFacets", "string_sd", "double", "mean");
intString.remove(0);
removeNodes(missingNodeXPathStr, intString);
ArrayList<Double> intStringTest = calculateNumberStat(intStringTestStart, "mean");
assertEquals(getRawResponse(), intString,intStringTest);
//Int Date
Collection<Double> intDate = getDoubleList("missingf", "fieldFacets", "date_dtd", "double", "mean");
ArrayList<ArrayList<Double>> intDateMissingTestStart = (ArrayList<ArrayList<Double>>) intDateTestStart.clone();
ArrayList<Double> intDateTest = calculateNumberStat(intDateMissingTestStart, "mean");
assertEquals(getRawResponse(),intDate,intDateTest);
}
private void checkStddevs(ArrayList<Double> list1, ArrayList<Double> list2) {

View File

@ -35,7 +35,7 @@ public class QueryFacetTest extends AbstractAnalyticsFacetTest {
@BeforeClass
public static void beforeClass() throws Exception {
initCore("solrconfig-basic.xml","schema-analytics.xml");
initCore("solrconfig-analytics.xml","schema-analytics.xml");
}
@SuppressWarnings("unchecked")

View File

@ -46,7 +46,7 @@ public class RangeFacetTest extends AbstractAnalyticsFacetTest {
@BeforeClass
public static void beforeClass() throws Exception {
initCore("solrconfig-basic.xml","schema-analytics.xml");
initCore("solrconfig-analytics.xml","schema-analytics.xml");
h.update("<delete><query>*:*</query></delete>");
//INT

View File

@ -35,7 +35,7 @@ public class FunctionTest extends AbstractAnalyticsStatsTest {
@BeforeClass
public static void beforeClass() throws Exception {
initCore("solrconfig-basic.xml","schema-analytics.xml");
initCore("solrconfig-analytics.xml","schema-analytics.xml");
h.update("<delete><query>*:*</query></delete>");
for (int j = 0; j < NUM_LOOPS; ++j) {