Revert "SOLR-9981: Performance improvements and bug fixes for the Analytics component"

This reverts commit a5dce163eb.
This commit is contained in:
Dennis Gove 2017-06-26 13:51:28 -04:00
parent 288ab09ceb
commit bee8d7ccb3
18 changed files with 33 additions and 163 deletions

View File

@ -227,12 +227,6 @@ Optimizations
* SOLR-10727: Avoid polluting the filter cache for certain types of faceting (typically ranges) when * SOLR-10727: Avoid polluting the filter cache for certain types of faceting (typically ranges) when
the base docset is empty. (David Smiley) the base docset is empty. (David Smiley)
* SOLR-9981: Performance improvements and bug fixes for the Analytics component. The performance fix
stops ALL lucene segments from being read over and over again for each stats collector; the AtomicReaderContext
that refers to the "current" segment is reused instead. This fix shows an improvement of about 25% in query
time for a dataset of ~10M (=9.8M) records. Given the nature of the fix, the improvement should get
better as the dataset increases. Also fixes an NPE during comparison. (Houston Putman)
Other Changes Other Changes
---------------------- ----------------------
* SOLR-10236: Removed FieldType.getNumericType(). Use getNumberType() instead. (Tomás Fernández Löbbe) * SOLR-10236: Removed FieldType.getNumericType(). Use getNumberType() instead. (Tomás Fernández Löbbe)

View File

@ -28,7 +28,7 @@ import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Map.Entry; import java.util.Map.Entry;
import java.util.Set; import java.util.Set;
import java.util.HashMap; import java.util.TreeMap;
import com.google.common.collect.Iterables; import com.google.common.collect.Iterables;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
@ -98,7 +98,7 @@ public class FacetingAccumulator extends BasicAccumulator implements FacetValueA
List<RangeFacetRequest> rangeFreqs = request.getRangeFacets(); List<RangeFacetRequest> rangeFreqs = request.getRangeFacets();
List<QueryFacetRequest> queryFreqs = request.getQueryFacets(); List<QueryFacetRequest> queryFreqs = request.getQueryFacets();
this.fieldFacetExpressions = new HashMap<>(); this.fieldFacetExpressions = new TreeMap<>();
this.rangeFacetExpressions = new LinkedHashMap<>(rangeFreqs.size()); this.rangeFacetExpressions = new LinkedHashMap<>(rangeFreqs.size());
this.queryFacetExpressions = new LinkedHashMap<>(queryFreqs.size()); this.queryFacetExpressions = new LinkedHashMap<>(queryFreqs.size());
this.fieldFacetCollectors = new LinkedHashMap<>(fieldFreqs.size()); this.fieldFacetCollectors = new LinkedHashMap<>(fieldFreqs.size());
@ -120,8 +120,8 @@ public class FacetingAccumulator extends BasicAccumulator implements FacetValueA
final SchemaField ff = fr.getField(); final SchemaField ff = fr.getField();
final FieldFacetAccumulator facc = FieldFacetAccumulator.create(searcher, this, ff); final FieldFacetAccumulator facc = FieldFacetAccumulator.create(searcher, this, ff);
facetAccumulators.add(facc); facetAccumulators.add(facc);
fieldFacetExpressions.put(freq.getName(), new HashMap<String, Expression[]>() ); fieldFacetExpressions.put(freq.getName(), new TreeMap<String, Expression[]>() );
fieldFacetCollectors.put(freq.getName(), new HashMap<String,StatsCollector[]>()); fieldFacetCollectors.put(freq.getName(), new TreeMap<String,StatsCollector[]>());
} }
/** /**
* For each range and query facet request add a bucket to the corresponding * For each range and query facet request add a bucket to the corresponding
@ -299,22 +299,6 @@ public class FacetingAccumulator extends BasicAccumulator implements FacetValueA
@Override @Override
public int compare(Entry<String,Expression[]> o1, Entry<String,Expression[]> o2) { public int compare(Entry<String,Expression[]> o1, Entry<String,Expression[]> o2) {
// Handle nulls. A null entry, a null value array, or a null expression always compares as greater than
// a non-null one, so nulls appear last for both ASCENDING and DESCENDING sorts.
boolean firstIsNull = false;
if (o1 == null || o1.getValue() == null || o1.getValue()[comparatorExpressionPlace] == null)
firstIsNull = true;
boolean secondIsNull = false;
if (o2 == null || o2.getValue() == null || o2.getValue()[comparatorExpressionPlace] == null)
secondIsNull = true;
if (firstIsNull && secondIsNull)
return 0;
else if (firstIsNull)
return 1;
else if (secondIsNull)
return -1;
return comp.compare(o1.getValue()[comparatorExpressionPlace], o2.getValue()[comparatorExpressionPlace]); return comp.compare(o1.getValue()[comparatorExpressionPlace], o2.getValue()[comparatorExpressionPlace]);
} }
} }

View File

@ -29,19 +29,10 @@ public abstract class Expression {
public Comparator<Expression> comparator(final FacetSortDirection direction) { public Comparator<Expression> comparator(final FacetSortDirection direction) {
return (a, b) -> { return (a, b) -> {
boolean aIsNull = a.getValue() == null; if( direction == FacetSortDirection.ASCENDING ){
boolean bIsNull = b.getValue() == null; return a.getValue().compareTo(b.getValue());
if (aIsNull && bIsNull) return 0;
if( direction == FacetSortDirection.ASCENDING ){ // nulls are last for ASC sort
return aIsNull ? 1
: bIsNull ? -1
: a.getValue().compareTo(b.getValue());
} else { } else {
return aIsNull ? -1 return b.getValue().compareTo(a.getValue());
: bIsNull ? 1
: b.getValue().compareTo(a.getValue());
} }
}; };
} }

View File

@ -37,32 +37,19 @@ public class MinMaxStatsCollector implements StatsCollector{
protected MutableValue value; protected MutableValue value;
protected final Set<String> statsList; protected final Set<String> statsList;
protected final ValueSource source; protected final ValueSource source;
protected FunctionValues function;
protected ValueFiller valueFiller; protected ValueFiller valueFiller;
private CollectorState state;
public MinMaxStatsCollector(ValueSource source, Set<String> statsList, CollectorState state) { public MinMaxStatsCollector(ValueSource source, Set<String> statsList) {
this.source = source; this.source = source;
this.statsList = statsList; this.statsList = statsList;
this.state = state;
} }
public void setNextReader(LeafReaderContext context) throws IOException { public void setNextReader(LeafReaderContext context) throws IOException {
state.setNextReader(source, context); function = source.getValues(null, context);
valueFiller = state.function.getValueFiller(); valueFiller = function.getValueFiller();
value = valueFiller.getValue(); value = valueFiller.getValue();
} }
/**
 * Remembers the {@link FunctionValues} loaded for the most recently seen segment so that
 * repeated calls with the same {@link LeafReaderContext} instance do not load them again.
 */
public static class CollectorState {
  /** Values loaded for {@link #context}; {@code null} until the first successful load. */
  FunctionValues function;
  /** Segment context that {@link #function} belongs to; {@code null} until the first successful load. */
  LeafReaderContext context = null;

  /**
   * Loads the values of {@code source} for {@code context}, unless {@code context} is the
   * same instance (identity comparison) that the previous successful call loaded.
   * Only the context is compared; {@code source} is not part of the check.
   *
   * @param source  value source to read the segment values from
   * @param context segment that is about to be collected
   * @throws IOException if the value source fails to produce values; the previously
   *         stored context and values are left unchanged in that case
   */
  public void setNextReader(ValueSource source, LeafReaderContext context) throws IOException {
    if (this.context == context) {
      return;
    }
    // Load first, record the context second: if getValues throws, the state must not
    // claim to hold values for a segment it never managed to read.
    this.function = source.getValues(null, context);
    this.context = context;
  }
}
public void collect(int doc) throws IOException { public void collect(int doc) throws IOException {
valueFiller.fillValue(doc); valueFiller.fillValue(doc);
@ -114,7 +101,7 @@ public class MinMaxStatsCollector implements StatsCollector{
@Override @Override
public FunctionValues getFunction() { public FunctionValues getFunction() {
return state.function; return function;
} }
public String valueSourceString() { public String valueSourceString() {

View File

@ -29,16 +29,14 @@ public class NumericStatsCollector extends MinMaxStatsCollector {
protected double sumOfSquares = 0; protected double sumOfSquares = 0;
protected double mean = 0; protected double mean = 0;
protected double stddev = 0; protected double stddev = 0;
protected CollectorState state;
public NumericStatsCollector(ValueSource source, Set<String> statsList, CollectorState state) { public NumericStatsCollector(ValueSource source, Set<String> statsList) {
super(source, statsList, state); super(source, statsList);
this.state = state;
} }
public void collect(int doc) throws IOException { public void collect(int doc) throws IOException {
super.collect(doc); super.collect(doc);
double value = state.function.doubleVal(doc); double value = function.doubleVal(doc);
sum += value; sum += value;
sumOfSquares += (value * value); sumOfSquares += (value * value);
} }

View File

@ -33,7 +33,6 @@ import org.apache.lucene.queries.function.valuesource.IntFieldSource;
import org.apache.lucene.queries.function.valuesource.LongFieldSource; import org.apache.lucene.queries.function.valuesource.LongFieldSource;
import org.apache.solr.analytics.expression.ExpressionFactory; import org.apache.solr.analytics.expression.ExpressionFactory;
import org.apache.solr.analytics.request.ExpressionRequest; import org.apache.solr.analytics.request.ExpressionRequest;
import org.apache.solr.analytics.statistics.MinMaxStatsCollector.CollectorState;
import org.apache.solr.analytics.util.AnalyticsParams; import org.apache.solr.analytics.util.AnalyticsParams;
import org.apache.solr.analytics.util.valuesource.AbsoluteValueDoubleFunction; import org.apache.solr.analytics.util.valuesource.AbsoluteValueDoubleFunction;
import org.apache.solr.analytics.util.valuesource.AddDoubleFunction; import org.apache.solr.analytics.util.valuesource.AddDoubleFunction;
@ -214,32 +213,25 @@ public class StatsCollectorSupplierFactory {
} }
} }
} }
final CollectorState states[] = new CollectorState[statsArr.length];
for (int count = 0; count < statsArr.length; count++) {
states[count] = new CollectorState();
}
// Making the Supplier // Making the Supplier
return new Supplier<StatsCollector[]>() { return new Supplier<StatsCollector[]>() {
private final CollectorState collectorState[] = states;
public StatsCollector[] get() { public StatsCollector[] get() {
StatsCollector[] collectors = new StatsCollector[statsArr.length]; StatsCollector[] collectors = new StatsCollector[statsArr.length];
for (int count = 0; count < statsArr.length; count++) { for (int count = 0; count < statsArr.length; count++) {
if(numericBools[count]){ if(numericBools[count]){
StatsCollector sc = new NumericStatsCollector(sourceArr[count], statsArr[count], collectorState[count]); StatsCollector sc = new NumericStatsCollector(sourceArr[count], statsArr[count]);
if(uniqueBools[count]) sc = new UniqueStatsCollector(sc); if(uniqueBools[count]) sc = new UniqueStatsCollector(sc);
if(medianBools[count]) sc = new MedianStatsCollector(sc); if(medianBools[count]) sc = new MedianStatsCollector(sc);
if(percsArr[count]!=null) sc = new PercentileStatsCollector(sc,percsArr[count],percsNames[count]); if(percsArr[count]!=null) sc = new PercentileStatsCollector(sc,percsArr[count],percsNames[count]);
collectors[count]=sc; collectors[count]=sc;
} else if (dateBools[count]) { } else if (dateBools[count]) {
StatsCollector sc = new MinMaxStatsCollector(sourceArr[count], statsArr[count], collectorState[count]); StatsCollector sc = new MinMaxStatsCollector(sourceArr[count], statsArr[count]);
if(uniqueBools[count]) sc = new UniqueStatsCollector(sc); if(uniqueBools[count]) sc = new UniqueStatsCollector(sc);
if(medianBools[count]) sc = new DateMedianStatsCollector(sc); if(medianBools[count]) sc = new DateMedianStatsCollector(sc);
if(percsArr[count]!=null) sc = new PercentileStatsCollector(sc,percsArr[count],percsNames[count]); if(percsArr[count]!=null) sc = new PercentileStatsCollector(sc,percsArr[count],percsNames[count]);
collectors[count]=sc; collectors[count]=sc;
} else { } else {
StatsCollector sc = new MinMaxStatsCollector(sourceArr[count], statsArr[count], collectorState[count]); StatsCollector sc = new MinMaxStatsCollector(sourceArr[count], statsArr[count]);
if(uniqueBools[count]) sc = new UniqueStatsCollector(sc); if(uniqueBools[count]) sc = new UniqueStatsCollector(sc);
if(medianBools[count]) sc = new MedianStatsCollector(sc); if(medianBools[count]) sc = new MedianStatsCollector(sc);
if(percsArr[count]!=null) sc = new PercentileStatsCollector(sc,percsArr[count],percsNames[count]); if(percsArr[count]!=null) sc = new PercentileStatsCollector(sc,percsArr[count],percsNames[count]);

View File

@ -1,4 +0,0 @@
o.ar.s.min=min(double_dd)
o.ar.s.max=max(long_ld)
o.ar.ff=string_sd
o.ar.ff.string_sd.sortstatistic=min

View File

@ -1,14 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<analyticsRequestEnvelope stats="true" olap="true">
<analyticsRequest>
<name>MinMax Request</name>
<statistic>
<expression>min(double(double_dd))</expression>
<name>min</name>
</statistic>
<statistic>
<expression>max(long(long_ld))</expression>
<name>max</name>
</statistic>
</analyticsRequest>
</analyticsRequestEnvelope>

View File

@ -60,7 +60,7 @@ public class NoFacetTest extends AbstractAnalyticsStatsTest {
@BeforeClass @BeforeClass
public static void beforeClass() throws Exception { public static void beforeClass() throws Exception {
initCore("solrconfig-analytics.xml","schema-analytics.xml"); initCore("solrconfig-basic.xml","schema-analytics.xml");
h.update("<delete><query>*:*</query></delete>"); h.update("<delete><query>*:*</query></delete>");
defaults.put("int_id", new Integer(0)); defaults.put("int_id", new Integer(0));
defaults.put("long_ld", new Long(0)); defaults.put("long_ld", new Long(0));

View File

@ -48,7 +48,7 @@ public class ExpressionTest extends AbstractAnalyticsStatsTest {
@BeforeClass @BeforeClass
public static void beforeClass() throws Exception { public static void beforeClass() throws Exception {
initCore("solrconfig-analytics.xml", "schema-analytics.xml"); initCore("solrconfig-basic.xml", "schema-analytics.xml");
h.update("<delete><query>*:*</query></delete>"); h.update("<delete><query>*:*</query></delete>");
for (int j = 0; j < NUM_LOOPS; ++j) { for (int j = 0; j < NUM_LOOPS; ++j) {

View File

@ -312,19 +312,4 @@ public class AbstractAnalyticsFacetTest extends SolrTestCaseJ4 {
IOUtils.closeWhileHandlingException(file, in); IOUtils.closeWhileHandlingException(file, in);
} }
} }
/**
 * Removes from {@code string} every value equal to the text content, parsed as a double,
 * of any node selected by {@code xPath}.
 *
 * @param xPath  XPath expression selecting the nodes whose values should be dropped
 * @param string list to remove the matching values from; modified in place
 * @throws XPathExpressionException if {@code xPath} cannot be evaluated
 */
protected void removeNodes(String xPath, List<Double> string) throws XPathExpressionException {
  final NodeList matches = getNodes(xPath);
  final List<Double> parsed = new ArrayList<>();
  int i = 0;
  while (i < matches.getLength()) {
    final String text = matches.item(i).getTextContent();
    parsed.add(Double.parseDouble(text));
    i++;
  }
  string.removeAll(parsed);
}
/**
 * Evaluates {@code xPath} against {@code doc} and returns all matching nodes.
 * NOTE(review): {@code xPathFact} and {@code doc} are declared outside this view;
 * presumably {@code doc} is the parsed response document - confirm.
 *
 * @param xPath XPath expression to compile and evaluate
 * @return the node set selected by the expression
 * @throws XPathExpressionException if the expression cannot be compiled or evaluated
 */
protected NodeList getNodes(String xPath) throws XPathExpressionException {
  return (NodeList) xPathFact.newXPath().compile(xPath).evaluate(doc, XPathConstants.NODESET);
}
} }

View File

@ -1,40 +0,0 @@
package org.apache.solr.analytics.facet;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.solr.analytics.AbstractAnalyticsStatsTest;
import org.apache.solr.analytics.expression.ExpressionTest;
import org.junit.BeforeClass;
import org.junit.Test;
/**
 * Runs the analytics request stored in {@code fileName} against a small data set in which
 * some facet buckets have no value for one of the requested fields, then checks the
 * overall {@code min} and {@code max} statistics of the "ar" request.
 */
@SuppressCodecs({"Lucene3x","Lucene40","Lucene41","Lucene42","Appending","Asserting"})
public class FacetSortingTest extends AbstractAnalyticsStatsTest {
  /** Resource path of the analytics request file that is loaded in {@link #beforeClass()}. */
  private static final String fileName = "/analytics/requestFiles/facetSorting.txt";

  /**
   * Initializes the core, indexes the test documents and stores the query response
   * for the test method to inspect.
   */
  @BeforeClass
  public static void beforeClass() throws Exception {
    initCore("solrconfig-analytics.xml", "schema-analytics.xml");
    h.update("<delete><query>*:*</query></delete>");

    // Documents are grouped by the string_sd field (values A, B and C):
    //   A: both double_dd and long_ld are present
    //   B: double_dd is missing in every document
    //   C: long_ld is missing in every document
    assertU(adoc("id", "1001", "string_sd", "A", "double_dd", "3", "long_ld", "1"));
    assertU(adoc("id", "1002", "string_sd", "A", "double_dd", "25", "long_ld", "2"));
    assertU(adoc("id", "1003", "string_sd", "B", "long_ld", "3"));
    assertU(adoc("id", "1004", "string_sd", "B", "long_ld", "4"));
    assertU(adoc("id", "1005", "string_sd", "C", "double_dd", "17"));
    assertU(commit());

    setResponse(h.query(request(fileToStringArr(ExpressionTest.class, fileName))));
  }

  /** Checks the overall min (3.0, smallest double_dd) and max (4, largest long_ld) results. */
  @Test
  public void addTest() throws Exception {
    Double minResult = (Double) getStatResult("ar", "min", VAL_TYPE.DOUBLE);
    Long maxResult = (Long) getStatResult("ar", "max", VAL_TYPE.LONG);
    // Expected value goes first; comparing the boxed results directly lets a missing
    // statistic surface as an assertion failure rather than an unboxing NullPointerException.
    assertEquals(Double.valueOf(3.0), minResult);
    assertEquals(Long.valueOf(4), maxResult);
  }
}

View File

@ -44,7 +44,7 @@ public class FieldFacetExtrasTest extends AbstractAnalyticsFacetTest {
@BeforeClass @BeforeClass
public static void beforeClass() throws Exception { public static void beforeClass() throws Exception {
initCore("solrconfig-analytics.xml","schema-analytics.xml"); initCore("solrconfig-basic.xml","schema-analytics.xml");
h.update("<delete><query>*:*</query></delete>"); h.update("<delete><query>*:*</query></delete>");
//INT //INT

View File

@ -24,7 +24,6 @@ import java.util.List;
import org.junit.Assert; import org.junit.Assert;
import org.junit.BeforeClass; import org.junit.BeforeClass;
import org.junit.Test; import org.junit.Test;
import org.w3c.dom.Node;
public class FieldFacetTest extends AbstractAnalyticsFacetTest{ public class FieldFacetTest extends AbstractAnalyticsFacetTest{
@ -88,7 +87,7 @@ public class FieldFacetTest extends AbstractAnalyticsFacetTest{
@BeforeClass @BeforeClass
public static void beforeClass() throws Exception { public static void beforeClass() throws Exception {
initCore("solrconfig-analytics.xml","schema-analytics.xml"); initCore("solrconfig-basic.xml","schema-analytics.xml");
h.update("<delete><query>*:*</query></delete>"); h.update("<delete><query>*:*</query></delete>");
defaults.put("int", new Integer(0)); defaults.put("int", new Integer(0));
@ -1038,33 +1037,31 @@ public class FieldFacetTest extends AbstractAnalyticsFacetTest{
public void missingFacetTest() throws Exception { public void missingFacetTest() throws Exception {
//int MultiDate //int MultiDate
String xPath = "/response/lst[@name='stats']/lst[@name='missingf']/lst[@name='fieldFacets']/lst[@name='date_dtdm']/lst[@name='(MISSING)']"; String xPath = "/response/lst[@name='stats']/lst[@name='missingf']/lst[@name='fieldFacets']/lst[@name='date_dtdm']/lst[@name='(MISSING)']";
Node missingNodeXPath = getNode(xPath); assertNotNull(getRawResponse(), getNode(xPath));
assertNotNull(getRawResponse(), missingNodeXPath);
ArrayList<Double> string = getDoubleList("missingf", "fieldFacets", "date_dtdm", "double", "mean"); ArrayList<Double> string = getDoubleList("missingf", "fieldFacets", "date_dtdm", "double", "mean");
super.removeNodes(xPath, string); string.remove(0);
ArrayList<Double> stringTest = calculateNumberStat(multiDateTestStart, "mean"); ArrayList<Double> stringTest = calculateNumberStat(multiDateTestStart, "mean");
assertEquals(getRawResponse(), string,stringTest); assertEquals(getRawResponse(), string,stringTest);
//Int String //Int String
xPath = "/response/lst[@name='stats']/lst[@name='missingf']/lst[@name='fieldFacets']/lst[@name='string_sd']/lst[@name='(MISSING)']"; xPath = "/response/lst[@name='stats']/lst[@name='missingf']/lst[@name='fieldFacets']/lst[@name='string_sd']/lst[@name='(MISSING)']";
missingNodeXPath = getNode(xPath); assertNotNull(getRawResponse(), getNode(xPath));
String missingNodeXPathStr = xPath;
assertNotNull(getRawResponse(), missingNodeXPath);
xPath = "/response/lst[@name='stats']/lst[@name='missingf']/lst[@name='fieldFacets']/lst[@name='string_sd']/lst[@name='str0']"; xPath = "/response/lst[@name='stats']/lst[@name='missingf']/lst[@name='fieldFacets']/lst[@name='string_sd']/lst[@name='str0']";
assertNull(getRawResponse(), getNode(xPath)); assertNull(getRawResponse(), getNode(xPath));
List<Double> intString = getDoubleList("missingf", "fieldFacets", "string_sd", "double", "mean"); List<Double> intString = getDoubleList("missingf", "fieldFacets", "string_sd", "double", "mean");
removeNodes(missingNodeXPathStr, intString); intString.remove(0);
ArrayList<Double> intStringTest = calculateNumberStat(intStringTestStart, "mean"); ArrayList<Double> intStringTest = calculateNumberStat(intStringTestStart, "mean");
assertEquals(getRawResponse(), intString,intStringTest); assertEquals(getRawResponse(), intString,intStringTest);
//Int Date //Int Date
Collection<Double> intDate = getDoubleList("missingf", "fieldFacets", "date_dtd", "double", "mean"); Collection<Double> intDate = getDoubleList("missingf", "fieldFacets", "date_dtd", "double", "mean");
ArrayList<ArrayList<Double>> intDateMissingTestStart = (ArrayList<ArrayList<Double>>) intDateTestStart.clone(); ArrayList<ArrayList<Double>> intDateMissingTestStart = (ArrayList<ArrayList<Double>>) intDateTestStart.clone();
ArrayList<Double> intDateTest = calculateNumberStat(intDateMissingTestStart, "mean"); ArrayList<Double> intDateTest = calculateNumberStat(intDateMissingTestStart, "mean");
assertEquals(getRawResponse(),intDate,intDateTest); assertEquals(getRawResponse(),intDate,intDateTest);
} }
private void checkStddevs(ArrayList<Double> list1, ArrayList<Double> list2) { private void checkStddevs(ArrayList<Double> list1, ArrayList<Double> list2) {

View File

@ -35,7 +35,7 @@ public class QueryFacetTest extends AbstractAnalyticsFacetTest {
@BeforeClass @BeforeClass
public static void beforeClass() throws Exception { public static void beforeClass() throws Exception {
initCore("solrconfig-analytics.xml","schema-analytics.xml"); initCore("solrconfig-basic.xml","schema-analytics.xml");
} }
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")

View File

@ -46,7 +46,7 @@ public class RangeFacetTest extends AbstractAnalyticsFacetTest {
@BeforeClass @BeforeClass
public static void beforeClass() throws Exception { public static void beforeClass() throws Exception {
initCore("solrconfig-analytics.xml","schema-analytics.xml"); initCore("solrconfig-basic.xml","schema-analytics.xml");
h.update("<delete><query>*:*</query></delete>"); h.update("<delete><query>*:*</query></delete>");
//INT //INT

View File

@ -35,7 +35,7 @@ public class FunctionTest extends AbstractAnalyticsStatsTest {
@BeforeClass @BeforeClass
public static void beforeClass() throws Exception { public static void beforeClass() throws Exception {
initCore("solrconfig-analytics.xml","schema-analytics.xml"); initCore("solrconfig-basic.xml","schema-analytics.xml");
h.update("<delete><query>*:*</query></delete>"); h.update("<delete><query>*:*</query></delete>");
for (int j = 0; j < NUM_LOOPS; ++j) { for (int j = 0; j < NUM_LOOPS; ++j) {