mirror of https://github.com/apache/lucene.git
SOLR-9981: Performance improvements and bug fixes for the Analytics component
This commit is contained in:
parent
3b07e7241e
commit
a5dce163eb
|
@ -219,6 +219,12 @@ Optimizations
|
|||
* SOLR-10727: Avoid polluting the filter cache for certain types of faceting (typically ranges) when
|
||||
the base docset is empty. (David Smiley)
|
||||
|
||||
* SOLR-9981: Performance improvements and bug fixes for the Analytics component. Performance fix that
|
||||
stops the reading of ALL Lucene segments over and over again for each stats collector. The LeafReaderContext
|
||||
that refers to the "current" segment is reused. This fix shows an improvement of about 25% in query
|
||||
time for a dataset of ~10M (9.8M) records. Given the nature of the fix, the improvement should get
|
||||
better as the dataset grows. Also fixes an NPE during comparison. (Houston Putman)
|
||||
|
||||
Other Changes
|
||||
----------------------
|
||||
* SOLR-10236: Removed FieldType.getNumericType(). Use getNumberType() instead. (Tomás Fernández Löbbe)
|
||||
|
|
|
@ -28,7 +28,7 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
import java.util.HashMap;
|
||||
|
||||
import com.google.common.collect.Iterables;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
|
@ -98,7 +98,7 @@ public class FacetingAccumulator extends BasicAccumulator implements FacetValueA
|
|||
List<RangeFacetRequest> rangeFreqs = request.getRangeFacets();
|
||||
List<QueryFacetRequest> queryFreqs = request.getQueryFacets();
|
||||
|
||||
this.fieldFacetExpressions = new TreeMap<>();
|
||||
this.fieldFacetExpressions = new HashMap<>();
|
||||
this.rangeFacetExpressions = new LinkedHashMap<>(rangeFreqs.size());
|
||||
this.queryFacetExpressions = new LinkedHashMap<>(queryFreqs.size());
|
||||
this.fieldFacetCollectors = new LinkedHashMap<>(fieldFreqs.size());
|
||||
|
@ -120,8 +120,8 @@ public class FacetingAccumulator extends BasicAccumulator implements FacetValueA
|
|||
final SchemaField ff = fr.getField();
|
||||
final FieldFacetAccumulator facc = FieldFacetAccumulator.create(searcher, this, ff);
|
||||
facetAccumulators.add(facc);
|
||||
fieldFacetExpressions.put(freq.getName(), new TreeMap<String, Expression[]>() );
|
||||
fieldFacetCollectors.put(freq.getName(), new TreeMap<String,StatsCollector[]>());
|
||||
fieldFacetExpressions.put(freq.getName(), new HashMap<String, Expression[]>() );
|
||||
fieldFacetCollectors.put(freq.getName(), new HashMap<String,StatsCollector[]>());
|
||||
}
|
||||
/**
|
||||
* For each range and query facet request add a bucket to the corresponding
|
||||
|
@ -299,6 +299,22 @@ public class FacetingAccumulator extends BasicAccumulator implements FacetValueA
|
|||
|
||||
@Override
|
||||
public int compare(Entry<String,Expression[]> o1, Entry<String,Expression[]> o2) {
|
||||
|
||||
// Handle nulls. Null is treated as an infinitely big number so that in case of ASCENDING sorts,
|
||||
// Nulls will appear last. In case of DESC sorts, Nulls will appear last.
|
||||
boolean firstIsNull = false;
|
||||
if (o1 == null || o1.getValue() == null || o1.getValue()[comparatorExpressionPlace] == null)
|
||||
firstIsNull = true;
|
||||
boolean secondIsNull = false;
|
||||
if (o2 == null || o2.getValue() == null || o2.getValue()[comparatorExpressionPlace] == null)
|
||||
secondIsNull = true;
|
||||
if (firstIsNull && secondIsNull)
|
||||
return 0;
|
||||
else if (firstIsNull)
|
||||
return 1;
|
||||
else if (secondIsNull)
|
||||
return -1;
|
||||
|
||||
return comp.compare(o1.getValue()[comparatorExpressionPlace], o2.getValue()[comparatorExpressionPlace]);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -29,10 +29,19 @@ public abstract class Expression {
|
|||
|
||||
public Comparator<Expression> comparator(final FacetSortDirection direction) {
|
||||
return (a, b) -> {
|
||||
if( direction == FacetSortDirection.ASCENDING ){
|
||||
return a.getValue().compareTo(b.getValue());
|
||||
boolean aIsNull = a.getValue() == null;
|
||||
boolean bIsNull = b.getValue() == null;
|
||||
|
||||
if (aIsNull && bIsNull) return 0;
|
||||
|
||||
if( direction == FacetSortDirection.ASCENDING ){ // nulls are last for ASC sort
|
||||
return aIsNull ? 1
|
||||
: bIsNull ? -1
|
||||
: a.getValue().compareTo(b.getValue());
|
||||
} else {
|
||||
return b.getValue().compareTo(a.getValue());
|
||||
return aIsNull ? -1
|
||||
: bIsNull ? 1
|
||||
: b.getValue().compareTo(a.getValue());
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
@ -37,20 +37,33 @@ public class MinMaxStatsCollector implements StatsCollector{
|
|||
protected MutableValue value;
|
||||
protected final Set<String> statsList;
|
||||
protected final ValueSource source;
|
||||
protected FunctionValues function;
|
||||
protected ValueFiller valueFiller;
|
||||
private CollectorState state;
|
||||
|
||||
public MinMaxStatsCollector(ValueSource source, Set<String> statsList) {
|
||||
public MinMaxStatsCollector(ValueSource source, Set<String> statsList, CollectorState state) {
|
||||
this.source = source;
|
||||
this.statsList = statsList;
|
||||
this.state = state;
|
||||
}
|
||||
|
||||
public void setNextReader(LeafReaderContext context) throws IOException {
|
||||
function = source.getValues(null, context);
|
||||
valueFiller = function.getValueFiller();
|
||||
state.setNextReader(source, context);
|
||||
valueFiller = state.function.getValueFiller();
|
||||
value = valueFiller.getValue();
|
||||
}
|
||||
|
||||
public static class CollectorState {
|
||||
FunctionValues function;
|
||||
LeafReaderContext context = null;
|
||||
|
||||
public void setNextReader(ValueSource source, LeafReaderContext context) throws IOException {
|
||||
if (this.context != context) {
|
||||
this.context = context;
|
||||
this.function = source.getValues(null, context);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void collect(int doc) throws IOException {
|
||||
valueFiller.fillValue(doc);
|
||||
if( value.exists ){
|
||||
|
@ -101,7 +114,7 @@ public class MinMaxStatsCollector implements StatsCollector{
|
|||
|
||||
@Override
|
||||
public FunctionValues getFunction() {
|
||||
return function;
|
||||
return state.function;
|
||||
}
|
||||
|
||||
public String valueSourceString() {
|
||||
|
|
|
@ -29,14 +29,16 @@ public class NumericStatsCollector extends MinMaxStatsCollector {
|
|||
protected double sumOfSquares = 0;
|
||||
protected double mean = 0;
|
||||
protected double stddev = 0;
|
||||
protected CollectorState state;
|
||||
|
||||
public NumericStatsCollector(ValueSource source, Set<String> statsList) {
|
||||
super(source, statsList);
|
||||
public NumericStatsCollector(ValueSource source, Set<String> statsList, CollectorState state) {
|
||||
super(source, statsList, state);
|
||||
this.state = state;
|
||||
}
|
||||
|
||||
public void collect(int doc) throws IOException {
|
||||
super.collect(doc);
|
||||
double value = function.doubleVal(doc);
|
||||
double value = state.function.doubleVal(doc);
|
||||
sum += value;
|
||||
sumOfSquares += (value * value);
|
||||
}
|
||||
|
|
|
@ -33,6 +33,7 @@ import org.apache.lucene.queries.function.valuesource.IntFieldSource;
|
|||
import org.apache.lucene.queries.function.valuesource.LongFieldSource;
|
||||
import org.apache.solr.analytics.expression.ExpressionFactory;
|
||||
import org.apache.solr.analytics.request.ExpressionRequest;
|
||||
import org.apache.solr.analytics.statistics.MinMaxStatsCollector.CollectorState;
|
||||
import org.apache.solr.analytics.util.AnalyticsParams;
|
||||
import org.apache.solr.analytics.util.valuesource.AbsoluteValueDoubleFunction;
|
||||
import org.apache.solr.analytics.util.valuesource.AddDoubleFunction;
|
||||
|
@ -213,25 +214,32 @@ public class StatsCollectorSupplierFactory {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
final CollectorState states[] = new CollectorState[statsArr.length];
|
||||
for (int count = 0; count < statsArr.length; count++) {
|
||||
states[count] = new CollectorState();
|
||||
}
|
||||
// Making the Supplier
|
||||
return new Supplier<StatsCollector[]>() {
|
||||
private final CollectorState collectorState[] = states;
|
||||
|
||||
public StatsCollector[] get() {
|
||||
StatsCollector[] collectors = new StatsCollector[statsArr.length];
|
||||
for (int count = 0; count < statsArr.length; count++) {
|
||||
if(numericBools[count]){
|
||||
StatsCollector sc = new NumericStatsCollector(sourceArr[count], statsArr[count]);
|
||||
StatsCollector sc = new NumericStatsCollector(sourceArr[count], statsArr[count], collectorState[count]);
|
||||
if(uniqueBools[count]) sc = new UniqueStatsCollector(sc);
|
||||
if(medianBools[count]) sc = new MedianStatsCollector(sc);
|
||||
if(percsArr[count]!=null) sc = new PercentileStatsCollector(sc,percsArr[count],percsNames[count]);
|
||||
collectors[count]=sc;
|
||||
} else if (dateBools[count]) {
|
||||
StatsCollector sc = new MinMaxStatsCollector(sourceArr[count], statsArr[count]);
|
||||
StatsCollector sc = new MinMaxStatsCollector(sourceArr[count], statsArr[count], collectorState[count]);
|
||||
if(uniqueBools[count]) sc = new UniqueStatsCollector(sc);
|
||||
if(medianBools[count]) sc = new DateMedianStatsCollector(sc);
|
||||
if(percsArr[count]!=null) sc = new PercentileStatsCollector(sc,percsArr[count],percsNames[count]);
|
||||
collectors[count]=sc;
|
||||
} else {
|
||||
StatsCollector sc = new MinMaxStatsCollector(sourceArr[count], statsArr[count]);
|
||||
StatsCollector sc = new MinMaxStatsCollector(sourceArr[count], statsArr[count], collectorState[count]);
|
||||
if(uniqueBools[count]) sc = new UniqueStatsCollector(sc);
|
||||
if(medianBools[count]) sc = new MedianStatsCollector(sc);
|
||||
if(percsArr[count]!=null) sc = new PercentileStatsCollector(sc,percsArr[count],percsNames[count]);
|
||||
|
|
|
@ -0,0 +1,4 @@
|
|||
o.ar.s.min=min(double_dd)
|
||||
o.ar.s.max=max(long_ld)
|
||||
o.ar.ff=string_sd
|
||||
o.ar.ff.string_sd.sortstatistic=min
|
|
@ -0,0 +1,14 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<analyticsRequestEnvelope stats="true" olap="true">
|
||||
<analyticsRequest>
|
||||
<name>MinMax Request</name>
|
||||
<statistic>
|
||||
<expression>min(double(double_dd))</expression>
|
||||
<name>min</name>
|
||||
</statistic>
|
||||
<statistic>
|
||||
<expression>max(long(long_ld))</expression>
|
||||
<name>max</name>
|
||||
</statistic>
|
||||
</analyticsRequest>
|
||||
</analyticsRequestEnvelope>
|
|
@ -60,7 +60,7 @@ public class NoFacetTest extends AbstractAnalyticsStatsTest {
|
|||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
initCore("solrconfig-basic.xml","schema-analytics.xml");
|
||||
initCore("solrconfig-analytics.xml","schema-analytics.xml");
|
||||
h.update("<delete><query>*:*</query></delete>");
|
||||
defaults.put("int_id", new Integer(0));
|
||||
defaults.put("long_ld", new Long(0));
|
||||
|
|
|
@ -48,7 +48,7 @@ public class ExpressionTest extends AbstractAnalyticsStatsTest {
|
|||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
initCore("solrconfig-basic.xml", "schema-analytics.xml");
|
||||
initCore("solrconfig-analytics.xml", "schema-analytics.xml");
|
||||
h.update("<delete><query>*:*</query></delete>");
|
||||
|
||||
for (int j = 0; j < NUM_LOOPS; ++j) {
|
||||
|
|
|
@ -312,4 +312,19 @@ public class AbstractAnalyticsFacetTest extends SolrTestCaseJ4 {
|
|||
IOUtils.closeWhileHandlingException(file, in);
|
||||
}
|
||||
}
|
||||
|
||||
protected void removeNodes(String xPath, List<Double> string) throws XPathExpressionException {
|
||||
NodeList missingNodes = getNodes(xPath);
|
||||
List<Double> result = new ArrayList<Double>();
|
||||
for (int idx = 0; idx < missingNodes.getLength(); ++idx) {
|
||||
result.add(Double.parseDouble(missingNodes.item(idx).getTextContent()));
|
||||
}
|
||||
string.removeAll(result);
|
||||
}
|
||||
|
||||
protected NodeList getNodes(String xPath) throws XPathExpressionException {
|
||||
StringBuilder sb = new StringBuilder(xPath);
|
||||
return (NodeList) xPathFact.newXPath().compile(sb.toString()).evaluate(doc, XPathConstants.NODESET);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,40 @@
|
|||
package org.apache.solr.analytics.facet;
|
||||
|
||||
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
|
||||
import org.apache.solr.analytics.AbstractAnalyticsStatsTest;
|
||||
import org.apache.solr.analytics.expression.ExpressionTest;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
@SuppressCodecs({"Lucene3x","Lucene40","Lucene41","Lucene42","Appending","Asserting"})
|
||||
public class FacetSortingTest extends AbstractAnalyticsStatsTest {
|
||||
private static String fileName = "/analytics/requestFiles/facetSorting.txt";
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
initCore("solrconfig-analytics.xml", "schema-analytics.xml");
|
||||
h.update("<delete><query>*:*</query></delete>");
|
||||
|
||||
// The data set below is so generated that in bucket corresponding fieldFacet B, double_dd column has null values
|
||||
// and in bucket C corresponding to fieldFacet C has null values for column long_ld.
|
||||
// FieldFaceting occurs on string_sd field
|
||||
assertU(adoc("id", "1001", "string_sd", "A", "double_dd", "" + 3, "long_ld", "" + 1));
|
||||
assertU(adoc("id", "1002", "string_sd", "A", "double_dd", "" + 25, "long_ld", "" + 2));
|
||||
assertU(adoc("id", "1003", "string_sd", "B", "long_ld", "" + 3));
|
||||
assertU(adoc("id", "1004", "string_sd", "B", "long_ld", "" + 4));
|
||||
assertU(adoc("id", "1005", "string_sd", "C", "double_dd", "" + 17));
|
||||
|
||||
assertU(commit());
|
||||
String response = h.query(request(fileToStringArr(ExpressionTest.class, fileName)));
|
||||
System.out.println("Response=" + response);
|
||||
setResponse(response);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void addTest() throws Exception {
|
||||
Double minResult = (Double) getStatResult("ar", "min", VAL_TYPE.DOUBLE);
|
||||
Long maxResult = (Long) getStatResult("ar", "max", VAL_TYPE.LONG);
|
||||
assertEquals(Double.valueOf(minResult), Double.valueOf(3.0));
|
||||
assertEquals(Long.valueOf(maxResult),Long.valueOf(4));
|
||||
}
|
||||
}
|
|
@ -44,7 +44,7 @@ public class FieldFacetExtrasTest extends AbstractAnalyticsFacetTest {
|
|||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
initCore("solrconfig-basic.xml","schema-analytics.xml");
|
||||
initCore("solrconfig-analytics.xml","schema-analytics.xml");
|
||||
h.update("<delete><query>*:*</query></delete>");
|
||||
|
||||
//INT
|
||||
|
|
|
@ -24,6 +24,7 @@ import java.util.List;
|
|||
import org.junit.Assert;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
import org.w3c.dom.Node;
|
||||
|
||||
|
||||
public class FieldFacetTest extends AbstractAnalyticsFacetTest{
|
||||
|
@ -87,7 +88,7 @@ public class FieldFacetTest extends AbstractAnalyticsFacetTest{
|
|||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
initCore("solrconfig-basic.xml","schema-analytics.xml");
|
||||
initCore("solrconfig-analytics.xml","schema-analytics.xml");
|
||||
h.update("<delete><query>*:*</query></delete>");
|
||||
|
||||
defaults.put("int", new Integer(0));
|
||||
|
@ -1037,21 +1038,25 @@ public class FieldFacetTest extends AbstractAnalyticsFacetTest{
|
|||
public void missingFacetTest() throws Exception {
|
||||
//int MultiDate
|
||||
String xPath = "/response/lst[@name='stats']/lst[@name='missingf']/lst[@name='fieldFacets']/lst[@name='date_dtdm']/lst[@name='(MISSING)']";
|
||||
assertNotNull(getRawResponse(), getNode(xPath));
|
||||
Node missingNodeXPath = getNode(xPath);
|
||||
assertNotNull(getRawResponse(), missingNodeXPath);
|
||||
|
||||
ArrayList<Double> string = getDoubleList("missingf", "fieldFacets", "date_dtdm", "double", "mean");
|
||||
string.remove(0);
|
||||
super.removeNodes(xPath, string);
|
||||
ArrayList<Double> stringTest = calculateNumberStat(multiDateTestStart, "mean");
|
||||
assertEquals(getRawResponse(), string,stringTest);
|
||||
|
||||
//Int String
|
||||
xPath = "/response/lst[@name='stats']/lst[@name='missingf']/lst[@name='fieldFacets']/lst[@name='string_sd']/lst[@name='(MISSING)']";
|
||||
assertNotNull(getRawResponse(), getNode(xPath));
|
||||
missingNodeXPath = getNode(xPath);
|
||||
String missingNodeXPathStr = xPath;
|
||||
assertNotNull(getRawResponse(), missingNodeXPath);
|
||||
|
||||
xPath = "/response/lst[@name='stats']/lst[@name='missingf']/lst[@name='fieldFacets']/lst[@name='string_sd']/lst[@name='str0']";
|
||||
assertNull(getRawResponse(), getNode(xPath));
|
||||
|
||||
List<Double> intString = getDoubleList("missingf", "fieldFacets", "string_sd", "double", "mean");
|
||||
intString.remove(0);
|
||||
removeNodes(missingNodeXPathStr, intString);
|
||||
ArrayList<Double> intStringTest = calculateNumberStat(intStringTestStart, "mean");
|
||||
assertEquals(getRawResponse(), intString,intStringTest);
|
||||
|
||||
|
@ -1060,8 +1065,6 @@ public class FieldFacetTest extends AbstractAnalyticsFacetTest{
|
|||
ArrayList<ArrayList<Double>> intDateMissingTestStart = (ArrayList<ArrayList<Double>>) intDateTestStart.clone();
|
||||
ArrayList<Double> intDateTest = calculateNumberStat(intDateMissingTestStart, "mean");
|
||||
assertEquals(getRawResponse(),intDate,intDateTest);
|
||||
|
||||
|
||||
}
|
||||
|
||||
private void checkStddevs(ArrayList<Double> list1, ArrayList<Double> list2) {
|
||||
|
|
|
@ -35,7 +35,7 @@ public class QueryFacetTest extends AbstractAnalyticsFacetTest {
|
|||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
initCore("solrconfig-basic.xml","schema-analytics.xml");
|
||||
initCore("solrconfig-analytics.xml","schema-analytics.xml");
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
|
|
|
@ -46,7 +46,7 @@ public class RangeFacetTest extends AbstractAnalyticsFacetTest {
|
|||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
initCore("solrconfig-basic.xml","schema-analytics.xml");
|
||||
initCore("solrconfig-analytics.xml","schema-analytics.xml");
|
||||
h.update("<delete><query>*:*</query></delete>");
|
||||
|
||||
//INT
|
||||
|
|
|
@ -35,7 +35,7 @@ public class FunctionTest extends AbstractAnalyticsStatsTest {
|
|||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
initCore("solrconfig-basic.xml","schema-analytics.xml");
|
||||
initCore("solrconfig-analytics.xml","schema-analytics.xml");
|
||||
h.update("<delete><query>*:*</query></delete>");
|
||||
|
||||
for (int j = 0; j < NUM_LOOPS; ++j) {
|
||||
|
|
Loading…
Reference in New Issue