diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 4118e75dab8..ad0a522c642 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -83,6 +83,10 @@ New Features * SOLR-11244: Query DSL for Solr (Cao Manh Dat) +* SOLR-11317: JSON Facet API: min/max aggregations on numeric fields are now typed better so int/long + fields return an appropriate integral type rather than a double. (yonik) + + Bug Fixes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/search/facet/MinMaxAgg.java b/solr/core/src/java/org/apache/solr/search/facet/MinMaxAgg.java index 0f4bea6f271..5a48ab2221c 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/MinMaxAgg.java +++ b/solr/core/src/java/org/apache/solr/search/facet/MinMaxAgg.java @@ -25,7 +25,9 @@ import org.apache.lucene.index.OrdinalMap; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.LongValues; +import org.apache.solr.common.SolrException; import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.StrFieldSource; import org.apache.solr.search.function.FieldNameValueSource; @@ -48,29 +50,39 @@ public class MinMaxAgg extends SimpleAggValueSource { String field = ((FieldNameValueSource)vs).getFieldName(); sf = fcontext.qcontext.searcher().getSchema().getField(field); - vs = sf.getType().getValueSource(sf, null); // temporary implementation to make existing code work + if (sf.multiValued() || sf.getType().multiValuedFieldCache()) { + vs = null; + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "min/max aggregations can't be used on multi-valued field " + field); + } else { + vs = sf.getType().getValueSource(sf, null); + } } if (vs instanceof StrFieldSource) { - if (sf.multiValued() || sf.getType().multiValuedFieldCache()) { - if (sf.hasDocValues()) { - // dv - } else { - // uif - } - } else { - return new SingleValuedOrdAcc(fcontext, sf, numSlots); + return new SingleValuedOrdAcc(fcontext, sf, numSlots); + } + + // Since functions don't currently have types, we rely on the type of the field + if (sf != null && sf.getType().getNumberType() != null) { + switch (sf.getType().getNumberType()) { + case FLOAT: + case DOUBLE: + return new DFuncAcc(vs, fcontext, numSlots); + case INTEGER: + case LONG: + case DATE: + return new LFuncAcc(vs, fcontext, numSlots); } } // numeric functions - return new ValSlotAcc(vs, fcontext, numSlots); + return new DFuncAcc(vs, fcontext, numSlots); } @Override public FacetMerger createFacetMerger(Object prototype) { - if (prototype instanceof Number) - return new NumericMerger(); + if (prototype instanceof Double) + return new NumericMerger(); // still use NumericMerger to handle NaN? else if (prototype instanceof Comparable) { return new ComparableMerger(); } else { @@ -122,8 +134,8 @@ public class MinMaxAgg extends SimpleAggValueSource { } } - class ValSlotAcc extends DoubleFuncSlotAcc { - public ValSlotAcc(ValueSource values, FacetContext fcontext, int numSlots) { + class DFuncAcc extends DoubleFuncSlotAcc { + public DFuncAcc(ValueSource values, FacetContext fcontext, int numSlots) { super(values, fcontext, numSlots, Double.NaN); } @@ -149,6 +161,66 @@ public class MinMaxAgg extends SimpleAggValueSource { } } + class LFuncAcc extends LongFuncSlotAcc { + FixedBitSet exists; + public LFuncAcc(ValueSource values, FacetContext fcontext, int numSlots) { + super(values, fcontext, numSlots, 0); + exists = new FixedBitSet(numSlots); + } + + @Override + public void collect(int doc, int slotNum) throws IOException { + long val = values.longVal(doc); + if (val == 0 && !values.exists(doc)) return; // depend on fact that non existing values return 0 for func query + + long currVal = result[slotNum]; + if (currVal == 0 && !exists.get(slotNum)) { + exists.set(slotNum); + result[slotNum] = val; + } else if (Long.compare(val, currVal) * minmax < 0) { + result[slotNum] = val; + } + } + + @Override + public Object getValue(int slot) { + long val = result[slot]; + if (val == 0 && exists.get(slot)) { + return null; + } else { + return val; + } + } + + @Override + public void resize(Resizer resizer) { + super.resize(resizer); + exists = resizer.resize(exists); + } + + @Override + public int compare(int slotA, int slotB) { + long a = result[slotA]; + long b = result[slotB]; + boolean ea = a != 0 || exists.get(slotA); + boolean eb = b != 0 || exists.get(slotB); + + if (ea != eb) { + if (ea) return 1; // a exists and b doesn't TODO: we need context to be able to sort missing last! SOLR-10618 + if (eb) return -1; // b exists and a is missing + } + + return Long.compare(a, b); + } + + @Override + public void reset() { + super.reset(); + exists.clear(0, exists.length()); + } + + } + abstract class OrdAcc extends SlotAcc { final static int MISSING = -1; diff --git a/solr/core/src/java/org/apache/solr/search/facet/SlotAcc.java b/solr/core/src/java/org/apache/solr/search/facet/SlotAcc.java index 1240051be6e..578ef1796a9 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/SlotAcc.java +++ b/solr/core/src/java/org/apache/solr/search/facet/SlotAcc.java @@ -16,14 +16,6 @@ */ package org.apache.solr.search.facet; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.queries.function.FunctionValues; -import org.apache.lucene.queries.function.ValueSource; -import org.apache.solr.common.util.SimpleOrderedMap; -import org.apache.solr.search.DocIterator; -import org.apache.solr.search.DocSet; -import org.apache.solr.search.SolrIndexSearcher; - import java.io.Closeable; import java.io.IOException; import java.lang.reflect.Array; @@ -32,6 +24,16 @@ import java.util.Arrays; import java.util.Iterator; import java.util.List; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.queries.function.FunctionValues; +import org.apache.lucene.queries.function.ValueSource; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.util.FixedBitSet; +import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.search.DocIterator; +import org.apache.solr.search.DocSet; +import org.apache.solr.search.SolrIndexSearcher; + /** * Accumulates statistics separated by a slot number. * There is a separate statistic per slot. The slot is usually an ordinal into a set of values, e.g. tracking a count @@ -140,6 +142,38 @@ public abstract class SlotAcc implements Closeable { return values; } + public long[] resize(long[] old, long defaultValue) { + long[] values = new long[getNewSize()]; + if (defaultValue != 0) { + Arrays.fill(values, 0, values.length, defaultValue); + } + for (int i = 0; i < old.length; i++) { + long val = old[i]; + if (val != defaultValue) { + int newSlot = getNewSlot(i); + if (newSlot >= 0) { + values[newSlot] = val; + } + } + } + return values; + } + + public FixedBitSet resize(FixedBitSet old) { + FixedBitSet values = new FixedBitSet(getNewSize()); + int oldSize = old.length(); + + for(int oldSlot = 0;;) { + oldSlot = values.nextSetBit(oldSlot); + if (oldSlot == DocIdSetIterator.NO_MORE_DOCS) break; + int newSlot = getNewSlot(oldSlot); + values.set(newSlot); + if (++oldSlot >= oldSize) break; + } + + return values; + } + public T[] resize(T[] old, T defaultValue) { T[] values = (T[]) Array.newInstance(old.getClass().getComponentType(), getNewSize()); if (defaultValue != null) { @@ -222,6 +256,40 @@ abstract class DoubleFuncSlotAcc extends FuncSlotAcc { } } +abstract class LongFuncSlotAcc extends FuncSlotAcc { + long[] result; + long initialValue; + + public LongFuncSlotAcc(ValueSource values, FacetContext fcontext, int numSlots, long initialValue) { + super(values, fcontext, numSlots); + this.initialValue = initialValue; + result = new long[numSlots]; + if (initialValue != 0) { + reset(); + } + } + + @Override + public int compare(int slotA, int slotB) { + return Long.compare(result[slotA], result[slotB]); + } + + @Override + public Object getValue(int slot) { + return result[slot]; + } + + @Override + public void reset() { + Arrays.fill(result, initialValue); + } + + @Override + public void resize(Resizer resizer) { + result = resizer.resize(result, initialValue); + } +} + abstract class IntSlotAcc extends SlotAcc { int[] result; // use LongArray32 int initialValue; diff --git a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java index 5ecd3a1a85c..559240bc048 100644 --- a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java +++ b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java @@ -467,28 +467,29 @@ public class TestJsonFacets extends SolrTestCaseHS { // single valued strings - doStatsTemplated(client, params(p, "rows","0", "noexist","noexist_s", "cat_s","cat_s", "where_s","where_s", "num_d","num_d", "num_i","num_i", "super_s","super_s", "val_b","val_b", "date","date_dt", "sparse_s","sparse_s" ,"multi_ss","multi_ss") ); + doStatsTemplated(client, params(p, "rows","0", "noexist","noexist_s", "cat_s","cat_s", "where_s","where_s", "num_d","num_d", "num_i","num_i", "num_l","long_l", "super_s","super_s", "val_b","val_b", "date","date_dt", "sparse_s","sparse_s" ,"multi_ss","multi_ss") ); // multi-valued strings, long/float substitute for int/double - doStatsTemplated(client, params(p, "facet","true", "rows","0", "noexist","noexist_ss", "cat_s","cat_ss", "where_s","where_ss", "num_d","num_f", "num_i","num_l", "num_is","num_ls", "num_fs", "num_ds", "super_s","super_ss", "val_b","val_b", "date","date_dt", "sparse_s","sparse_ss", "multi_ss","multi_ss") ); + doStatsTemplated(client, params(p, "facet","true", "rows","0", "noexist","noexist_ss", "cat_s","cat_ss", "where_s","where_ss", "num_d","num_f", "num_i","num_l", "num_l","long_l", "num_is","num_ls", "num_fs", "num_ds", "super_s","super_ss", "val_b","val_b", "date","date_dt", "sparse_s","sparse_ss", "multi_ss","multi_ss") ); // multi-valued strings, method=dv for terms facets - doStatsTemplated(client, params(p, "terms_method", "method:dv,", "rows", "0", "noexist", "noexist_ss", "cat_s", "cat_ss", "where_s", "where_ss", "num_d", "num_f", "num_i", "num_l", "super_s", "super_ss", "val_b", "val_b", "date", "date_dt", "sparse_s", "sparse_ss", "multi_ss", "multi_ss")); + doStatsTemplated(client, params(p, "terms_method", "method:dv,", "rows", "0", "noexist", "noexist_ss", "cat_s", "cat_ss", "where_s", "where_ss", "num_d", "num_f", "num_i", "num_l", "num_l","long_l","super_s", "super_ss", "val_b", "val_b", "date", "date_dt", "sparse_s", "sparse_ss", "multi_ss", "multi_ss")); // single valued docvalues for strings, and single valued numeric doc values for numeric fields - doStatsTemplated(client, params(p, "rows","0", "noexist","noexist_sd", "cat_s","cat_sd", "where_s","where_sd", "num_d","num_dd", "num_i","num_id", "num_is","num_lds", "num_fs","num_dds", "super_s","super_sd", "val_b","val_b", "date","date_dtd", "sparse_s","sparse_sd" ,"multi_ss","multi_sds") ); + doStatsTemplated(client, params(p, "rows","0", "noexist","noexist_sd", "cat_s","cat_sd", "where_s","where_sd", "num_d","num_dd", "num_i","num_id", "num_is","num_lds", "num_l","long_ld", "num_fs","num_dds", "super_s","super_sd", "val_b","val_b", "date","date_dtd", "sparse_s","sparse_sd" ,"multi_ss","multi_sds") ); // multi-valued docvalues FacetFieldProcessorByArrayDV.unwrap_singleValued_multiDv = false; // better multi-valued coverage - doStatsTemplated(client, params(p, "rows","0", "noexist","noexist_sds", "cat_s","cat_sds", "where_s","where_sds", "num_d","num_d", "num_i","num_i", "num_is","num_ids", "num_fs","num_fds", "super_s","super_sds", "val_b","val_b", "date","date_dtds", "sparse_s","sparse_sds" ,"multi_ss","multi_sds") ); + doStatsTemplated(client, params(p, "rows","0", "noexist","noexist_sds", "cat_s","cat_sds", "where_s","where_sds", "num_d","num_d", "num_i","num_i", "num_is","num_ids", "num_l","long_ld", "num_fs","num_fds", "super_s","super_sds", "val_b","val_b", "date","date_dtds", "sparse_s","sparse_sds" ,"multi_ss","multi_sds") ); // multi-valued docvalues FacetFieldProcessorByArrayDV.unwrap_singleValued_multiDv = true; - doStatsTemplated(client, params(p, "rows","0", "noexist","noexist_sds", "cat_s","cat_sds", "where_s","where_sds", "num_d","num_d", "num_i","num_i", "num_is","num_ids", "num_fs","num_fds", "super_s","super_sds", "val_b","val_b", "date","date_dtds", "sparse_s","sparse_sds" ,"multi_ss","multi_sds") ); + doStatsTemplated(client, params(p, "rows","0", "noexist","noexist_sds", "cat_s","cat_sds", "where_s","where_sds", "num_d","num_d", "num_i","num_i", "num_is","num_ids", "num_l","long_ld", "num_fs","num_fds", "super_s","super_sds", "val_b","val_b", "date","date_dtds", "sparse_s","sparse_sds" ,"multi_ss","multi_sds") ); } public static void doStatsTemplated(Client client, ModifiableSolrParams p) throws Exception { p.set("Z_num_i", "Z_" + p.get("num_i") ); + p.set("Z_num_l", "Z_" + p.get("num_l") ); p.set("sparse_num_d", "sparse_" + p.get("num_d") ); if (p.get("num_is") == null) p.add("num_is","num_is"); if (p.get("num_fs") == null) p.add("num_fs","num_fs"); @@ -528,6 +529,7 @@ public class TestJsonFacets extends SolrTestCaseHS { String num_is = m.expand("${num_is}"); String num_fs = m.expand("${num_fs}"); String Z_num_i = m.expand("${Z_num_i}"); + String Z_num_l = m.expand("${Z_num_l}"); String val_b = m.expand("${val_b}"); String date = m.expand("${date}"); String super_s = m.expand("${super_s}"); @@ -553,13 +555,13 @@ public class TestJsonFacets extends SolrTestCaseHS { iclient.add(doc, null); iclient.add(doc, null); iclient.add(doc, null); // a couple of deleted docs - iclient.add(sdoc("id", "2", cat_s, "B", where_s, "NJ", num_d, "-9", num_i, "-5", num_is,"3",num_is,"-1", num_fs,"3",num_fs,"-1.5", super_s,"superman", date,"2002-02-02T02:02:02Z", val_b, "false" , multi_ss,"a", multi_ss,"b" , Z_num_i, "0"), null); + iclient.add(sdoc("id", "2", cat_s, "B", where_s, "NJ", num_d, "-9", num_i, "-5", num_is,"3",num_is,"-1", num_fs,"3",num_fs,"-1.5", super_s,"superman", date,"2002-02-02T02:02:02Z", val_b, "false" , multi_ss,"a", multi_ss,"b" , Z_num_i, "0", Z_num_l,"0"), null); iclient.add(sdoc("id", "3"), null); iclient.commit(); - iclient.add(sdoc("id", "4", cat_s, "A", where_s, "NJ", num_d, "2", sparse_num_d,"-4",num_i, "3", num_is,"0",num_is,"3", num_fs,"0", num_fs,"3", super_s,"spiderman", date,"2003-03-03T03:03:03Z" , multi_ss, "b", Z_num_i, ""+Integer.MIN_VALUE), null); + iclient.add(sdoc("id", "4", cat_s, "A", where_s, "NJ", num_d, "2", sparse_num_d,"-4",num_i, "3", num_is,"0",num_is,"3", num_fs,"0", num_fs,"3", super_s,"spiderman", date,"2003-03-03T03:03:03Z" , multi_ss, "b", Z_num_i, ""+Integer.MIN_VALUE, Z_num_l,Long.MIN_VALUE), null); iclient.add(sdoc("id", "5", cat_s, "B", where_s, "NJ", num_d, "11", num_i, "7", num_is,"0", num_fs,"0", super_s,"batman" , date,"2001-02-03T01:02:03Z" ,sparse_s,"two", multi_ss, "a"), null); iclient.commit(); - iclient.add(sdoc("id", "6", cat_s, "B", where_s, "NY", num_d, "-5", num_i, "-5", num_is,"-1", num_fs,"-1.5", super_s,"hulk" , date,"2002-03-01T03:02:01Z" , multi_ss, "b", multi_ss, "a", Z_num_i, ""+Integer.MAX_VALUE), null); + iclient.add(sdoc("id", "6", cat_s, "B", where_s, "NY", num_d, "-5", num_i, "-5", num_is,"-1", num_fs,"-1.5", super_s,"hulk" , date,"2002-03-01T03:02:01Z" , multi_ss, "b", multi_ss, "a", Z_num_i, ""+Integer.MAX_VALUE, Z_num_l,Long.MAX_VALUE), null); iclient.commit(); client.commit(); @@ -685,7 +687,18 @@ public class TestJsonFacets extends SolrTestCaseHS { ", f2:{ 'buckets':[{ val:'B', count:3, n1:-2.0}, { val:'A', count:2, n1:6.0 }]} }" ); - + // facet on numbers to test resize from hashing (may need to be sorting by the metric to test that) + client.testJQ(params(p, "q", "*:*" + , "json.facet", "{" + + " f1:{${terms} type:field, field:${num_is}, facet:{a:'min(${num_i})'}, sort:'a asc' }" + + ",f2:{${terms} type:field, field:${num_is}, facet:{a:'max(${num_i})'}, sort:'a desc' }" + + "}" + ) + , "facets=={count:6 " + + ",f1:{ buckets:[{val:-1,count:2,a:-5},{val:3,count:2,a:-5},{val:-5,count:1,a:2},{val:2,count:1,a:2},{val:0,count:2,a:3}, ] } " + + ",f2:{ buckets:[{val:0,count:2,a:7},{val:3,count:2,a:3},{val:-5,count:1,a:2},{val:2,count:1,a:2},{val:-1,count:2,a:-5}, ] } " + + "}" + ); // percentiles 0,10,50,90,100 // catA: 2.0 2.2 3.0 3.8 4.0 @@ -983,16 +996,20 @@ public class TestJsonFacets extends SolrTestCaseHS { // stats at top level client.testJQ(params(p, "q", "*:*" - , "json.facet", "{ sum1:'sum(${num_d})', sumsq1:'sumsq(${num_d})', avg1:'avg(${num_d})', avg2:'avg(def(${num_d},0))', min1:'min(${num_d})', max1:'max(${num_d})'" + + , "json.facet", "{ sum1:'sum(${num_d})', sumsq1:'sumsq(${num_d})', avg1:'avg(${num_d})', avg2:'avg(def(${num_d},0))', mind:'min(${num_d})', maxd:'max(${num_d})'" + ", numwhere:'unique(${where_s})', unique_num_i:'unique(${num_i})', unique_num_d:'unique(${num_d})', unique_date:'unique(${date})'" + ", where_hll:'hll(${where_s})', hll_num_i:'hll(${num_i})', hll_num_d:'hll(${num_d})', hll_date:'hll(${date})'" + - ", med:'percentile(${num_d},50)', perc:'percentile(${num_d},0,50.0,100)', variance:'variance(${num_d})', stddev:'stddev(${num_d})' }" + ", med:'percentile(${num_d},50)', perc:'percentile(${num_d},0,50.0,100)', variance:'variance(${num_d})', stddev:'stddev(${num_d})'" + + ", mini:'min(${num_i})', maxi:'max(${num_i})'" + + " }" ) , "facets=={ 'count':6, " + - "sum1:3.0, sumsq1:247.0, avg1:0.6, avg2:0.5, min1:-9.0, max1:11.0" + + "sum1:3.0, sumsq1:247.0, avg1:0.6, avg2:0.5, mind:-9.0, maxd:11.0" + ", numwhere:2, unique_num_i:4, unique_num_d:5, unique_date:5" + ", where_hll:2, hll_num_i:4, hll_num_d:5, hll_date:5" + - ", med:2.0, perc:[-9.0,2.0,11.0], variance:49.04, stddev:7.002856560004639}" + ", med:2.0, perc:[-9.0,2.0,11.0], variance:49.04, stddev:7.002856560004639" + + ", mini:-5, maxi:7" + + "}" ); // stats at top level, no matches @@ -1307,16 +1324,26 @@ public class TestJsonFacets extends SolrTestCaseHS { "}" ); - // test 0, min/max int + // test 0, min/max int/long client.testJQ(params(p, "q", "*:*" , "json.facet", "{" + - " u : 'unique(${Z_num_i})'" + + " u : 'unique(${Z_num_i})'" + + ", u2 : 'unique(${Z_num_l})'" + + ", min1 : 'min(${Z_num_i})', max1 : 'max(${Z_num_i})'" + + ", min2 : 'min(${Z_num_l})', max2 : 'max(${Z_num_l})'" + ", f1:{${terms} type:field, field:${Z_num_i} }" + + ", f2:{${terms} type:field, field:${Z_num_l} }" + "}" ) , "facets=={count:6 " + ",u:3" + + ",u2:3" + + ",min1:" + Integer.MIN_VALUE + + ",max1:" + Integer.MAX_VALUE + + ",min2:" + Long.MIN_VALUE + + ",max2:" + Long.MAX_VALUE + ",f1:{ buckets:[{val:" + Integer.MIN_VALUE + ",count:1},{val:0,count:1},{val:" + Integer.MAX_VALUE+",count:1}]} " + + ",f2:{ buckets:[{val:" + Long.MIN_VALUE + ",count:1},{val:0,count:1},{val:" + Long.MAX_VALUE+",count:1}]} " + "}" ); @@ -1394,11 +1421,12 @@ public class TestJsonFacets extends SolrTestCaseHS { // test acc reuse (i.e. reset() method). This is normally used for stats that are not calculated in the first phase, // currently non-sorting stats. client.testJQ(params(p, "q", "*:*" - , "json.facet", "{f1:{type:terms, field:'${cat_s}', facet:{h:'hll(${where_s})' , u:'unique(${where_s})', mind:'min(${num_d})', maxd:'max(${num_d})', sumd:'sum(${num_d})', avgd:'avg(${num_d})', variance:'variance(${num_d})', stddev:'stddev(${num_d})' } }}" + , "json.facet", "{f1:{type:terms, field:'${cat_s}', facet:{h:'hll(${where_s})' , u:'unique(${where_s})', mind:'min(${num_d})', maxd:'max(${num_d})', mini:'min(${num_i})', maxi:'max(${num_i})'" + + ", sumd:'sum(${num_d})', avgd:'avg(${num_d})', variance:'variance(${num_d})', stddev:'stddev(${num_d})' } }}" ) , "facets=={ 'count':6, " + - "'f1':{ buckets:[{val:B, count:3, h:2, u:2, mind:-9.0, maxd:11.0, sumd:-3.0, avgd:-1.0, variance:74.66666666666667, stddev:8.640987597877148}," + - " {val:A, count:2, h:2, u:2, mind:2.0, maxd:4.0, sumd:6.0, avgd:3.0, variance:1.0, stddev:1.0}] } } " + "'f1':{ buckets:[{val:B, count:3, h:2, u:2, mind:-9.0, maxd:11.0, mini:-5, maxi:7, sumd:-3.0, avgd:-1.0, variance:74.66666666666667, stddev:8.640987597877148}," + + " {val:A, count:2, h:2, u:2, mind:2.0, maxd:4.0, mini:2, maxi:3, sumd:6.0, avgd:3.0, variance:1.0, stddev:1.0}] } } " );