From 558d85f892593de743c33ba057eb29de47765146 Mon Sep 17 00:00:00 2001 From: Yonik Seeley Date: Thu, 23 Apr 2015 19:35:09 +0000 Subject: [PATCH] SOLR-7387: fix distrib terms facet sorting buckets by min,max,avg,unique git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1675707 13f79535-47bb-0310-9956-ffa450edef68 --- solr/CHANGES.txt | 3 + .../org/apache/solr/search/facet/AvgAgg.java | 36 ++++--- .../apache/solr/search/facet/FacetModule.java | 18 ++-- .../org/apache/solr/search/facet/MaxAgg.java | 28 ++--- .../org/apache/solr/search/facet/MinAgg.java | 28 ++--- .../org/apache/solr/search/facet/SumAgg.java | 15 ++- .../apache/solr/search/facet/SumsqAgg.java | 2 +- .../apache/solr/search/facet/UniqueAgg.java | 80 ++++++++------ .../solr/search/facet/TestJsonFacets.java | 100 +++++++++++++++++- 9 files changed, 221 insertions(+), 89 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 2b44dfb5fbb..b0ec119e949 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -217,6 +217,9 @@ Other Changes * SOLR-7081: Add new test case to test if create/delete/re-create collections work. (Christine Poerschke via Ramkumar Aiyengar) +* SOLR-7387: Facet Module - distributed search didn't work when sorting terms + facet by min, max, avg, or unique functions. (yonik) + ================== 5.1.0 ================== Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release diff --git a/solr/core/src/java/org/apache/solr/search/facet/AvgAgg.java b/solr/core/src/java/org/apache/solr/search/facet/AvgAgg.java index 0de1e0f3d58..4343ce7666f 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/AvgAgg.java +++ b/solr/core/src/java/org/apache/solr/search/facet/AvgAgg.java @@ -35,21 +35,25 @@ public class AvgAgg extends SimpleAggValueSource { @Override public FacetMerger createFacetMerger(Object prototype) { - return new FacetMerger() { - long num; - double sum; - - @Override - public void merge(Object facetResult) { - List numberList = (List)facetResult; - num += numberList.get(0).longValue(); - sum += numberList.get(1).doubleValue(); - } - - @Override - public Object getMergedResult() { - return num==0 ? 0.0d : sum/num; - } - }; + return new Merger(); } + + private static class Merger extends FacetDoubleMerger { + long num; + double sum; + + @Override + public void merge(Object facetResult) { + List numberList = (List)facetResult; + num += numberList.get(0).longValue(); + sum += numberList.get(1).doubleValue(); + } + + @Override + protected double getDouble() { + // TODO: is it worth to try and cache? + return num==0 ? 0.0d : sum/num; + } + + }; } diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetModule.java b/solr/core/src/java/org/apache/solr/search/facet/FacetModule.java index 38a84c767c7..0367c9c91a4 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FacetModule.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetModule.java @@ -242,22 +242,21 @@ abstract class FacetSortableMerger extends FacetMerger { public abstract int compareTo(FacetSortableMerger other, FacetField.SortDirection direction); } -class FacetDoubleMerger extends FacetSortableMerger { - double val; - +abstract class FacetDoubleMerger extends FacetSortableMerger { @Override - public void merge(Object facetResult) { - val += ((Number)facetResult).doubleValue(); - } + public abstract void merge(Object facetResult); + + protected abstract double getDouble(); @Override public Object getMergedResult() { - return val; + return getDouble(); } + @Override public int compareTo(FacetSortableMerger other, FacetField.SortDirection direction) { - return compare(val, ((FacetDoubleMerger)other).val, direction); + return compare(getDouble(), ((FacetDoubleMerger)other).getDouble(), direction); } @@ -282,6 +281,9 @@ class FacetDoubleMerger extends FacetSortableMerger { } + + + class FacetLongMerger extends FacetSortableMerger { long val; diff --git a/solr/core/src/java/org/apache/solr/search/facet/MaxAgg.java b/solr/core/src/java/org/apache/solr/search/facet/MaxAgg.java index c15ee4e4f02..385902a027a 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/MaxAgg.java +++ b/solr/core/src/java/org/apache/solr/search/facet/MaxAgg.java @@ -34,22 +34,24 @@ public class MaxAgg extends SimpleAggValueSource { @Override public FacetMerger createFacetMerger(Object prototype) { - return new FacetMerger() { - double val = Double.NaN; + return new Merger(); + } - @Override - public void merge(Object facetResult) { - double result = ((Number)facetResult).doubleValue(); - if (result > val || Double.isNaN(val)) { - val = result; - } - } + private static class Merger extends FacetDoubleMerger { + double val = Double.NaN; - @Override - public Object getMergedResult() { - return val; + @Override + public void merge(Object facetResult) { + double result = ((Number)facetResult).doubleValue(); + if (result > val || Double.isNaN(val)) { + val = result; } - }; + } + + @Override + protected double getDouble() { + return val; + } } diff --git a/solr/core/src/java/org/apache/solr/search/facet/MinAgg.java b/solr/core/src/java/org/apache/solr/search/facet/MinAgg.java index 0cf725235ee..f747dfe6f62 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/MinAgg.java +++ b/solr/core/src/java/org/apache/solr/search/facet/MinAgg.java @@ -33,21 +33,23 @@ public class MinAgg extends SimpleAggValueSource { @Override public FacetMerger createFacetMerger(Object prototype) { - return new FacetMerger() { - double val = Double.NaN; + return new Merger(); + } - @Override - public void merge(Object facetResult) { - double result = ((Number)facetResult).doubleValue(); - if (result < val || Double.isNaN(val)) { - val = result; - } - } + private static class Merger extends FacetDoubleMerger { + double val = Double.NaN; - @Override - public Object getMergedResult() { - return val; + @Override + public void merge(Object facetResult) { + double result = ((Number)facetResult).doubleValue(); + if (result < val || Double.isNaN(val)) { + val = result; } - }; + } + + @Override + protected double getDouble() { + return val; + } } } diff --git a/solr/core/src/java/org/apache/solr/search/facet/SumAgg.java b/solr/core/src/java/org/apache/solr/search/facet/SumAgg.java index 16070c73501..41f41f2b78d 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/SumAgg.java +++ b/solr/core/src/java/org/apache/solr/search/facet/SumAgg.java @@ -34,7 +34,20 @@ public class SumAgg extends SimpleAggValueSource { @Override public FacetMerger createFacetMerger(Object prototype) { - return new FacetDoubleMerger(); + return new Merger(); + } + + public static class Merger extends FacetDoubleMerger { + double val; + + @Override + public void merge(Object facetResult) { + val += ((Number)facetResult).doubleValue(); + } + + protected double getDouble() { + return val; + } } } diff --git a/solr/core/src/java/org/apache/solr/search/facet/SumsqAgg.java b/solr/core/src/java/org/apache/solr/search/facet/SumsqAgg.java index 8373baa119b..f511351304e 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/SumsqAgg.java +++ b/solr/core/src/java/org/apache/solr/search/facet/SumsqAgg.java @@ -33,6 +33,6 @@ public class SumsqAgg extends SimpleAggValueSource { @Override public FacetMerger createFacetMerger(Object prototype) { - return new FacetDoubleMerger(); + return new SumAgg.Merger(); } } diff --git a/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java b/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java index 8b18ccf4bd8..131db3d53fe 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java +++ b/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java @@ -63,42 +63,54 @@ public class UniqueAgg extends StrAggValueSource { @Override public FacetMerger createFacetMerger(Object prototype) { - return new FacetMerger() { - long sumUnique; - Set values; - int shardsMissing; - long shardsMissingSum; - long shardsMissingMax; - - @Override - public void merge(Object facetResult) { - SimpleOrderedMap map = (SimpleOrderedMap)facetResult; - long unique = ((Number)map.get("unique")).longValue(); - sumUnique += unique; - - List vals = (List)map.get("vals"); - if (vals != null) { - if (values == null) { - values = new HashSet<>(vals.size()*4); - } - values.addAll(vals); - } else { - shardsMissing++; - shardsMissingSum += unique; - shardsMissingMax = Math.max(shardsMissingMax, unique); - } - - // TODO: somehow get & use the count in the bucket? - } - - @Override - public Object getMergedResult() { - long exactCount = values == null ? 0 : values.size(); - return exactCount + shardsMissingSum; - } - }; + return new Merger(); } + private static class Merger extends FacetSortableMerger { + long sumUnique; + Set values; + int shardsMissing; + long shardsMissingSum; + long shardsMissingMax; + + @Override + public void merge(Object facetResult) { + SimpleOrderedMap map = (SimpleOrderedMap)facetResult; + long unique = ((Number)map.get("unique")).longValue(); + sumUnique += unique; + + List vals = (List)map.get("vals"); + if (vals != null) { + if (values == null) { + values = new HashSet<>(vals.size()*4); + } + values.addAll(vals); + } else { + shardsMissing++; + shardsMissingSum += unique; + shardsMissingMax = Math.max(shardsMissingMax, unique); + } + + // TODO: somehow get & use the count in the bucket? + } + + private long getLong() { + long exactCount = values == null ? 0 : values.size(); + return exactCount + shardsMissingSum; + } + + @Override + public Object getMergedResult() { + return getLong(); + } + + @Override + public int compareTo(FacetSortableMerger other, FacetField.SortDirection direction) { + return Long.compare( getLong(), ((Merger)other).getLong() ); + } + } + + static class LongSet { diff --git a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java index 1690890c0cd..e8ed066ad30 100644 --- a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java +++ b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java @@ -22,7 +22,9 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Random; import com.tdunning.math.stats.AVLTreeDigest; @@ -250,6 +252,38 @@ public class TestJsonFacets extends SolrTestCaseHS { ); } + Map suffixMap = new HashMap<>(); + { + suffixMap.put("_s", new String[]{"_s","_ss","_sd","_sds"} ); + suffixMap.put("_ss", new String[]{"_ss","_sds"} ); + suffixMap.put("_l", new String[]{"_l","_ls","_ld","_lds"} ); + suffixMap.put("_ls", new String[]{"_ls","_lds"} ); + suffixMap.put("_i", new String[]{"_i","_is","_id","_ids", "_l","_ls","_ld","_lds"} ); + suffixMap.put("_is", new String[]{"_is","_ids", "_ls","_lds"} ); + suffixMap.put("_d", new String[]{"_d","_ds","_dd","_dds"} ); + suffixMap.put("_ds", new String[]{"_ds","_dds"} ); + suffixMap.put("_f", new String[]{"_f","_fs","_fd","_fds", "_d","_ds","_dd","_dds"} ); + suffixMap.put("_fs", new String[]{"_fs","_fds","_ds","_dds"} ); + suffixMap.put("_dt", new String[]{"_dt","_dts","_dtd","_dtds"} ); + suffixMap.put("_dts", new String[]{"_dts","_dtds"} ); + suffixMap.put("_b", new String[]{"_b"} ); + } + + List getAlternatives(String field) { + int idx = field.lastIndexOf("_"); + if (idx<=0 || idx>=field.length()) return Collections.singletonList(field); + String suffix = field.substring(idx); + String[] alternativeSuffixes = suffixMap.get(suffix); + if (alternativeSuffixes == null) return Collections.singletonList(field); + String base = field.substring(0, idx); + List out = new ArrayList<>(alternativeSuffixes.length); + for (String altS : alternativeSuffixes) { + out.add( base + altS ); + } + Collections.shuffle(out, random()); + return out; + } + @Test public void testStats() throws Exception { // single valued strings @@ -257,11 +291,45 @@ public class TestJsonFacets extends SolrTestCaseHS { } public void doStats(Client client, ModifiableSolrParams p) throws Exception { + + Map> fieldLists = new HashMap<>(); + fieldLists.put("noexist", getAlternatives("noexist_s")); + fieldLists.put("cat_s", getAlternatives("cat_s")); + fieldLists.put("where_s", getAlternatives("where_s")); + fieldLists.put("num_d", getAlternatives("num_f")); // num_d name is historical, which is why we map it to num_f alternatives so we can include floats as well + fieldLists.put("num_i", getAlternatives("num_i")); + fieldLists.put("super_s", getAlternatives("super_s")); + fieldLists.put("val_b", getAlternatives("val_b")); + fieldLists.put("date", getAlternatives("date_dt")); + fieldLists.put("sparse_s", getAlternatives("sparse_s")); + fieldLists.put("multi_ss", getAlternatives("multi_ss")); + + // TODO: if a field will be used as a function source, we can't use multi-valued types for it (currently) + + int maxAlt = 0; + for (List fieldList : fieldLists.values()) { + maxAlt = Math.max(fieldList.size(), maxAlt); + } + + // take the field with the maximum number of alternative types and loop through our variants that many times + for (int i=0; i alts = fieldLists.get(field); + String alt = alts.get( i % alts.size() ); + args.add(field, alt); + } + + args.set("rows","0"); + // doStatsTemplated(client, args); + } + + // single valued strings doStatsTemplated(client, params(p, "rows","0", "noexist","noexist_s", "cat_s","cat_s", "where_s","where_s", "num_d","num_d", "num_i","num_i", "super_s","super_s", "val_b","val_b", "date","date_dt", "sparse_s","sparse_s" ,"multi_ss","multi_ss") ); - // multi-valued strings - doStatsTemplated(client, params(p, "facet","true", "rows","0", "noexist","noexist_ss", "cat_s","cat_ss", "where_s","where_ss", "num_d","num_d", "num_i","num_i", "super_s","super_ss", "val_b","val_b", "date","date_dt", "sparse_s","sparse_ss", "multi_ss","multi_ss") ); + // multi-valued strings, long/float substitute for int/double + doStatsTemplated(client, params(p, "facet","true", "rows","0", "noexist","noexist_ss", "cat_s","cat_ss", "where_s","where_ss", "num_d","num_f", "num_i","num_l", "super_s","super_ss", "val_b","val_b", "date","date_dt", "sparse_s","sparse_ss", "multi_ss","multi_ss") ); // single valued docvalues for strings, and single valued numeric doc values for numeric fields doStatsTemplated(client, params(p, "rows","0", "noexist","noexist_sd", "cat_s","cat_sd", "where_s","where_sd", "num_d","num_dd", "num_i","num_id", "super_s","super_sd", "val_b","val_b", "date","date_dtd", "sparse_s","sparse_sd" ,"multi_ss","multi_sds") ); @@ -372,7 +440,7 @@ public class TestJsonFacets extends SolrTestCaseHS { "'f1':{ allBuckets:{ 'count':1, n1:4.0}, 'buckets':[{ 'val':'A', 'count':1, n1:4.0}, { 'val':'B', 'count':0 /*, n1:0.0 */ }]} } " ); - // test sorting by stat + // test sorting by other stats client.testJQ(params(p, "q", "*:*" , "json.facet", "{f1:{terms:{field:'${cat_s}', sort:'n1 desc', facet:{n1:'sum(${num_d})'} }}" + " , f2:{terms:{field:'${cat_s}', sort:'n1 asc', facet:{n1:'sum(${num_d})'} }} }" @@ -382,6 +450,32 @@ public class TestJsonFacets extends SolrTestCaseHS { ", f2:{ 'buckets':[{ val:'B', count:3, n1:-3.0}, { val:'A', count:2, n1:6.0 }]} }" ); + // test sorting by other stats + client.testJQ(params(p, "q", "*:*" + , "json.facet", "{f1:{type:terms, field:'${cat_s}', sort:'x desc', facet:{x:'min(${num_d})'} }" + + " , f2:{type:terms, field:'${cat_s}', sort:'x desc', facet:{x:'max(${num_d})'} } " + + " , f3:{type:terms, field:'${cat_s}', sort:'x desc', facet:{x:'unique(${where_s})'} } " + + "}" + ) + , "facets=={ 'count':6, " + + " f1:{ 'buckets':[{ val:'A', count:2, x:2.0 }, { val:'B', count:3, x:-9.0}]}" + + ", f2:{ 'buckets':[{ val:'B', count:3, x:11.0 }, { val:'A', count:2, x:4.0 }]} " + + ", f3:{ 'buckets':[{ val:'A', count:2, x:2 }, { val:'B', count:3, x:2 }]} " + + "}" + ); + + // test sorting by stat with function + client.testJQ(params(p, "q", "*:*" + , "json.facet", "{f1:{terms:{field:'${cat_s}', sort:'n1 desc', facet:{n1:'avg(add(${num_d},${num_d}))'} }}" + + " , f2:{terms:{field:'${cat_s}', sort:'n1 asc', facet:{n1:'avg(add(${num_d},${num_d}))'} }} }" + ) + , "facets=={ 'count':6, " + + " f1:{ 'buckets':[{ val:'A', count:2, n1:6.0 }, { val:'B', count:3, n1:-2.0}]}" + + ", f2:{ 'buckets':[{ val:'B', count:3, n1:-2.0}, { val:'A', count:2, n1:6.0 }]} }" + ); + + + // percentiles 0,10,50,90,100 // catA: 2.0 2.2 3.0 3.8 4.0 // catB: -9.0 -8.2 -5.0 7.800000000000001 11.0