diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 82ac5588b47..0af4aab2fd6 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -131,6 +131,9 @@ New Features * SOLR-2335: New 'field("...")' function syntax for refering to complex field names (containing whitespace or special characters) in functions. +* SOLR-1709: Distributed support for Date and Numeric Range Faceting + (Peter Sturge, David Smiley, hossman) + Optimizations ---------------------- diff --git a/solr/src/java/org/apache/solr/handler/component/FacetComponent.java b/solr/src/java/org/apache/solr/handler/component/FacetComponent.java index 90e0d9b57a9..3a5ee6149bd 100644 --- a/solr/src/java/org/apache/solr/handler/component/FacetComponent.java +++ b/solr/src/java/org/apache/solr/handler/component/FacetComponent.java @@ -17,23 +17,23 @@ package org.apache.solr.handler.component; -import java.io.IOException; -import java.net.URL; -import java.util.*; - +import org.apache.lucene.queryParser.ParseException; +import org.apache.lucene.util.OpenBitSet; +import org.apache.solr.common.SolrException; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.FacetParams; -import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.common.util.StrUtils; -import org.apache.solr.common.SolrException; import org.apache.solr.request.SimpleFacets; -import org.apache.lucene.util.OpenBitSet; -import org.apache.solr.search.QueryParsing; import org.apache.solr.schema.FieldType; -import org.apache.lucene.queryParser.ParseException; +import org.apache.solr.search.QueryParsing; + +import java.io.IOException; +import java.net.URL; +import java.util.*; /** * TODO! @@ -312,8 +312,95 @@ public class FacetComponent extends SearchComponent dff.add(shardNum, (NamedList)facet_fields.get(dff.getKey()), dff.initialLimit); } } - } + // Distributed facet_dates + // + // The implementation below uses the first encountered shard's + // facet_dates as the basis for subsequent shards' data to be merged. + // (the "NOW" param should ensure consistency) + @SuppressWarnings("unchecked") + SimpleOrderedMap> facet_dates = + (SimpleOrderedMap>) + facet_counts.get("facet_dates"); + + if (facet_dates != null) { + + // go through each facet_date + for (Map.Entry> entry : facet_dates) { + final String field = entry.getKey(); + if (fi.dateFacets.get(field) == null) { + // first time we've seen this field, no merging + fi.dateFacets.add(field, entry.getValue()); + + } else { + // not the first time, merge current field + + SimpleOrderedMap shardFieldValues + = entry.getValue(); + SimpleOrderedMap existFieldValues + = fi.dateFacets.get(field); + + for (Map.Entry existPair : existFieldValues) { + final String key = existPair.getKey(); + if (key.equals("gap") || + key.equals("end") || + key.equals("start")) { + // we can skip these, must all be the same across shards + continue; + } + // can be null if inconsistencies in shards responses + Integer newValue = (Integer) shardFieldValues.get(key); + if (null != newValue) { + Integer oldValue = ((Integer) existPair.getValue()); + existPair.setValue(oldValue + newValue); + } + } + } + } + } + + // Distributed facet_ranges + // + // The implementation below uses the first encountered shard's + // facet_ranges as the basis for subsequent shards' data to be merged. + @SuppressWarnings("unchecked") + SimpleOrderedMap> facet_ranges = + (SimpleOrderedMap>) + facet_counts.get("facet_ranges"); + + if (facet_ranges != null) { + + // go through each facet_range + for (Map.Entry> entry : facet_ranges) { + final String field = entry.getKey(); + if (fi.rangeFacets.get(field) == null) { + // first time we've seen this field, no merging + fi.rangeFacets.add(field, entry.getValue()); + + } else { + // not the first time, merge current field counts + + @SuppressWarnings("unchecked") + NamedList shardFieldValues + = (NamedList) entry.getValue().get("counts"); + + @SuppressWarnings("unchecked") + NamedList existFieldValues + = (NamedList) fi.rangeFacets.get(field).get("counts"); + + for (Map.Entry existPair : existFieldValues) { + final String key = existPair.getKey(); + // can be null if inconsistencies in shards responses + Integer newValue = shardFieldValues.get(key); + if (null != newValue) { + Integer oldValue = existPair.getValue(); + existPair.setValue(oldValue + newValue); + } + } + } + } + } + } // // This code currently assumes that there will be only a single @@ -487,9 +574,8 @@ public class FacetComponent extends SearchComponent } } - // TODO: facet dates & numbers - facet_counts.add("facet_dates", new SimpleOrderedMap()); - facet_counts.add("facet_ranges", new SimpleOrderedMap()); + facet_counts.add("facet_dates", fi.dateFacets); + facet_counts.add("facet_ranges", fi.rangeFacets); rb.rsp.add("facet_counts", facet_counts); @@ -541,8 +627,14 @@ public class FacetComponent extends SearchComponent * This API is experimental and subject to change */ public static class FacetInfo { + public LinkedHashMap queryFacets; public LinkedHashMap facets; + public SimpleOrderedMap> dateFacets + = new SimpleOrderedMap>(); + public SimpleOrderedMap> rangeFacets + = new SimpleOrderedMap>(); + public List exceptionList; void parse(SolrParams params, ResponseBuilder rb) { diff --git a/solr/src/test/org/apache/solr/TestDistributedSearch.java b/solr/src/test/org/apache/solr/TestDistributedSearch.java index 066a38a3c7e..9b31d391232 100755 --- a/solr/src/test/org/apache/solr/TestDistributedSearch.java +++ b/solr/src/test/org/apache/solr/TestDistributedSearch.java @@ -40,7 +40,8 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase { String nlong = "n_l"; String tlong = "other_tl1"; String ndate = "n_dt"; - String tdate = "n_tdt"; + String tdate_a = "a_n_tdt"; + String tdate_b = "b_n_tdt"; String oddField="oddField_s"; String missingField="ignore_exception__missing_but_valid_field_t"; @@ -52,24 +53,36 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase { del("*:*"); - indexr(id,1, i1, 100, tlong, 100,t1,"now is the time for all good men" - ,"foo_f", 1.414f, "foo_b", "true", "foo_d", 1.414d); - indexr(id,2, i1, 50 , tlong, 50,t1,"to come to the aid of their country." - ); - indexr(id,3, i1, 2, tlong, 2,t1,"how now brown cow" - ); - indexr(id,4, i1, -100 ,tlong, 101,t1,"the quick fox jumped over the lazy dog" - ); - indexr(id,5, i1, 500, tlong, 500 ,t1,"the quick fox jumped way over the lazy dog" - ); + indexr(id,1, i1, 100, tlong, 100,t1,"now is the time for all good men", + tdate_a, "2010-04-20T11:00:00Z", + tdate_b, "2009-08-20T11:00:00Z", + "foo_f", 1.414f, "foo_b", "true", "foo_d", 1.414d); + indexr(id,2, i1, 50 , tlong, 50,t1,"to come to the aid of their country.", + tdate_a, "2010-05-02T11:00:00Z", + tdate_b, "2009-11-02T11:00:00Z"); + indexr(id,3, i1, 2, tlong, 2,t1,"how now brown cow", + tdate_a, "2010-05-03T11:00:00Z"); + indexr(id,4, i1, -100 ,tlong, 101, + t1,"the quick fox jumped over the lazy dog", + tdate_a, "2010-05-03T11:00:00Z", + tdate_b, "2010-05-03T11:00:00Z"); + indexr(id,5, i1, 500, tlong, 500 , + t1,"the quick fox jumped way over the lazy dog", + tdate_a, "2010-05-05T11:00:00Z"); indexr(id,6, i1, -600, tlong, 600 ,t1,"humpty dumpy sat on a wall"); indexr(id,7, i1, 123, tlong, 123 ,t1,"humpty dumpy had a great fall"); - indexr(id,8, i1, 876, tlong, 876,t1,"all the kings horses and all the kings men"); + indexr(id,8, i1, 876, tlong, 876, + tdate_b, "2010-01-05T11:00:00Z", + t1,"all the kings horses and all the kings men"); indexr(id,9, i1, 7, tlong, 7,t1,"couldn't put humpty together again"); indexr(id,10, i1, 4321, tlong, 4321,t1,"this too shall pass"); - indexr(id,11, i1, -987, tlong, 987,t1,"An eye for eye only ends up making the whole world blind."); - indexr(id,12, i1, 379, tlong, 379,t1,"Great works are performed, not by strength, but by perseverance."); - indexr(id,13, i1, 232, tlong, 232,t1,"no eggs on wall, lesson learned", oddField, "odd man out"); + indexr(id,11, i1, -987, tlong, 987, + t1,"An eye for eye only ends up making the whole world blind."); + indexr(id,12, i1, 379, tlong, 379, + t1,"Great works are performed, not by strength, but by perseverance."); + indexr(id,13, i1, 232, tlong, 232, + t1,"no eggs on wall, lesson learned", + oddField, "odd man out"); indexr(id, 14, "SubjectTerms_mfacet", new String[] {"mathematical models", "mathematical analysis"}); indexr(id, 15, "SubjectTerms_mfacet", new String[] {"test 1", "test 2", "test3"}); @@ -140,6 +153,43 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase { query("q","*:*", "rows",0, "facet","true", "facet.query","quick", "facet.query","all", "facet.query","*:*"); query("q","*:*", "rows",0, "facet","true", "facet.field",t1, "facet.mincount",2); + // simple date facet on one field + query("q","*:*", "rows",100, "facet","true", + "facet.date",tdate_a, + "facet.date.other", "all", + "facet.date.start","2010-05-01T11:00:00Z", + "facet.date.gap","+1DAY", + "facet.date.end","2010-05-20T11:00:00Z"); + + // date facet on multiple fields + query("q","*:*", "rows",100, "facet","true", + "facet.date",tdate_a, + "facet.date",tdate_b, + "facet.date.other", "all", + "f."+tdate_b+".facet.date.start","2009-05-01T11:00:00Z", + "f."+tdate_b+".facet.date.gap","+3MONTHS", + "facet.date.start","2010-05-01T11:00:00Z", + "facet.date.gap","+1DAY", + "facet.date.end","2010-05-20T11:00:00Z"); + + // simple range facet on one field + query("q","*:*", "rows",100, "facet","true", + "facet.range",tlong, + "facet.range.start",200, + "facet.range.gap",100, + "facet.range.end",900); + + // range facet on multiple fields + query("q","*:*", "rows",100, "facet","true", + "facet.range",tlong, + "facet.range",i1, + "f."+i1+".facet.range.start",300, + "f."+i1+".facet.range.gap",87, + "facet.range.end",900, + "facet.range.start",200, + "facet.range.gap",100, + "f."+tlong+".facet.range.end",900); + stress=0; // turn off stress... we want to tex max combos in min time for (int i=0; i<25*RANDOM_MULTIPLIER; i++) { String f = fieldNames[random.nextInt(fieldNames.length)];