SOLR-1709: Distributed support for Date and Numeric Range Faceting

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1095517 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Chris M. Hostetter 2011-04-20 21:29:13 +00:00
parent 1773808025
commit affaa37983
3 changed files with 173 additions and 28 deletions

View File

@ -131,6 +131,9 @@ New Features
* SOLR-2335: New 'field("...")' function syntax for refering to complex
field names (containing whitespace or special characters) in functions.
* SOLR-1709: Distributed support for Date and Numeric Range Faceting
(Peter Sturge, David Smiley, hossman)
Optimizations
----------------------

View File

@ -17,23 +17,23 @@
package org.apache.solr.handler.component;
import java.io.IOException;
import java.net.URL;
import java.util.*;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.util.OpenBitSet;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.common.SolrException;
import org.apache.solr.request.SimpleFacets;
import org.apache.lucene.util.OpenBitSet;
import org.apache.solr.search.QueryParsing;
import org.apache.solr.schema.FieldType;
import org.apache.lucene.queryParser.ParseException;
import org.apache.solr.search.QueryParsing;
import java.io.IOException;
import java.net.URL;
import java.util.*;
/**
* TODO!
@ -312,8 +312,95 @@ public class FacetComponent extends SearchComponent
dff.add(shardNum, (NamedList)facet_fields.get(dff.getKey()), dff.initialLimit);
}
}
}
// Distributed facet_dates
//
// The implementation below uses the first encountered shard's
// facet_dates as the basis for subsequent shards' data to be merged.
// (the "NOW" param should ensure consistency)
@SuppressWarnings("unchecked")
SimpleOrderedMap<SimpleOrderedMap<Object>> facet_dates =
(SimpleOrderedMap<SimpleOrderedMap<Object>>)
facet_counts.get("facet_dates");
if (facet_dates != null) {
// go through each facet_date
for (Map.Entry<String,SimpleOrderedMap<Object>> entry : facet_dates) {
final String field = entry.getKey();
if (fi.dateFacets.get(field) == null) {
// first time we've seen this field, no merging
fi.dateFacets.add(field, entry.getValue());
} else {
// not the first time, merge current field
SimpleOrderedMap<Object> shardFieldValues
= entry.getValue();
SimpleOrderedMap<Object> existFieldValues
= fi.dateFacets.get(field);
for (Map.Entry<String,Object> existPair : existFieldValues) {
final String key = existPair.getKey();
if (key.equals("gap") ||
key.equals("end") ||
key.equals("start")) {
// we can skip these, must all be the same across shards
continue;
}
// can be null if inconsistencies in shards responses
Integer newValue = (Integer) shardFieldValues.get(key);
if (null != newValue) {
Integer oldValue = ((Integer) existPair.getValue());
existPair.setValue(oldValue + newValue);
}
}
}
}
}
// Distributed facet_ranges
//
// The implementation below uses the first encountered shard's
// facet_ranges as the basis for subsequent shards' data to be merged.
@SuppressWarnings("unchecked")
SimpleOrderedMap<SimpleOrderedMap<Object>> facet_ranges =
(SimpleOrderedMap<SimpleOrderedMap<Object>>)
facet_counts.get("facet_ranges");
if (facet_ranges != null) {
// go through each facet_range
for (Map.Entry<String,SimpleOrderedMap<Object>> entry : facet_ranges) {
final String field = entry.getKey();
if (fi.rangeFacets.get(field) == null) {
// first time we've seen this field, no merging
fi.rangeFacets.add(field, entry.getValue());
} else {
// not the first time, merge current field counts
@SuppressWarnings("unchecked")
NamedList<Integer> shardFieldValues
= (NamedList<Integer>) entry.getValue().get("counts");
@SuppressWarnings("unchecked")
NamedList<Integer> existFieldValues
= (NamedList<Integer>) fi.rangeFacets.get(field).get("counts");
for (Map.Entry<String,Integer> existPair : existFieldValues) {
final String key = existPair.getKey();
// can be null if inconsistencies in shards responses
Integer newValue = shardFieldValues.get(key);
if (null != newValue) {
Integer oldValue = existPair.getValue();
existPair.setValue(oldValue + newValue);
}
}
}
}
}
}
//
// This code currently assumes that there will be only a single
@ -487,9 +574,8 @@ public class FacetComponent extends SearchComponent
}
}
// TODO: facet dates & numbers
facet_counts.add("facet_dates", new SimpleOrderedMap());
facet_counts.add("facet_ranges", new SimpleOrderedMap());
facet_counts.add("facet_dates", fi.dateFacets);
facet_counts.add("facet_ranges", fi.rangeFacets);
rb.rsp.add("facet_counts", facet_counts);
@ -541,8 +627,14 @@ public class FacetComponent extends SearchComponent
* <b>This API is experimental and subject to change</b>
*/
public static class FacetInfo {
public LinkedHashMap<String,QueryFacet> queryFacets;
public LinkedHashMap<String,DistribFieldFacet> facets;
public SimpleOrderedMap<SimpleOrderedMap<Object>> dateFacets
= new SimpleOrderedMap<SimpleOrderedMap<Object>>();
public SimpleOrderedMap<SimpleOrderedMap<Object>> rangeFacets
= new SimpleOrderedMap<SimpleOrderedMap<Object>>();
public List<String> exceptionList;
void parse(SolrParams params, ResponseBuilder rb) {

View File

@ -40,7 +40,8 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
String nlong = "n_l";
String tlong = "other_tl1";
String ndate = "n_dt";
String tdate = "n_tdt";
String tdate_a = "a_n_tdt";
String tdate_b = "b_n_tdt";
String oddField="oddField_s";
String missingField="ignore_exception__missing_but_valid_field_t";
@ -52,24 +53,36 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
del("*:*");
indexr(id,1, i1, 100, tlong, 100,t1,"now is the time for all good men"
,"foo_f", 1.414f, "foo_b", "true", "foo_d", 1.414d);
indexr(id,2, i1, 50 , tlong, 50,t1,"to come to the aid of their country."
);
indexr(id,3, i1, 2, tlong, 2,t1,"how now brown cow"
);
indexr(id,4, i1, -100 ,tlong, 101,t1,"the quick fox jumped over the lazy dog"
);
indexr(id,5, i1, 500, tlong, 500 ,t1,"the quick fox jumped way over the lazy dog"
);
indexr(id,1, i1, 100, tlong, 100,t1,"now is the time for all good men",
tdate_a, "2010-04-20T11:00:00Z",
tdate_b, "2009-08-20T11:00:00Z",
"foo_f", 1.414f, "foo_b", "true", "foo_d", 1.414d);
indexr(id,2, i1, 50 , tlong, 50,t1,"to come to the aid of their country.",
tdate_a, "2010-05-02T11:00:00Z",
tdate_b, "2009-11-02T11:00:00Z");
indexr(id,3, i1, 2, tlong, 2,t1,"how now brown cow",
tdate_a, "2010-05-03T11:00:00Z");
indexr(id,4, i1, -100 ,tlong, 101,
t1,"the quick fox jumped over the lazy dog",
tdate_a, "2010-05-03T11:00:00Z",
tdate_b, "2010-05-03T11:00:00Z");
indexr(id,5, i1, 500, tlong, 500 ,
t1,"the quick fox jumped way over the lazy dog",
tdate_a, "2010-05-05T11:00:00Z");
indexr(id,6, i1, -600, tlong, 600 ,t1,"humpty dumpy sat on a wall");
indexr(id,7, i1, 123, tlong, 123 ,t1,"humpty dumpy had a great fall");
indexr(id,8, i1, 876, tlong, 876,t1,"all the kings horses and all the kings men");
indexr(id,8, i1, 876, tlong, 876,
tdate_b, "2010-01-05T11:00:00Z",
t1,"all the kings horses and all the kings men");
indexr(id,9, i1, 7, tlong, 7,t1,"couldn't put humpty together again");
indexr(id,10, i1, 4321, tlong, 4321,t1,"this too shall pass");
indexr(id,11, i1, -987, tlong, 987,t1,"An eye for eye only ends up making the whole world blind.");
indexr(id,12, i1, 379, tlong, 379,t1,"Great works are performed, not by strength, but by perseverance.");
indexr(id,13, i1, 232, tlong, 232,t1,"no eggs on wall, lesson learned", oddField, "odd man out");
indexr(id,11, i1, -987, tlong, 987,
t1,"An eye for eye only ends up making the whole world blind.");
indexr(id,12, i1, 379, tlong, 379,
t1,"Great works are performed, not by strength, but by perseverance.");
indexr(id,13, i1, 232, tlong, 232,
t1,"no eggs on wall, lesson learned",
oddField, "odd man out");
indexr(id, 14, "SubjectTerms_mfacet", new String[] {"mathematical models", "mathematical analysis"});
indexr(id, 15, "SubjectTerms_mfacet", new String[] {"test 1", "test 2", "test3"});
@ -140,6 +153,43 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
query("q","*:*", "rows",0, "facet","true", "facet.query","quick", "facet.query","all", "facet.query","*:*");
query("q","*:*", "rows",0, "facet","true", "facet.field",t1, "facet.mincount",2);
// simple date facet on one field
query("q","*:*", "rows",100, "facet","true",
"facet.date",tdate_a,
"facet.date.other", "all",
"facet.date.start","2010-05-01T11:00:00Z",
"facet.date.gap","+1DAY",
"facet.date.end","2010-05-20T11:00:00Z");
// date facet on multiple fields
query("q","*:*", "rows",100, "facet","true",
"facet.date",tdate_a,
"facet.date",tdate_b,
"facet.date.other", "all",
"f."+tdate_b+".facet.date.start","2009-05-01T11:00:00Z",
"f."+tdate_b+".facet.date.gap","+3MONTHS",
"facet.date.start","2010-05-01T11:00:00Z",
"facet.date.gap","+1DAY",
"facet.date.end","2010-05-20T11:00:00Z");
// simple range facet on one field
query("q","*:*", "rows",100, "facet","true",
"facet.range",tlong,
"facet.range.start",200,
"facet.range.gap",100,
"facet.range.end",900);
// range facet on multiple fields
query("q","*:*", "rows",100, "facet","true",
"facet.range",tlong,
"facet.range",i1,
"f."+i1+".facet.range.start",300,
"f."+i1+".facet.range.gap",87,
"facet.range.end",900,
"facet.range.start",200,
"facet.range.gap",100,
"f."+tlong+".facet.range.end",900);
stress=0; // turn off stress... we want to tex max combos in min time
for (int i=0; i<25*RANDOM_MULTIPLIER; i++) {
String f = fieldNames[random.nextInt(fieldNames.length)];