mirror of https://github.com/apache/lucene.git
SOLR-1709: Distributed support for Date and Numeric Range Faceting
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1095517 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
1773808025
commit
affaa37983
|
@ -131,6 +131,9 @@ New Features
|
|||
* SOLR-2335: New 'field("...")' function syntax for referring to complex
|
||||
field names (containing whitespace or special characters) in functions.
|
||||
|
||||
* SOLR-1709: Distributed support for Date and Numeric Range Faceting
|
||||
(Peter Sturge, David Smiley, hossman)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -17,23 +17,23 @@
|
|||
|
||||
package org.apache.solr.handler.component;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URL;
|
||||
import java.util.*;
|
||||
|
||||
import org.apache.lucene.queryParser.ParseException;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.FacetParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.common.util.StrUtils;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.request.SimpleFacets;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
import org.apache.solr.search.QueryParsing;
|
||||
import org.apache.solr.schema.FieldType;
|
||||
import org.apache.lucene.queryParser.ParseException;
|
||||
import org.apache.solr.search.QueryParsing;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URL;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* TODO!
|
||||
|
@ -312,8 +312,95 @@ public class FacetComponent extends SearchComponent
|
|||
dff.add(shardNum, (NamedList)facet_fields.get(dff.getKey()), dff.initialLimit);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Distributed facet_dates
|
||||
//
|
||||
// The implementation below uses the first encountered shard's
|
||||
// facet_dates as the basis for subsequent shards' data to be merged.
|
||||
// (the "NOW" param should ensure consistency)
|
||||
@SuppressWarnings("unchecked")
|
||||
SimpleOrderedMap<SimpleOrderedMap<Object>> facet_dates =
|
||||
(SimpleOrderedMap<SimpleOrderedMap<Object>>)
|
||||
facet_counts.get("facet_dates");
|
||||
|
||||
if (facet_dates != null) {
|
||||
|
||||
// go through each facet_date
|
||||
for (Map.Entry<String,SimpleOrderedMap<Object>> entry : facet_dates) {
|
||||
final String field = entry.getKey();
|
||||
if (fi.dateFacets.get(field) == null) {
|
||||
// first time we've seen this field, no merging
|
||||
fi.dateFacets.add(field, entry.getValue());
|
||||
|
||||
} else {
|
||||
// not the first time, merge current field
|
||||
|
||||
SimpleOrderedMap<Object> shardFieldValues
|
||||
= entry.getValue();
|
||||
SimpleOrderedMap<Object> existFieldValues
|
||||
= fi.dateFacets.get(field);
|
||||
|
||||
for (Map.Entry<String,Object> existPair : existFieldValues) {
|
||||
final String key = existPair.getKey();
|
||||
if (key.equals("gap") ||
|
||||
key.equals("end") ||
|
||||
key.equals("start")) {
|
||||
// we can skip these, must all be the same across shards
|
||||
continue;
|
||||
}
|
||||
// can be null if inconsistencies in shards responses
|
||||
Integer newValue = (Integer) shardFieldValues.get(key);
|
||||
if (null != newValue) {
|
||||
Integer oldValue = ((Integer) existPair.getValue());
|
||||
existPair.setValue(oldValue + newValue);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Distributed facet_ranges
|
||||
//
|
||||
// The implementation below uses the first encountered shard's
|
||||
// facet_ranges as the basis for subsequent shards' data to be merged.
|
||||
@SuppressWarnings("unchecked")
|
||||
SimpleOrderedMap<SimpleOrderedMap<Object>> facet_ranges =
|
||||
(SimpleOrderedMap<SimpleOrderedMap<Object>>)
|
||||
facet_counts.get("facet_ranges");
|
||||
|
||||
if (facet_ranges != null) {
|
||||
|
||||
// go through each facet_range
|
||||
for (Map.Entry<String,SimpleOrderedMap<Object>> entry : facet_ranges) {
|
||||
final String field = entry.getKey();
|
||||
if (fi.rangeFacets.get(field) == null) {
|
||||
// first time we've seen this field, no merging
|
||||
fi.rangeFacets.add(field, entry.getValue());
|
||||
|
||||
} else {
|
||||
// not the first time, merge current field counts
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
NamedList<Integer> shardFieldValues
|
||||
= (NamedList<Integer>) entry.getValue().get("counts");
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
NamedList<Integer> existFieldValues
|
||||
= (NamedList<Integer>) fi.rangeFacets.get(field).get("counts");
|
||||
|
||||
for (Map.Entry<String,Integer> existPair : existFieldValues) {
|
||||
final String key = existPair.getKey();
|
||||
// can be null if inconsistencies in shards responses
|
||||
Integer newValue = shardFieldValues.get(key);
|
||||
if (null != newValue) {
|
||||
Integer oldValue = existPair.getValue();
|
||||
existPair.setValue(oldValue + newValue);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// This code currently assumes that there will be only a single
|
||||
|
@ -487,9 +574,8 @@ public class FacetComponent extends SearchComponent
|
|||
}
|
||||
}
|
||||
|
||||
// TODO: facet dates & numbers
|
||||
facet_counts.add("facet_dates", new SimpleOrderedMap());
|
||||
facet_counts.add("facet_ranges", new SimpleOrderedMap());
|
||||
facet_counts.add("facet_dates", fi.dateFacets);
|
||||
facet_counts.add("facet_ranges", fi.rangeFacets);
|
||||
|
||||
rb.rsp.add("facet_counts", facet_counts);
|
||||
|
||||
|
@ -541,8 +627,14 @@ public class FacetComponent extends SearchComponent
|
|||
* <b>This API is experimental and subject to change</b>
|
||||
*/
|
||||
public static class FacetInfo {
|
||||
|
||||
public LinkedHashMap<String,QueryFacet> queryFacets;
|
||||
public LinkedHashMap<String,DistribFieldFacet> facets;
|
||||
public SimpleOrderedMap<SimpleOrderedMap<Object>> dateFacets
|
||||
= new SimpleOrderedMap<SimpleOrderedMap<Object>>();
|
||||
public SimpleOrderedMap<SimpleOrderedMap<Object>> rangeFacets
|
||||
= new SimpleOrderedMap<SimpleOrderedMap<Object>>();
|
||||
|
||||
public List<String> exceptionList;
|
||||
|
||||
void parse(SolrParams params, ResponseBuilder rb) {
|
||||
|
|
|
@ -40,7 +40,8 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
|
|||
String nlong = "n_l";
|
||||
String tlong = "other_tl1";
|
||||
String ndate = "n_dt";
|
||||
String tdate = "n_tdt";
|
||||
String tdate_a = "a_n_tdt";
|
||||
String tdate_b = "b_n_tdt";
|
||||
|
||||
String oddField="oddField_s";
|
||||
String missingField="ignore_exception__missing_but_valid_field_t";
|
||||
|
@ -52,24 +53,36 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
|
|||
|
||||
|
||||
del("*:*");
|
||||
indexr(id,1, i1, 100, tlong, 100,t1,"now is the time for all good men"
|
||||
,"foo_f", 1.414f, "foo_b", "true", "foo_d", 1.414d);
|
||||
indexr(id,2, i1, 50 , tlong, 50,t1,"to come to the aid of their country."
|
||||
);
|
||||
indexr(id,3, i1, 2, tlong, 2,t1,"how now brown cow"
|
||||
);
|
||||
indexr(id,4, i1, -100 ,tlong, 101,t1,"the quick fox jumped over the lazy dog"
|
||||
);
|
||||
indexr(id,5, i1, 500, tlong, 500 ,t1,"the quick fox jumped way over the lazy dog"
|
||||
);
|
||||
indexr(id,1, i1, 100, tlong, 100,t1,"now is the time for all good men",
|
||||
tdate_a, "2010-04-20T11:00:00Z",
|
||||
tdate_b, "2009-08-20T11:00:00Z",
|
||||
"foo_f", 1.414f, "foo_b", "true", "foo_d", 1.414d);
|
||||
indexr(id,2, i1, 50 , tlong, 50,t1,"to come to the aid of their country.",
|
||||
tdate_a, "2010-05-02T11:00:00Z",
|
||||
tdate_b, "2009-11-02T11:00:00Z");
|
||||
indexr(id,3, i1, 2, tlong, 2,t1,"how now brown cow",
|
||||
tdate_a, "2010-05-03T11:00:00Z");
|
||||
indexr(id,4, i1, -100 ,tlong, 101,
|
||||
t1,"the quick fox jumped over the lazy dog",
|
||||
tdate_a, "2010-05-03T11:00:00Z",
|
||||
tdate_b, "2010-05-03T11:00:00Z");
|
||||
indexr(id,5, i1, 500, tlong, 500 ,
|
||||
t1,"the quick fox jumped way over the lazy dog",
|
||||
tdate_a, "2010-05-05T11:00:00Z");
|
||||
indexr(id,6, i1, -600, tlong, 600 ,t1,"humpty dumpy sat on a wall");
|
||||
indexr(id,7, i1, 123, tlong, 123 ,t1,"humpty dumpy had a great fall");
|
||||
indexr(id,8, i1, 876, tlong, 876,t1,"all the kings horses and all the kings men");
|
||||
indexr(id,8, i1, 876, tlong, 876,
|
||||
tdate_b, "2010-01-05T11:00:00Z",
|
||||
t1,"all the kings horses and all the kings men");
|
||||
indexr(id,9, i1, 7, tlong, 7,t1,"couldn't put humpty together again");
|
||||
indexr(id,10, i1, 4321, tlong, 4321,t1,"this too shall pass");
|
||||
indexr(id,11, i1, -987, tlong, 987,t1,"An eye for eye only ends up making the whole world blind.");
|
||||
indexr(id,12, i1, 379, tlong, 379,t1,"Great works are performed, not by strength, but by perseverance.");
|
||||
indexr(id,13, i1, 232, tlong, 232,t1,"no eggs on wall, lesson learned", oddField, "odd man out");
|
||||
indexr(id,11, i1, -987, tlong, 987,
|
||||
t1,"An eye for eye only ends up making the whole world blind.");
|
||||
indexr(id,12, i1, 379, tlong, 379,
|
||||
t1,"Great works are performed, not by strength, but by perseverance.");
|
||||
indexr(id,13, i1, 232, tlong, 232,
|
||||
t1,"no eggs on wall, lesson learned",
|
||||
oddField, "odd man out");
|
||||
|
||||
indexr(id, 14, "SubjectTerms_mfacet", new String[] {"mathematical models", "mathematical analysis"});
|
||||
indexr(id, 15, "SubjectTerms_mfacet", new String[] {"test 1", "test 2", "test3"});
|
||||
|
@ -140,6 +153,43 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
|
|||
query("q","*:*", "rows",0, "facet","true", "facet.query","quick", "facet.query","all", "facet.query","*:*");
|
||||
query("q","*:*", "rows",0, "facet","true", "facet.field",t1, "facet.mincount",2);
|
||||
|
||||
// simple date facet on one field
|
||||
query("q","*:*", "rows",100, "facet","true",
|
||||
"facet.date",tdate_a,
|
||||
"facet.date.other", "all",
|
||||
"facet.date.start","2010-05-01T11:00:00Z",
|
||||
"facet.date.gap","+1DAY",
|
||||
"facet.date.end","2010-05-20T11:00:00Z");
|
||||
|
||||
// date facet on multiple fields
|
||||
query("q","*:*", "rows",100, "facet","true",
|
||||
"facet.date",tdate_a,
|
||||
"facet.date",tdate_b,
|
||||
"facet.date.other", "all",
|
||||
"f."+tdate_b+".facet.date.start","2009-05-01T11:00:00Z",
|
||||
"f."+tdate_b+".facet.date.gap","+3MONTHS",
|
||||
"facet.date.start","2010-05-01T11:00:00Z",
|
||||
"facet.date.gap","+1DAY",
|
||||
"facet.date.end","2010-05-20T11:00:00Z");
|
||||
|
||||
// simple range facet on one field
|
||||
query("q","*:*", "rows",100, "facet","true",
|
||||
"facet.range",tlong,
|
||||
"facet.range.start",200,
|
||||
"facet.range.gap",100,
|
||||
"facet.range.end",900);
|
||||
|
||||
// range facet on multiple fields
|
||||
query("q","*:*", "rows",100, "facet","true",
|
||||
"facet.range",tlong,
|
||||
"facet.range",i1,
|
||||
"f."+i1+".facet.range.start",300,
|
||||
"f."+i1+".facet.range.gap",87,
|
||||
"facet.range.end",900,
|
||||
"facet.range.start",200,
|
||||
"facet.range.gap",100,
|
||||
"f."+tlong+".facet.range.end",900);
|
||||
|
||||
stress=0; // turn off stress... we want to test max combos in min time
|
||||
for (int i=0; i<25*RANDOM_MULTIPLIER; i++) {
|
||||
String f = fieldNames[random.nextInt(fieldNames.length)];
|
||||
|
|
Loading…
Reference in New Issue