SOLR-6187: facet.mincount ignored in range faceting using distributed search

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1623429 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Erick Erickson 2014-09-08 16:34:23 +00:00
parent 6a4ea25666
commit 7fbd55f679
3 changed files with 267 additions and 28 deletions

View File

@ -179,6 +179,10 @@ Bug Fixes
* SOLR-6467: bin/solr script should direct stdout/stderr when starting in the background
to the solr-PORT-console.log in the logs directory instead of bin. (Timothy Potter)
* SOLR-6187: SOLR-6154: facet.mincount ignored in range faceting using distributed search
NOTE: This does NOT fixed for the (deprecated) facet.date idiom, use facet.range
instead. (Erick Erickson, Zacchio Bagnati, Ronald Matamoros, Vamsee Yalargadda)
Other Changes
---------------------

View File

@ -135,7 +135,7 @@ public class FacetComponent extends SearchComponent {
// only other required phase).
// We do this in distributedProcess so we can look at all of the
// requests in the outgoing queue at once.
for (int shardNum = 0; shardNum < rb.shards.length; shardNum++) {
List<String> distribFieldFacetRefinements = null;
@ -164,7 +164,7 @@ public class FacetComponent extends SearchComponent {
if (distribFieldFacetRefinements == null) {
distribFieldFacetRefinements = new ArrayList<>();
}
distribFieldFacetRefinements.add(facetCommand);
distribFieldFacetRefinements.add(termsKey);
distribFieldFacetRefinements.add(termsVal);
@ -175,7 +175,7 @@ public class FacetComponent extends SearchComponent {
if (distribFieldFacetRefinements == null
&& !pivotFacetRefinementRequestsExistForShard) {
// nothing to refine, short circut out
// nothing to refine, short circuit out
continue;
}
@ -214,22 +214,22 @@ public class FacetComponent extends SearchComponent {
shardsRefineRequest.params.set(FacetParams.FACET, "true");
shardsRefineRequest.params.remove(FacetParams.FACET_FIELD);
shardsRefineRequest.params.remove(FacetParams.FACET_QUERY);
for (int i = 0; i < distribFieldFacetRefinements.size();) {
String facetCommand = distribFieldFacetRefinements.get(i++);
String termsKey = distribFieldFacetRefinements.get(i++);
String termsVal = distribFieldFacetRefinements.get(i++);
shardsRefineRequest.params.add(FacetParams.FACET_FIELD,
facetCommand);
shardsRefineRequest.params.set(termsKey, termsVal);
}
}
if (newRequest) {
rb.addRequest(this, shardsRefineRequest);
}
// PivotFacetAdditions
if (pivotFacetRefinementRequestsExistForShard) {
if (newRequest) {
@ -314,6 +314,8 @@ public class FacetComponent extends SearchComponent {
}
modifyRequestForFieldFacets(rb, sreq, fi);
modifyRequestForRangeFacets(sreq, fi);
modifyRequestForPivotFacets(rb, sreq, fi.pivotFacets);
@ -326,7 +328,24 @@ public class FacetComponent extends SearchComponent {
// we could optionally remove faceting params
}
}
// we must get all the range buckets back in order to have coherent lists at the end, see SOLR-6154
private void modifyRequestForRangeFacets(ShardRequest sreq, FacetInfo fi) {
// Collect all the range fields.
if (sreq.params.getParams(FacetParams.FACET_RANGE) == null) {
return;
}
List<String> rangeFields = new ArrayList<>();
for (String field : sreq.params.getParams(FacetParams.FACET_RANGE)) {
rangeFields.add(field);
}
for (String field : rangeFields) {
sreq.params.remove("f." + field + ".facet.mincount");
sreq.params.add("f." + field + ".facet.mincount", "0");
}
}
private void modifyRequestForFieldFacets(ResponseBuilder rb, ShardRequest sreq, FacetInfo fi) {
for (DistribFieldFacet dff : fi.facets.values()) {
@ -372,7 +391,7 @@ public class FacetComponent extends SearchComponent {
dff.initialMincount = (int) Math.ceil((double) dff.minCount / rb.slices.length);
}
}
// Currently this is for testing only and allows overriding of the
// facet.limit set to the shards
dff.initialLimit = rb.req.getParams().getInt("facet.shard.limit", dff.initialLimit);
@ -517,7 +536,7 @@ public class FacetComponent extends SearchComponent {
dff.add(shardNum, (NamedList) facet_fields.get(dff.getKey()), dff.initialLimit);
}
}
// Distributed facet_dates
doDistribDates(fi, facet_counts);
@ -546,14 +565,14 @@ public class FacetComponent extends SearchComponent {
for (DistribFieldFacet dff : fi.facets.values()) {
// no need to check these facets for refinement
if (dff.initialLimit <= 0 && dff.initialMincount <= 1) continue;
// only other case where index-sort doesn't need refinement is if minCount==0
if (dff.minCount <= 1 && dff.sort.equals(FacetParams.FACET_SORT_INDEX)) continue;
@SuppressWarnings("unchecked") // generic array's are annoying
List<String>[] tmp = (List<String>[]) new List[rb.shards.length];
dff._toRefine = tmp;
ShardFacetCount[] counts = dff.getCountSorted();
int ntop = Math.min(counts.length,
dff.limit >= 0 ? dff.offset + dff.limit : Integer.MAX_VALUE);
@ -562,7 +581,7 @@ public class FacetComponent extends SearchComponent {
for (int i = 0; i < counts.length; i++) {
ShardFacetCount sfc = counts[i];
boolean needRefinement = false;
if (i < ntop) {
// automatically flag the top values for refinement
// this should always be true for facet.sort=index
@ -586,14 +605,14 @@ public class FacetComponent extends SearchComponent {
needRefinement = true;
}
}
if (needRefinement) {
// add a query for each shard missing the term that needs refinement
for (int shardNum = 0; shardNum < rb.shards.length; shardNum++) {
FixedBitSet fbs = dff.counted[shardNum];
// fbs can be null if a shard request failed
if (fbs != null &&
(sfc.termNum >= fbs.length() || !fbs.get(sfc.termNum)) &&
if (fbs != null &&
(sfc.termNum >= fbs.length() || !fbs.get(sfc.termNum)) &&
dff.maxPossible(sfc, shardNum) > 0) {
dff.needRefinements = true;
@ -607,6 +626,100 @@ public class FacetComponent extends SearchComponent {
}
}
}
removeFieldFacetsUnderLimits(rb);
removeRangeFacetsUnderLimits(rb);
removeQueryFacetsUnderLimits(rb);
}
private void removeQueryFacetsUnderLimits(ResponseBuilder rb) {
if (rb.stage != ResponseBuilder.STAGE_EXECUTE_QUERY) {
return;
}
FacetInfo fi = rb._facetInfo;
Map<String, QueryFacet> query_facets = fi.queryFacets;
if (query_facets == null) {
return;
}
LinkedHashMap<String, QueryFacet> newQueryFacets = new LinkedHashMap<>();
// The
int minCount = rb.req.getParams().getInt(FacetParams.FACET_MINCOUNT, 0);
boolean replace = false;
for (Map.Entry<String, QueryFacet> ent : query_facets.entrySet()) {
if (ent.getValue().count >= minCount) {
newQueryFacets.put(ent.getKey(), ent.getValue());
} else {
log.trace("Removing facetQuery/key: " + ent.getKey() + "/" + ent.getValue().toString() + " mincount=" + minCount);
replace = true;
}
}
if (replace) {
fi.queryFacets = newQueryFacets;
}
}
private void removeRangeFacetsUnderLimits(ResponseBuilder rb) {
if (rb.stage != ResponseBuilder.STAGE_EXECUTE_QUERY) {
return;
}
FacetInfo fi = rb._facetInfo;
@SuppressWarnings("unchecked")
SimpleOrderedMap<SimpleOrderedMap<Object>> facet_ranges =
(SimpleOrderedMap<SimpleOrderedMap<Object>>)
fi.rangeFacets;
if (facet_ranges == null) {
return;
}
// go through each facet_range
for (Map.Entry<String, SimpleOrderedMap<Object>> entry : facet_ranges) {
boolean replace = false;
final String field = entry.getKey();
int minCount = rb.req.getParams().getFieldInt(field, FacetParams.FACET_MINCOUNT, 0);
if (minCount == 0) {
continue;
}
@SuppressWarnings("unchecked")
NamedList<Integer> vals
= (NamedList<Integer>) facet_ranges.get(field).get("counts");
NamedList newList = new NamedList();
for (Map.Entry<String, Integer> pair : vals) {
if (pair.getValue() >= minCount) {
newList.add(pair.getKey(), pair.getValue());
} else {
log.trace("Removing facet/key: " + pair.getKey() + "/" + pair.getValue().toString() + " mincount=" + minCount);
replace = true;
}
}
if (replace) {
vals.clear();
vals.addAll(newList);
}
}
}
private void removeFieldFacetsUnderLimits(ResponseBuilder rb) {
if (rb.stage != ResponseBuilder.STAGE_DONE) {
return;
}
FacetInfo fi = rb._facetInfo;
if (fi.facets == null) {
return;
}
// Do field facets
for (Entry<String, DistribFieldFacet> ent : fi.facets.entrySet()) {
String field = ent.getKey();
int minCount = rb.req.getParams().getFieldInt(field, FacetParams.FACET_MINCOUNT, 0);
if (minCount == 0) { // return them all
continue;
}
ent.getValue().respectMinCount(minCount);
}
}
// The implementation below uses the first encountered shard's
@ -763,7 +876,7 @@ public class FacetComponent extends SearchComponent {
private void refineFacets(ResponseBuilder rb, ShardRequest sreq) {
FacetInfo fi = rb._facetInfo;
for (ShardResponse srsp : sreq.responses) {
// int shardNum = rb.getShardNum(srsp.shard);
NamedList facet_counts = (NamedList) srsp.getSolrResponse().getResponse().get("facet_counts");
@ -775,7 +888,7 @@ public class FacetComponent extends SearchComponent {
String key = facet_fields.getName(i);
DistribFieldFacet dff = fi.facets.get(key);
if (dff == null) continue;
NamedList shardCounts = (NamedList) facet_fields.getVal(i);
for (int j = 0; j < shardCounts.size(); j++) {
@ -1179,7 +1292,7 @@ public class FacetComponent extends SearchComponent {
int numReceived = sz;
FixedBitSet terms = new FixedBitSet(termNum + sz);
long last = 0;
for (int i = 0; i < sz; i++) {
String name = shardCounts.getName(i);
@ -1248,6 +1361,22 @@ public class FacetComponent extends SearchComponent {
// TODO: could store the last term in the shard to tell if this term
// comes before or after it. If it comes before, we could subtract 1
}
public void respectMinCount(long minCount) {
HashMap<String, ShardFacetCount> newOne = new HashMap<>();
boolean replace = false;
for (Map.Entry<String, ShardFacetCount> ent : counts.entrySet()) {
if (ent.getValue().count >= minCount) {
newOne.put(ent.getKey(), ent.getValue());
} else {
log.trace("Removing facet/key: " + ent.getKey() + "/" + ent.getValue().toString() + " mincount=" + minCount);
replace = true;
}
}
if (replace) {
counts = newOne;
}
}
}
/**

View File

@ -28,7 +28,9 @@ import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.RangeFacet;
import org.apache.solr.cloud.ChaosMonkey;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
@ -66,14 +68,14 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
del("*:*");
indexr(id,1, i1, 100, tlong, 100,t1,"now is the time for all good men",
indexr(id,1, i1, 100, tlong, 100,t1,"now is the time for all good men",
tdate_a, "2010-04-20T11:00:00Z",
tdate_b, "2009-08-20T11:00:00Z",
"foo_f", 1.414f, "foo_b", "true", "foo_d", 1.414d);
indexr(id,2, i1, 50 , tlong, 50,t1,"to come to the aid of their country.",
indexr(id,2, i1, 50 , tlong, 50,t1,"to come to the aid of their country.",
tdate_a, "2010-05-02T11:00:00Z",
tdate_b, "2009-11-02T11:00:00Z");
indexr(id,3, i1, 2, tlong, 2,t1,"how now brown cow",
indexr(id,3, i1, 2, tlong, 2,t1,"how now brown cow",
tdate_a, "2010-05-03T11:00:00Z");
indexr(id,4, i1, -100 ,tlong, 101,
t1,"the quick fox jumped over the lazy dog",
@ -175,13 +177,13 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
// a facet query to test out chars out of the ascii range
query("q","*:*", "rows",0, "facet","true", "facet.query","{!term f=foo_s}international\u00ff\u01ff\u2222\u3333");
// simple field facet on date fields
rsp = query("q","*:*", "rows", 0,
rsp = query("q","*:*", "rows", 0,
"facet","true", "facet.limit", 1, // TODO: limit shouldn't be needed: SOLR-6386
"facet.field", tdate_a);
assertEquals(1, rsp.getFacetFields().size());
rsp = query("q","*:*", "rows", 0,
rsp = query("q","*:*", "rows", 0,
"facet","true", "facet.limit", 1, // TODO: limit shouldn't be needed: SOLR-6386
"facet.field", tdate_b, "facet.field", tdate_a);
assertEquals(2, rsp.getFacetFields().size());
@ -225,7 +227,85 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
"facet.range.start",200,
"facet.range.gap",100,
"f."+tlong+".facet.range.end",900);
// Test mincounts. Do NOT want to go through all the stuff where with validateControlData in query() method
// Purposely packing a _bunch_ of stuff together here to insure that the proper level of mincount is used for
// each
ModifiableSolrParams minParams = new ModifiableSolrParams();
minParams.set("q","*:*");
minParams.set("rows", 1);
minParams.set("facet", "true");
minParams.set("facet.missing", "true");
minParams.set("facet.field", i1);
minParams.set("facet.missing", "true");
minParams.set("facet.mincount", 2);
// Return a separate section of ranges over i1. Should respect global range mincount
minParams.set("facet.range", i1);
minParams.set("f." + i1 + ".facet.range.start", 0);
minParams.set("f." + i1 + ".facet.range.gap", 200);
minParams.set("f." + i1 + ".facet.range.end", 1200);
minParams.set("f." + i1 + ".facet.mincount", 4);
// Return a separate section of ranges over tlong Should respect facet.mincount
minParams.add("facet.range", tlong);
minParams.set("f." + tlong + ".facet.range.start", 0);
minParams.set("f." + tlong + ".facet.range.gap", 100);
minParams.set("f." + tlong + ".facet.range.end", 1200);
// Repeat with a range type of date
minParams.add("facet.range", tdate_b);
minParams.set("f." + tdate_b + ".facet.range.start", "2009-02-01T00:00:00Z");
minParams.set("f." + tdate_b + ".facet.range.gap", "+1YEAR");
minParams.set("f." + tdate_b + ".facet.range.end", "2011-01-01T00:00:00Z");
minParams.set("f." + tdate_b + ".facet.mincount", 3);
// Insure that global mincount is respected for facet queries
minParams.set("facet.query", tdate_a + ":[2010-01-01T00:00:00Z TO 2011-01-01T00:00:00Z]"); // Should return some counts
//minParams.set("facet.query", tdate_a + ":[* TO *]"); // Should be removed
minParams.add("facet.query", tdate_b + ":[2008-01-01T00:00:00Z TO 2009-09-01T00:00:00Z]"); // Should be removed from response
setDistributedParams(minParams);
QueryResponse minResp = queryServer(minParams);
ModifiableSolrParams eParams = new ModifiableSolrParams();
eParams.set("q",tdate_b + ":[* TO *]");
eParams.set("rows", 1000);
eParams.set("fl", tdate_b);
setDistributedParams(eParams);
QueryResponse eResp = queryServer(eParams);
// Check that exactly the right numbers of counts came through
assertEquals("Should be exactly 2 range facets returned after minCounts taken into account ", 3, minResp.getFacetRanges().size());
assertEquals("Should only be 1 query facets returned after minCounts taken into account ", 1, minResp.getFacetQuery().size());
checkMinCountsField(minResp.getFacetField(i1).getValues(), new Object[]{null, 55L}); // Should just be the null entries for field
checkMinCountsRange(minResp.getFacetRanges().get(0).getCounts(), new Object[]{"0", 5L}); // range on i1
checkMinCountsRange(minResp.getFacetRanges().get(1).getCounts(), new Object[]{"0", 3L, "100", 3L}); // range on tlong
checkMinCountsRange(minResp.getFacetRanges().get(2).getCounts(), new Object[]{"2009-02-01T00:00:00Z", 3L}); // date (range) on tvh
assertTrue("Should have a facet for tdate_a", minResp.getFacetQuery().containsKey("a_n_tdt:[2010-01-01T00:00:00Z TO 2011-01-01T00:00:00Z]"));
int qCount = minResp.getFacetQuery().get("a_n_tdt:[2010-01-01T00:00:00Z TO 2011-01-01T00:00:00Z]");
assertEquals("tdate_a should be 5", qCount, 5);
// Now let's do some queries, the above is getting too complex
minParams = new ModifiableSolrParams();
minParams.set("q","*:*");
minParams.set("rows", 1);
minParams.set("facet", "true");
minParams.set("facet.mincount", 3);
minParams.set("facet.query", tdate_a + ":[2010-01-01T00:00:00Z TO 2010-05-04T00:00:00Z]");
minParams.add("facet.query", tdate_b + ":[2009-01-01T00:00:00Z TO 2010-01-01T00:00:00Z]"); // Should be removed
setDistributedParams(minParams);
minResp = queryServer(minParams);
assertEquals("Should only be 1 query facets returned after minCounts taken into account ", 1, minResp.getFacetQuery().size());
assertTrue("Should be an entry for a_n_tdt", minResp.getFacetQuery().containsKey("a_n_tdt:[2010-01-01T00:00:00Z TO 2010-05-04T00:00:00Z]"));
qCount = minResp.getFacetQuery().get("a_n_tdt:[2010-01-01T00:00:00Z TO 2010-05-04T00:00:00Z]");
assertEquals("a_n_tdt should have a count of 4 ", qCount, 4);
// variations of fl
query("q","*:*", "fl","score","sort",i1 + " desc");
query("q","*:*", "fl",i1 + ",score","sort",i1 + " desc");
@ -454,7 +534,33 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
}
}
}
protected void checkMinCountsField(List<FacetField.Count> counts, Object[] pairs) {
assertEquals("There should be exactly " + pairs.length / 2 + " returned counts. There were: " + counts.size(), counts.size(), pairs.length / 2);
assertTrue("Variable len param must be an even number, it was: " + pairs.length, (pairs.length % 2) == 0);
for (int pairs_idx = 0, counts_idx = 0; pairs_idx < pairs.length; pairs_idx += 2, counts_idx++) {
String act_name = counts.get(counts_idx).getName();
long act_count = counts.get(counts_idx).getCount();
String exp_name = (String) pairs[pairs_idx];
long exp_count = (long) pairs[pairs_idx + 1];
assertEquals("Expected ordered entry " + exp_name + " at position " + counts_idx + " got " + act_name, act_name, exp_name);
assertEquals("Expected count for entry: " + exp_name + " at position " + counts_idx + " got " + act_count, act_count, exp_count);
}
}
protected void checkMinCountsRange(List<RangeFacet.Count> counts, Object[] pairs) {
assertEquals("There should be exactly " + pairs.length / 2 + " returned counts. There were: " + counts.size(), counts.size(), pairs.length / 2);
assertTrue("Variable len param must be an even number, it was: " + pairs.length, (pairs.length % 2) == 0);
for (int pairs_idx = 0, counts_idx = 0; pairs_idx < pairs.length; pairs_idx += 2, counts_idx++) {
String act_name = counts.get(counts_idx).getValue();
long act_count = counts.get(counts_idx).getCount();
String exp_name = (String) pairs[pairs_idx];
long exp_count = (long) pairs[pairs_idx + 1];
assertEquals("Expected ordered entry " + exp_name + " at position " + counts_idx + " got " + act_name, act_name, exp_name);
assertEquals("Expected count for entry: " + exp_name + " at position " + counts_idx + " got " + act_count, act_count, exp_count);
}
}
protected void queryPartialResults(final List<String> upShards,
final List<SolrServer> upClients,
Object... q) throws Exception {