diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessor.java b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessor.java index bbc782cac7e..fb44f62f47b 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessor.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessor.java @@ -31,6 +31,7 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.Query; import org.apache.lucene.util.PriorityQueue; import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.schema.FieldType; import org.apache.solr.schema.SchemaField; import org.apache.solr.search.DocSet; @@ -310,7 +311,7 @@ abstract class FacetFieldProcessor extends FacetProcessor { if (freq.missing) { // TODO: it would be more efficient to build up a missing DocSet if we need it here anyway. SimpleOrderedMap missingBucket = new SimpleOrderedMap<>(); - fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null); + fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null, false); res.add("missing", missingBucket); } @@ -378,7 +379,7 @@ abstract class FacetFieldProcessor extends FacetProcessor { } } - processSubs(target, filter, subDomain); + processSubs(target, filter, subDomain, false); } @Override @@ -510,4 +511,41 @@ abstract class FacetFieldProcessor extends FacetProcessor { } } } + + + + protected SimpleOrderedMap refineFacets() throws IOException { + List leaves = (List)fcontext.facetInfo.get("_l"); + + // For leaf refinements, we do full faceting for each leaf bucket. Any sub-facets of these buckets will be fully evaluated. Because of this, we should never + // encounter leaf refinements that have sub-facets that return partial results. + + SimpleOrderedMap res = new SimpleOrderedMap<>(); + List bucketList = new ArrayList<>(leaves.size()); + res.add("buckets", bucketList); + + // TODO: an alternate implementations can fill all accs at once + createAccs(-1, 1); + + FieldType ft = sf.getType(); + for (Object bucketVal : leaves) { + SimpleOrderedMap bucket = new SimpleOrderedMap<>(); + bucketList.add(bucket); + bucket.add("val", bucketVal); + + // String internal = ft.toInternal( tobj.toString() ); // TODO - we need a better way to get from object to query... + + Query domainQ = ft.getFieldQuery(null, sf, bucketVal.toString()); + + fillBucket(bucket, domainQ, null, false); + } + + // If there are just a couple of leaves, and if the domain is large, then + // going by term is likely the most efficient? + // If the domain is small, or if the number of leaves is large, then doing + // the normal collection method may be best. + + return res; + } + } diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArray.java b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArray.java index 767bb55806d..95b9f0b82e9 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArray.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArray.java @@ -57,6 +57,10 @@ abstract class FacetFieldProcessorByArray extends FacetFieldProcessor { } private SimpleOrderedMap calcFacets() throws IOException { + if (fcontext.facetInfo != null) { + return refineFacets(); + } + String prefix = freq.prefix; if (prefix == null || prefix.length() == 0) { prefixRef = null; diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByEnumTermsStream.java b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByEnumTermsStream.java index 2feff15e1cc..94f3b2d4c1f 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByEnumTermsStream.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByEnumTermsStream.java @@ -333,7 +333,7 @@ class FacetFieldProcessorByEnumTermsStream extends FacetFieldProcessor implement bucket.add("val", bucketVal); addStats(bucket, 0); if (hasSubFacets) { - processSubs(bucket, bucketQuery, termSet); + processSubs(bucket, bucketQuery, termSet, false); } // TODO... termSet needs to stick around for streaming sub-facets? diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetModule.java b/solr/core/src/java/org/apache/solr/search/facet/FacetModule.java index 87aaa8f47f4..630e96856f6 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FacetModule.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetModule.java @@ -39,6 +39,7 @@ import org.apache.solr.search.QueryContext; import org.apache.solr.search.SyntaxError; import org.apache.solr.util.RTimer; import org.noggit.JSONUtil; +import org.noggit.ObjectBuilder; public class FacetModule extends SearchComponent { @@ -52,7 +53,7 @@ public class FacetModule extends SearchComponent { public final static int PURPOSE_REFINE_JSON_FACETS = 0x00200000; // Internal information passed down from the top level to shards for distributed faceting. - private final static String FACET_STATE = "_facet_"; + private final static String FACET_INFO = "_facet_"; private final static String FACET_REFINE = "refine"; @@ -62,6 +63,58 @@ public class FacetModule extends SearchComponent { return (FacetComponentState) rb.req.getContext().get(FacetComponentState.class); } + + @Override + public void prepare(ResponseBuilder rb) throws IOException { + Map json = rb.req.getJSON(); + Map jsonFacet = null; + if (json == null) { + int version = rb.req.getParams().getInt("facet.version",1); + if (version <= 1) return; + boolean facetsEnabled = rb.req.getParams().getBool(FacetParams.FACET, false); + if (!facetsEnabled) return; + jsonFacet = new LegacyFacet(rb.req.getParams()).getLegacy(); + } else { + jsonFacet = (Map) json.get("facet"); + } + if (jsonFacet == null) return; + + SolrParams params = rb.req.getParams(); + + boolean isShard = params.getBool(ShardParams.IS_SHARD, false); + Map facetInfo = null; + if (isShard) { + String jfacet = params.get(FACET_INFO); + if (jfacet == null) { + // if this is a shard request, but there is no _facet_ info, then don't do anything. + return; + } + facetInfo = (Map) ObjectBuilder.fromJSON(jfacet); + } + + // At this point, we know we need to do something. Create and save the state. + rb.setNeedDocSet(true); + + // Parse the facet in the prepare phase? + FacetParser parser = new FacetTopParser(rb.req); + FacetRequest facetRequest = null; + try { + facetRequest = parser.parse(jsonFacet); + } catch (SyntaxError syntaxError) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, syntaxError); + } + + FacetComponentState fcState = new FacetComponentState(); + fcState.rb = rb; + fcState.isShard = isShard; + fcState.facetInfo = facetInfo; + fcState.facetCommands = jsonFacet; + fcState.facetRequest = facetRequest; + + rb.req.getContext().put(FacetComponentState.class, fcState); + } + + @Override public void process(ResponseBuilder rb) throws IOException { // if this is null, faceting is not enabled @@ -77,6 +130,11 @@ public class FacetModule extends SearchComponent { fcontext.qcontext = QueryContext.newContext(fcontext.searcher); if (isShard) { fcontext.flags |= FacetContext.IS_SHARD; + fcontext.facetInfo = facetState.facetInfo.isEmpty() ? null : (Map)facetState.facetInfo.get(FACET_REFINE); + if (fcontext.facetInfo != null) { + fcontext.flags |= FacetContext.IS_REFINEMENT; + fcontext.flags |= FacetContext.SKIP_FACET; // the root bucket should have been received from all shards previously + } } FacetProcessor fproc = facetState.facetRequest.createFacetProcessor(fcontext); @@ -100,60 +158,14 @@ public class FacetModule extends SearchComponent { } - @Override - public void prepare(ResponseBuilder rb) throws IOException { - Map json = rb.req.getJSON(); - Map jsonFacet = null; - if (json == null) { - int version = rb.req.getParams().getInt("facet.version",1); - if (version <= 1) return; - boolean facetsEnabled = rb.req.getParams().getBool(FacetParams.FACET, false); - if (!facetsEnabled) return; - jsonFacet = new LegacyFacet(rb.req.getParams()).getLegacy(); - } else { - jsonFacet = (Map) json.get("facet"); - } - if (jsonFacet == null) return; - - SolrParams params = rb.req.getParams(); - - boolean isShard = params.getBool(ShardParams.IS_SHARD, false); - if (isShard) { - String jfacet = params.get(FACET_STATE); - if (jfacet == null) { - // if this is a shard request, but there is no facet state, then don't do anything. - return; - } - } - - // At this point, we know we need to do something. Create and save the state. - rb.setNeedDocSet(true); - - // Parse the facet in the prepare phase? - FacetParser parser = new FacetTopParser(rb.req); - FacetRequest facetRequest = null; - try { - facetRequest = parser.parse(jsonFacet); - } catch (SyntaxError syntaxError) { - throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, syntaxError); - } - - FacetComponentState fcState = new FacetComponentState(); - fcState.rb = rb; - fcState.isShard = isShard; - fcState.facetCommands = jsonFacet; - fcState.facetRequest = facetRequest; - - rb.req.getContext().put(FacetComponentState.class, fcState); - } private void clearFaceting(List outgoing) { // turn off faceting for requests not marked as being for faceting refinements for (ShardRequest sreq : outgoing) { if ((sreq.purpose & PURPOSE_REFINE_JSON_FACETS) != 0) continue; - sreq.params.remove("json.facet"); // this just saves space... the presence of FACET_STATE really control the faceting - sreq.params.remove(FACET_STATE); + sreq.params.remove("json.facet"); // this just saves space... the presence of FACET_INFO is enough to control the faceting + sreq.params.remove(FACET_INFO); } } @@ -215,16 +227,15 @@ public class FacetModule extends SearchComponent { // don't request any documents shardsRefineRequest.params.remove(CommonParams.START); shardsRefineRequest.params.set(CommonParams.ROWS, "0"); - shardsRefineRequest.params.set(CommonParams.ROWS, "0"); shardsRefineRequest.params.set(FacetParams.FACET, false); } shardsRefineRequest.purpose |= PURPOSE_REFINE_JSON_FACETS; - Map fstate = new HashMap<>(1); - fstate.put(FACET_REFINE, refinement); - String fstateString = JSONUtil.toJSON(fstate); - shardsRefineRequest.params.add(FACET_STATE, fstateString); + Map finfo = new HashMap<>(1); + finfo.put(FACET_REFINE, refinement); + String finfoStr = JSONUtil.toJSON(finfo); + shardsRefineRequest.params.add(FACET_INFO, finfoStr); if (newRequest) { rb.addRequest(this, shardsRefineRequest); @@ -242,12 +253,12 @@ public class FacetModule extends SearchComponent { if ((sreq.purpose & ShardRequest.PURPOSE_GET_TOP_IDS) != 0) { sreq.purpose |= FacetModule.PURPOSE_GET_JSON_FACETS; - sreq.params.set(FACET_STATE, "{}"); // The presence of FACET_STATE (_facet_) turns on json faceting + sreq.params.set(FACET_INFO, "{}"); // The presence of FACET_INFO (_facet_) turns on json faceting } else { // turn off faceting on other requests /*** distributedProcess will need to use other requests for refinement - sreq.params.remove("json.facet"); // this just saves space... the presence of FACET_STATE really control the faceting - sreq.params.remove(FACET_STATE); + sreq.params.remove("json.facet"); // this just saves space... the presence of FACET_INFO really control the faceting + sreq.params.remove(FACET_INFO); **/ } } @@ -267,6 +278,18 @@ public class FacetModule extends SearchComponent { facetState.merger = facetState.facetRequest.createFacetMerger(facet); facetState.mcontext = new FacetMerger.Context( sreq.responses.size() ); } + + if ((sreq.purpose & PURPOSE_REFINE_JSON_FACETS) != 0) { + System.err.println("REFINE FACET RESULT FROM SHARD = " + facet); + // call merge again with a diff flag set on the context??? +// throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "WORK IN PROGRESS, MERGING FACET REFINEMENT NOT SUPPORTED YET!"); + + facetState.mcontext.root = facet; + facetState.mcontext.setShard(shardRsp.getShard()); // TODO: roll newShard into setShard? + facetState.merger.merge(facet , facetState.mcontext); + return; + } + facetState.mcontext.root = facet; facetState.mcontext.newShard(shardRsp.getShard()); facetState.merger.merge(facet , facetState.mcontext); @@ -304,11 +327,15 @@ public class FacetModule extends SearchComponent { } +// TODO: perhaps factor out some sort of root/parent facet object that doesn't depend +// on stuff like ResponseBuilder, but contains request parameters, +// root filter lists (for filter exclusions), etc? class FacetComponentState { ResponseBuilder rb; Map facetCommands; FacetRequest facetRequest; boolean isShard; + Map facetInfo; // _facet_ param: contains out-of-band facet info, mainly for refinement requests // // Only used for distributed search diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetProcessor.java b/solr/core/src/java/org/apache/solr/search/facet/FacetProcessor.java index 4a839a29ccf..de6dd722d02 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FacetProcessor.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetProcessor.java @@ -366,10 +366,13 @@ public abstract class FacetProcessor { } } - void fillBucket(SimpleOrderedMap bucket, Query q, DocSet result) throws IOException { + // TODO: rather than just have a raw "response", perhaps we should model as a bucket object that contains the response plus extra info? + void fillBucket(SimpleOrderedMap bucket, Query q, DocSet result, boolean skip) throws IOException { + + // TODO: we don't need the DocSet if we've already calculated everything during the first phase boolean needDocSet = freq.getFacetStats().size() > 0 || freq.getSubFacets().size() > 0; - // TODO: always collect counts or not??? + // TODO: put info in for the merger (like "skip=true"?) Maybe we don't need to if we leave out all extraneous info? int count; @@ -382,7 +385,7 @@ public abstract class FacetProcessor { } else { result = fcontext.searcher.getDocSet(q, fcontext.base); } - count = result.size(); + count = result.size(); // don't really need this if we are skipping, but it's free. } else { if (q == null) { count = fcontext.base.size(); @@ -392,8 +395,10 @@ public abstract class FacetProcessor { } try { - processStats(bucket, result, count); - processSubs(bucket, q, result); + if (!skip) { + processStats(bucket, result, count); + } + processSubs(bucket, q, result, skip); } finally { if (result != null) { // result.decref(); // OFF-HEAP @@ -402,7 +407,7 @@ public abstract class FacetProcessor { } } - void processSubs(SimpleOrderedMap response, Query filter, DocSet domain) throws IOException { + void processSubs(SimpleOrderedMap response, Query filter, DocSet domain, boolean skip) throws IOException { boolean emptyDomain = domain == null || domain.size() == 0; @@ -417,8 +422,18 @@ public abstract class FacetProcessor { continue; } + MapfacetInfoSub = null; + if (fcontext.facetInfo != null) { + facetInfoSub = (Map)fcontext.facetInfo.get(sub.getKey()); + } + + // If we're skipping this node, then we only need to process sub-facets that have facet info specified. + if (skip && facetInfoSub == null) continue; + // make a new context for each sub-facet since they can change the domain FacetContext subContext = fcontext.sub(filter, domain); + subContext.facetInfo = facetInfoSub; + if (!skip) subContext.flags &= ~FacetContext.SKIP_FACET; // turn off the skip flag if we're not skipping this bucket FacetProcessor subProcessor = subRequest.createFacetProcessor(subContext); if (fcontext.getDebugInfo() != null) { // if fcontext.debugInfo != null, it means rb.debug() == true diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetQuery.java b/solr/core/src/java/org/apache/solr/search/facet/FacetQuery.java index 174b832f006..584bec344c4 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FacetQuery.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetQuery.java @@ -56,8 +56,12 @@ class FacetQueryProcessor extends FacetProcessor { @Override public void process() throws IOException { super.process(); + + if (fcontext.facetInfo != null) { + // FIXME - what needs to be done here? + } response = new SimpleOrderedMap<>(); - fillBucket(response, freq.q, null); + fillBucket(response, freq.q, null, (fcontext.flags & FacetContext.SKIP_FACET)!=0); } diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java b/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java index a50fa2c9811..5d0989bd8d7 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java @@ -350,7 +350,7 @@ class FacetRangeProcessor extends FacetProcessor { if (freq.getSubFacets().size() > 0) { DocSet subBase = intersections[slot]; try { - processSubs(bucket, filters[slot], subBase); + processSubs(bucket, filters[slot], subBase, false); } finally { // subContext.base.decref(); // OFF-HEAP // subContext.base = null; // do not modify context after creation... there may be deferred execution (i.e. streaming) @@ -367,7 +367,7 @@ class FacetRangeProcessor extends FacetProcessor { } Query rangeQ = sf.getType().getRangeQuery(null, sf, range.low == null ? null : calc.formatValue(range.low), range.high==null ? null : calc.formatValue(range.high), range.includeLower, range.includeUpper); - fillBucket(bucket, rangeQ, null); + fillBucket(bucket, rangeQ, null, false); return bucket; } diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java b/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java index 636460f7e60..9835f7d7165 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java @@ -168,7 +168,10 @@ public abstract class FacetRequest { class FacetContext { // Context info for actually executing a local facet command public static final int IS_SHARD=0x01; + public static final int IS_REFINEMENT=0x02; + public static final int SKIP_FACET=0x04; // refinement: skip calculating this immediate facet, but proceed to specific sub-facets based on facetInfo + Map facetInfo; // refinement info for this node QueryContext qcontext; SolrQueryRequest req; // TODO: replace with params? SolrIndexSearcher searcher; diff --git a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetRefinement.java b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetRefinement.java index a8f8ff207e3..f23ae8c297a 100644 --- a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetRefinement.java +++ b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetRefinement.java @@ -18,9 +18,12 @@ package org.apache.solr.search.facet; import java.io.IOException; +import java.util.List; import org.apache.solr.JSONTestUtil; import org.apache.solr.SolrTestCaseHS; +import org.apache.solr.client.solrj.SolrClient; +import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.request.SolrQueryRequest; import org.junit.AfterClass; @@ -209,6 +212,97 @@ public class TestJsonFacetRefinement extends SolrTestCaseHS { } + @Test + public void testBasicRefinement() throws Exception { + initServers(); + Client client = servers.getClient(random().nextInt()); + client.queryDefaults().set( "shards", servers.getShards(), "debugQuery", Boolean.toString(random().nextBoolean()) ); + + List clients = client.getClientProvider().all(); + assertTrue(clients.size() >= 3); + + client.deleteByQuery("*:*", null); + + ModifiableSolrParams p = params("cat_s", "cat_s", "num_d", "num_d"); + String cat_s = p.get("cat_s"); + String num_d = p.get("num_d"); + + clients.get(0).add( sdoc("id", "01", cat_s, "A", num_d, -1) ); // A wins count tie + clients.get(0).add( sdoc("id", "02", cat_s, "B", num_d, 3) ); + + clients.get(1).add( sdoc("id", "11", cat_s, "B", num_d, -5) ); // B highest count + clients.get(1).add( sdoc("id", "12", cat_s, "B", num_d, -11) ); + clients.get(1).add( sdoc("id", "13", cat_s, "A", num_d, 7) ); + + clients.get(2).add( sdoc("id", "21", cat_s, "A", num_d, 17) ); // A highest count + clients.get(2).add( sdoc("id", "22", cat_s, "A", num_d, -19) ); + clients.get(2).add( sdoc("id", "23", cat_s, "B", num_d, 11) ); + + client.commit(); + + // Shard responses should be A=1, B=2, A=2, merged should be "A=3, B=2" + // One shard will have _facet_={"refine":{"cat0":{"_l":["A"]}}} on the second phase + + /**** + // fake a refinement request... good for development/debugging + assertJQ(clients.get(1), + params(p, "q", "*:*", "_facet_","{refine:{cat0:{_l:[A]}}}", "isShard","true", "distrib","false", "shards.purpose","2097216", "ids","11,12,13", + "json.facet", "{" + + "cat0:{type:terms, field:cat_s, sort:'count desc', limit:1, overrequest:0, refine:true}" + + "}" + ) + , "facets=={foo:555}" + ); + ****/ + + client.testJQ(params(p, "q", "*:*", + "json.facet", "{" + + "cat0:{type:terms, field:${cat_s}, sort:'count desc', limit:1, overrequest:0, refine:false}" + + "}" + ) + , "facets=={ count:8" + + ", cat0:{ buckets:[ {val:A,count:3} ] }" + // w/o overrequest and refinement, count is lower than it should be (we don't see the A from the middle shard) + "}" + ); + + client.testJQ(params(p, "q", "*:*", + "json.facet", "{" + + "cat0:{type:terms, field:${cat_s}, sort:'count desc', limit:1, overrequest:0, refine:true}" + + "}" + ) + , "facets=={ count:8" + + ", cat0:{ buckets:[ {val:A,count:4} ] }" + // w/o overrequest, we need refining to get the correct count. + "}" + ); + + // test that basic stats work for refinement + client.testJQ(params(p, "q", "*:*", + "json.facet", "{" + + "cat0:{type:terms, field:${cat_s}, sort:'count desc', limit:1, overrequest:0, refine:true, facet:{ stat1:'sum(${num_d})'} }" + + "}" + ) + , "facets=={ count:8" + + ", cat0:{ buckets:[ {val:A,count:4, stat1:4.0} ] }" + + "}" + ); + + // test sorting buckets by a different stat + client.testJQ(params(p, "q", "*:*", + "json.facet", "{" + + " cat0:{type:terms, field:${cat_s}, sort:'min1 asc', limit:1, overrequest:0, refine:false, facet:{ min1:'min(${num_d})'} }" + + ",cat1:{type:terms, field:${cat_s}, sort:'min1 asc', limit:1, overrequest:0, refine:true, facet:{ min1:'min(${num_d})'} }" + + ",sum1:'sum(num_d)'" + // make sure that root bucket stats aren't affected by refinement + "}" + ) + , "facets=={ count:8" + + ", cat0:{ buckets:[ {val:A,count:3, min1:-19.0} ] }" + // B wins in shard2, so we're missing the "A" count for that shar w/o refinement. + ", cat1:{ buckets:[ {val:A,count:4, min1:-19.0} ] }" + // with refinement, we get the right count + ", sum1:2.0" + + "}" + ); + + + } }