From 43ca5078bf5f50cb46eaac3000ea7553dd47d26f Mon Sep 17 00:00:00 2001 From: yonik Date: Tue, 23 Aug 2016 12:54:49 -0400 Subject: [PATCH] SOLR-9432: JSON Facet refactoring to support refinement --- .../apache/solr/search/facet/FacetField.java | 49 +- .../solr/search/facet/FacetFieldMerger.java | 213 +++++++ .../apache/solr/search/facet/FacetMerger.java | 126 +++- .../apache/solr/search/facet/FacetModule.java | 548 +++++++----------- .../apache/solr/search/facet/FacetRange.java | 8 +- .../solr/search/facet/FacetRangeMerger.java | 123 ++++ .../solr/search/facet/FacetRequest.java | 69 ++- .../facet/FacetRequestSortedMerger.java | 234 ++++++++ .../org/apache/solr/search/facet/HLLAgg.java | 2 +- .../solr/search/facet/PercentileAgg.java | 2 +- .../apache/solr/search/facet/UniqueAgg.java | 2 +- .../java/org/apache/solr/JSONTestUtil.java | 13 + 12 files changed, 997 insertions(+), 392 deletions(-) create mode 100644 solr/core/src/java/org/apache/solr/search/facet/FacetFieldMerger.java create mode 100644 solr/core/src/java/org/apache/solr/search/facet/FacetRangeMerger.java create mode 100644 solr/core/src/java/org/apache/solr/search/facet/FacetRequestSortedMerger.java diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetField.java b/solr/core/src/java/org/apache/solr/search/facet/FacetField.java index 92c64e74b16..9cc5420c6a4 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FacetField.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetField.java @@ -24,38 +24,45 @@ import org.apache.solr.common.SolrException; import org.apache.solr.schema.FieldType; import org.apache.solr.schema.SchemaField; - -public class FacetField extends FacetRequest { - String field; +// Any type of facet request that generates a variable number of buckets +// and the ability to sort by those generated buckets. 
+abstract class FacetRequestSorted extends FacetRequest { long offset; - long limit = 10; - long mincount = 1; + long limit; + long mincount; + String sortVariable; + SortDirection sortDirection; + RefineMethod refine; // null, NONE, or SIMPLE + + @Override + public RefineMethod getRefineMethod() { + return refine; + } + + @Override + public boolean returnsPartial() { + return limit > 0; + } + +} + + +public class FacetField extends FacetRequestSorted { + String field; boolean missing; boolean allBuckets; // show cumulative stats across all buckets (this can be different than non-bucketed stats across all docs because of multi-valued docs) boolean numBuckets; String prefix; - String sortVariable; - SortDirection sortDirection; FacetMethod method; int cacheDf; // 0 means "default", -1 means "never cache" // experimental - force perSeg collection when using dv method, currently for testing purposes only. Boolean perSeg; - // TODO: put this somewhere more generic? - public enum SortDirection { - asc(-1) , - desc(1); - - private final int multiplier; - private SortDirection(int multiplier) { - this.multiplier = multiplier; - } - - // asc==-1, desc==1 - public int getMultiplier() { - return multiplier; - } + { + // defaults for FacetRequestSorted + mincount = 1; + limit = 10; } public enum FacetMethod { diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldMerger.java b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldMerger.java new file mode 100644 index 00000000000..8a26f51d892 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldMerger.java @@ -0,0 +1,213 @@ +package org.apache.solr.search.facet; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.solr.common.util.SimpleOrderedMap; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * 
contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// TODO: refactor more out to base class +public class FacetFieldMerger extends FacetRequestSortedMerger { + FacetBucket missingBucket; + FacetBucket allBuckets; + FacetMerger numBuckets; + int[] numReturnedPerShard; + + // LinkedHashMap buckets = new LinkedHashMap<>(); + // List sortedBuckets; + int numReturnedBuckets; // the number of buckets in the bucket lists returned from all of the shards + + + public FacetFieldMerger(FacetField freq) { + super(freq); + } + + @Override + public void merge(Object facetResult, Context mcontext) { + if (numReturnedPerShard == null) { + numReturnedPerShard = new int[mcontext.numShards]; + } + merge((SimpleOrderedMap)facetResult, mcontext); + } + + protected void merge(SimpleOrderedMap facetResult, Context mcontext) { + if (freq.missing) { + Object o = facetResult.get("missing"); + if (o != null) { + if (missingBucket == null) { + missingBucket = newBucket(null, mcontext); + } + missingBucket.mergeBucket((SimpleOrderedMap)o , mcontext); + } + } + + if (freq.allBuckets) { + Object o = facetResult.get("allBuckets"); + if (o != null) { + if (allBuckets == null) { + allBuckets = newBucket(null, mcontext); + } + allBuckets.mergeBucket((SimpleOrderedMap)o , mcontext); + } + } + + List bucketList = (List) 
facetResult.get("buckets"); + numReturnedPerShard[mcontext.shardNum] = bucketList.size(); + numReturnedBuckets += bucketList.size(); + mergeBucketList(bucketList , mcontext); + + if (freq.numBuckets) { + Object nb = facetResult.get("numBuckets"); + if (nb != null) { + if (numBuckets == null) { + numBuckets = new FacetNumBucketsMerger(); + } + numBuckets.merge(nb , mcontext); + } + } + + } + + + + + @Override + public Object getMergedResult() { + SimpleOrderedMap result = new SimpleOrderedMap(); + + if (numBuckets != null) { + int removed = 0; + if (freq.mincount > 1) { + for (FacetBucket bucket : buckets.values()) { + if (bucket.count < freq.mincount) removed++; + } + } + result.add("numBuckets", ((Number)numBuckets.getMergedResult()).longValue() - removed); + + // TODO: we can further increase this estimate. + // If not sorting by count, use a simple ratio to scale + // If sorting by count desc, then add up the highest_possible_missing_count from each shard + } + + sortBuckets(); + + int first = (int)freq.offset; + int end = freq.limit >=0 ? first + (int) freq.limit : Integer.MAX_VALUE; + int last = Math.min(sortedBuckets.size(), end); + + List resultBuckets = new ArrayList<>(Math.max(0, (last - first))); + + /** this only works if there are no filters (like mincount) + for (int i=first; i= 0 ? (int)freq.limit : Integer.MAX_VALUE; + for (FacetBucket bucket : sortedBuckets) { + if (bucket.getCount() < freq.mincount) { + continue; + } + + if (off > 0) { + --off; + continue; + } + + if (resultBuckets.size() >= lim) { + break; + } + + resultBuckets.add( bucket.getMergedBucket() ); + } + + + result.add("buckets", resultBuckets); + if (missingBucket != null) { + result.add("missing", missingBucket.getMergedBucket()); + } + if (allBuckets != null) { + result.add("allBuckets", allBuckets.getMergedBucket()); + } + + return result; + } + + + @Override + public void finish(Context mcontext) { + // TODO: check refine of subs? 
+ // TODO: call subs each time with a shard/shardnum that is missing a bucket at this level? + // or pass a bit vector of shards w/ value??? + + // build up data structure and only then call the context (or whatever) to do the refinement? + // basically , only do at the top-level facet? + } + + + + private class FacetNumBucketsMerger extends FacetMerger { + long sumBuckets; + long shardsMissingSum; + long shardsTruncatedSum; + Set values; + + @Override + public void merge(Object facetResult, Context mcontext) { + SimpleOrderedMap map = (SimpleOrderedMap)facetResult; + long numBuckets = ((Number)map.get("numBuckets")).longValue(); + sumBuckets += numBuckets; + + List vals = (List)map.get("vals"); + if (vals != null) { + if (values == null) { + values = new HashSet<>(vals.size()*4); + } + values.addAll(vals); + if (numBuckets > values.size()) { + shardsTruncatedSum += numBuckets - values.size(); + } + } else { + shardsMissingSum += numBuckets; + } + } + + @Override + public void finish(Context mcontext) { + // nothing to do + } + + @Override + public Object getMergedResult() { + long exactCount = values == null ? 0 : values.size(); + return exactCount + shardsMissingSum + shardsTruncatedSum; + // TODO: reduce count by (at least) number of buckets that fail to hit mincount (after merging) + // that should make things match for most of the small tests at least + } + } +} diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetMerger.java b/solr/core/src/java/org/apache/solr/search/facet/FacetMerger.java index a8573c03ad0..9499d2caf25 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FacetMerger.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetMerger.java @@ -16,17 +16,131 @@ */ package org.apache.solr.search.facet; -// -// The FacetMerger code is in the prototype stage, and this is the reason that -// many implementations are all in this file. They can be moved to separate -// files after the interfaces are locked down more. 
-// +import java.util.ArrayList; +import java.util.BitSet; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.IdentityHashMap; +import java.util.Map; + +import static org.apache.solr.search.facet.FacetRequest.RefineMethod.SIMPLE; + + public abstract class FacetMerger { public abstract void merge(Object facetResult, Context mcontext); + + // FIXME + // public abstract Map getRefinement(Context mcontext); + public Map getRefinement(Context mcontext) { + return null; + } + public abstract void finish(Context mcontext); public abstract Object getMergedResult(); + // This class lets mergers know overall context such as what shard is being merged + // and what buckets have been seen by what shard. public static class Context { // FacetComponentState state; // todo: is this needed? - Object root; + final int numShards; + private final BitSet sawShard = new BitSet(); // [bucket0_shard0, bucket0_shard1, bucket0_shard2, bucket1_shard0, bucket1_shard1, bucket1_shard2] + private Map shardmap = new HashMap<>(); + + public Context(int numShards) { + this.numShards = numShards; + } + + Object root; // per-shard response + int maxBucket; // the current max bucket across all bucket types... incremented as we encounter more + int shardNum = -1; // TODO: keep same mapping across multiple phases... 
+ boolean bucketWasMissing; + + public void newShard(String shard) { + Integer prev = shardmap.put(shard, ++shardNum); + assert prev == null; + this.bucketWasMissing = false; + } + + public void setShard(String shard) { + this.shardNum = shardmap.get(shard); + } + + public int getNewBucketNumber() { + return maxBucket++; + } + + public void setShardFlag(int bucketNum) { + // rely on normal bitset expansion (uses a doubling strategy) + sawShard.set( bucketNum * numShards + shardNum ); + } + + public boolean getShardFlag(int bucketNum) { + return sawShard.get( bucketNum * numShards + shardNum ); + } + + public boolean bucketWasMissing() { + return bucketWasMissing; + } + + public boolean setBucketWasMissing(boolean newVal) { + boolean oldVal = bucketWasMissing(); + bucketWasMissing = newVal; + return oldVal; + } + + private Map> refineSubMap = new IdentityHashMap<>(4); + public Collection getSubsWithRefinement(FacetRequest freq) { + if (freq.getSubFacets().isEmpty()) return Collections.emptyList(); + Collection subs = refineSubMap.get(freq); + if (subs != null) return subs; + + for (Map.Entry entry : freq.subFacets.entrySet()) { + Collection childSubs = getSubsWithRefinement(entry.getValue()); + if (childSubs.size() > 0 || entry.getValue().getRefineMethod() == SIMPLE) { + if (subs == null) { + subs = new ArrayList<>(freq.getSubFacets().size()); + } + subs.add(entry.getKey()); + } + } + + if (subs == null) { + subs = Collections.emptyList(); + } + refineSubMap.put(freq, subs); + return subs; + } + + + private Map> partialSubsMap = new IdentityHashMap<>(4); + public Collection getSubsWithPartial(FacetRequest freq) { + if (freq.getSubFacets().isEmpty()) return Collections.emptyList(); + Collection subs = partialSubsMap.get(freq); + if (subs != null) return subs; + + subs = null; + for (Map.Entry entry : freq.subFacets.entrySet()) { + Collection childSubs = getSubsWithPartial(entry.getValue()); + if (childSubs.size() > 0 || entry.getValue().returnsPartial()) { + if (subs 
== null) { + subs = new ArrayList<>(freq.getSubFacets().size()); + } + subs.add(entry.getKey()); + } + } + + if (subs == null) { + subs = Collections.emptyList(); + } + partialSubsMap.put(freq, subs); + return subs; + } + + } + + + } + + diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetModule.java b/solr/core/src/java/org/apache/solr/search/facet/FacetModule.java index ad324ebf5b4..b2831a326ef 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FacetModule.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetModule.java @@ -18,6 +18,7 @@ package org.apache.solr.search.facet; import java.io.IOException; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; @@ -29,7 +30,9 @@ import java.util.Set; import org.apache.solr.client.solrj.SolrResponse; import org.apache.solr.common.SolrException; +import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.FacetParams; +import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.ShardParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; @@ -41,6 +44,7 @@ import org.apache.solr.handler.component.ShardResponse; import org.apache.solr.search.QueryContext; import org.apache.solr.search.SyntaxError; import org.apache.solr.util.RTimer; +import org.noggit.JSONUtil; public class FacetModule extends SearchComponent { @@ -55,6 +59,7 @@ public class FacetModule extends SearchComponent { // Internal information passed down from the top level to shards for distributed faceting. 
private final static String FACET_STATE = "_facet_"; + private final static String FACET_REFINE = "refine"; public FacetComponentState getFacetComponentState(ResponseBuilder rb) { @@ -149,12 +154,90 @@ public class FacetModule extends SearchComponent { } + private void clearFaceting(List outgoing) { + // turn off faceting for requests not marked as being for faceting refinements + for (ShardRequest sreq : outgoing) { + if ((sreq.purpose & PURPOSE_REFINE_JSON_FACETS) != 0) continue; + sreq.params.remove("json.facet"); // this just saves space... the presence of FACET_STATE really control the faceting + sreq.params.remove(FACET_STATE); + } + } + @Override public int distributedProcess(ResponseBuilder rb) throws IOException { FacetComponentState facetState = getFacetComponentState(rb); if (facetState == null) return ResponseBuilder.STAGE_DONE; + if (rb.stage != ResponseBuilder.STAGE_GET_FIELDS) { + return ResponseBuilder.STAGE_DONE; + } + + // Check if there are any refinements possible + if (facetState.mcontext.getSubsWithRefinement(facetState.facetRequest).isEmpty()) { + clearFaceting(rb.outgoing); + return ResponseBuilder.STAGE_DONE; + } + + // Overlap facet refinement requests (those shards that we need a count + // for particular facet values from), where possible, with + // the requests to get fields (because we know that is the + // only other required phase). + // We do this in distributedProcess so we can look at all of the + // requests in the outgoing queue at once. + + assert rb.shards.length == facetState.mcontext.numShards; + for (String shard : rb.shards) { + facetState.mcontext.setShard(shard); + + // shard-specific refinement + Map refinement = facetState.merger.getRefinement(facetState.mcontext); + if (refinement == null) continue; + + boolean newRequest = false; + ShardRequest shardsRefineRequest = null; + + // try to find a request that is already going out to that shard. 
+ // If nshards becomes too great, we may want to move to hashing for + // better scalability. + for (ShardRequest sreq : rb.outgoing) { + if ( (sreq.purpose & (ShardRequest.PURPOSE_GET_FIELDS|ShardRequest.PURPOSE_REFINE_FACETS|ShardRequest.PURPOSE_REFINE_PIVOT_FACETS)) != 0 + && sreq.shards != null + && sreq.shards.length == 1 + && sreq.shards[0].equals(shard)) + { + shardsRefineRequest = sreq; + break; + } + } + + if (shardsRefineRequest == null) { + // we didn't find any other suitable requests going out to that shard, + // so create one ourselves. + newRequest = true; + shardsRefineRequest = new ShardRequest(); + shardsRefineRequest.shards = new String[] { shard }; + shardsRefineRequest.params = new ModifiableSolrParams(rb.req.getParams()); + // don't request any documents + shardsRefineRequest.params.remove(CommonParams.START); + shardsRefineRequest.params.set(CommonParams.ROWS, "0"); + shardsRefineRequest.params.set(CommonParams.ROWS, "0"); + shardsRefineRequest.params.set(FacetParams.FACET, false); + } + + shardsRefineRequest.purpose |= PURPOSE_REFINE_JSON_FACETS; + + Map fstate = new HashMap<>(1); + fstate.put(FACET_REFINE, refinement); + String fstateString = JSONUtil.toJSON(fstate); + shardsRefineRequest.params.add(FACET_STATE, fstateString); + + if (newRequest) { + rb.addRequest(this, shardsRefineRequest); + } + } + + // clearFaceting(rb.outgoing); return ResponseBuilder.STAGE_DONE; } @@ -165,11 +248,13 @@ public class FacetModule extends SearchComponent { if ((sreq.purpose & ShardRequest.PURPOSE_GET_TOP_IDS) != 0) { sreq.purpose |= FacetModule.PURPOSE_GET_JSON_FACETS; - sreq.params.set(FACET_STATE, "{}"); + sreq.params.set(FACET_STATE, "{}"); // The presence of FACET_STATE (_facet_) turns on json faceting } else { // turn off faceting on other requests - sreq.params.remove("json.facet"); + /*** distributedProcess will need to use other requests for refinement + sreq.params.remove("json.facet"); // this just saves space... 
the presence of FACET_STATE really control the faceting sreq.params.remove(FACET_STATE); + **/ } } @@ -186,8 +271,11 @@ public class FacetModule extends SearchComponent { if (facet == null) continue; if (facetState.merger == null) { facetState.merger = facetState.facetRequest.createFacetMerger(facet); + facetState.mcontext = new FacetMerger.Context( sreq.responses.size() ); } - facetState.merger.merge(facet , new FacetMerger.Context()); + facetState.mcontext.root = facet; + facetState.mcontext.newShard(shardRsp.getShard()); + facetState.merger.merge(facet , facetState.mcontext); } } @@ -199,6 +287,7 @@ public class FacetModule extends SearchComponent { if (facetState == null) return; if (facetState.merger != null) { + // TODO: merge any refinements rb.rsp.add("facets", facetState.merger.getMergedResult()); } } @@ -226,17 +315,23 @@ class FacetComponentState { // Only used for distributed search // FacetMerger merger; + FacetMerger.Context mcontext; } - +// base class for facet functions that can be used in a sort abstract class FacetSortableMerger extends FacetMerger { public void prepareSort() { } + @Override + public void finish(Context mcontext) { + // nothing to do for simple stats... + } + /** Return the normal comparison sort order. The sort direction is only to be used in special circumstances (such as making NaN sort * last regardless of sort order.) Normal sorters do not need to pay attention to direction. 
*/ - public abstract int compareTo(FacetSortableMerger other, FacetField.SortDirection direction); + public abstract int compareTo(FacetSortableMerger other, FacetRequest.SortDirection direction); } abstract class FacetDoubleMerger extends FacetSortableMerger { @@ -252,12 +347,12 @@ abstract class FacetDoubleMerger extends FacetSortableMerger { @Override - public int compareTo(FacetSortableMerger other, FacetField.SortDirection direction) { + public int compareTo(FacetSortableMerger other, FacetRequest.SortDirection direction) { return compare(getDouble(), ((FacetDoubleMerger)other).getDouble(), direction); } - public static int compare(double a, double b, FacetField.SortDirection direction) { + public static int compare(double a, double b, FacetRequest.SortDirection direction) { if (a < b) return -1; if (a > b) return 1; @@ -295,7 +390,7 @@ class FacetLongMerger extends FacetSortableMerger { } @Override - public int compareTo(FacetSortableMerger other, FacetField.SortDirection direction) { + public int compareTo(FacetSortableMerger other, FacetRequest.SortDirection direction) { return Long.compare(val, ((FacetLongMerger)other).val); } } @@ -304,15 +399,20 @@ class FacetLongMerger extends FacetSortableMerger { // base class for facets that create buckets (and can hence have sub-facets) abstract class FacetBucketMerger extends FacetMerger { FacetRequestT freq; - int bucketNumber; public FacetBucketMerger(FacetRequestT freq) { this.freq = freq; } /** Bucketval is the representative value for the bucket. Only applicable to terms and range queries to distinguish buckets. */ - FacetBucket newBucket(Comparable bucketVal) { - return new FacetBucket(this, bucketVal, bucketNumber++); + FacetBucket newBucket(Comparable bucketVal, Context mcontext) { + return new FacetBucket(this, bucketVal, mcontext); + } + + @Override + public Map getRefinement(Context mcontext) { + Collection refineTags = mcontext.getSubsWithRefinement(freq); + return null; // FIXME } // do subs... 
@@ -334,6 +434,7 @@ abstract class FacetBucketMerger extends Fac } } + class FacetQueryMerger extends FacetBucketMerger { FacetBucket bucket; @@ -344,11 +445,32 @@ class FacetQueryMerger extends FacetBucketMerger { @Override public void merge(Object facet, Context mcontext) { if (bucket == null) { - bucket = newBucket(null); + bucket = newBucket(null, mcontext); } bucket.mergeBucket((SimpleOrderedMap) facet, mcontext); } + @Override + public Map getRefinement(Context mcontext) { + Collection tags; + if (mcontext.bucketWasMissing()) { + // if this bucket was missing, we need to get all subfacets that have partials (that need to list values for refinement) + tags = mcontext.getSubsWithPartial(freq); + } else { + tags = mcontext.getSubsWithRefinement(freq); + } + + Map refinement = bucket.getRefinement(mcontext, tags); + + return refinement; + } + + + @Override + public void finish(Context mcontext) { + // FIXME we need to propagate!!! + } + @Override public Object getMergedResult() { return bucket.getMergedBucket(); @@ -360,15 +482,15 @@ class FacetQueryMerger extends FacetBucketMerger { class FacetBucket { final FacetBucketMerger parent; final Comparable bucketValue; - final int bucketNumber; // this is just for internal correlation (the first bucket created is bucket 0, the next bucket 1, etc) + final int bucketNumber; // this is just for internal correlation (the first bucket created is bucket 0, the next bucket 1, across all field buckets) long count; Map subs; - public FacetBucket(FacetBucketMerger parent, Comparable bucketValue, int bucketNumber) { + public FacetBucket(FacetBucketMerger parent, Comparable bucketValue, FacetMerger.Context mcontext) { this.parent = parent; this.bucketValue = bucketValue; - this.bucketNumber = bucketNumber; + this.bucketNumber = mcontext.getNewBucketNumber(); // TODO: we don't need bucket numbers for all buckets... 
} public long getCount() { @@ -403,6 +525,8 @@ class FacetBucket { public void mergeBucket(SimpleOrderedMap bucket, FacetMerger.Context mcontext) { // todo: for refinements, we want to recurse, but not re-do stats for intermediate buckets + mcontext.setShardFlag(bucketNumber); + // drive merging off the received bucket? for (int i=0; i { - FacetBucket missingBucket; - FacetBucket allBuckets; - FacetMerger numBuckets; - - LinkedHashMap buckets = new LinkedHashMap<>(); - List sortedBuckets; - int numReturnedBuckets; // the number of buckets in the bucket lists returned from all of the shards - - private static class SortVal implements Comparable { - FacetBucket bucket; - FacetSortableMerger merger; // make this class inner and access merger , direction in parent? - FacetField.SortDirection direction; - - @Override - public int compareTo(SortVal o) { - int c = -merger.compareTo(o.merger, direction) * direction.getMultiplier(); - return c == 0 ? bucket.bucketValue.compareTo(o.bucket.bucketValue) : c; + public Map getRefinement(FacetMerger.Context mcontext, Collection refineTags) { + if (subs == null) { + return null; } - } - - public FacetFieldMerger(FacetField freq) { - super(freq); - } - - @Override - public void merge(Object facetResult, Context mcontext) { - merge((SimpleOrderedMap)facetResult, mcontext); - } - - protected void merge(SimpleOrderedMap facetResult, Context mcontext) { - if (freq.missing) { - Object o = facetResult.get("missing"); - if (o != null) { - if (missingBucket == null) { - missingBucket = newBucket(null); + Map refinement = null; + for (String tag : refineTags) { + FacetMerger subMerger = subs.get(tag); + if (subMerger != null) { + Map subRef = subMerger.getRefinement(mcontext); + if (subRef != null) { + if (refinement == null) { + refinement = new HashMap<>(refineTags.size()); + } + refinement.put(tag, subRef); } - missingBucket.mergeBucket((SimpleOrderedMap)o , mcontext); } } - - if (freq.allBuckets) { - Object o = 
facetResult.get("allBuckets"); - if (o != null) { - if (allBuckets == null) { - allBuckets = newBucket(null); - } - allBuckets.mergeBucket((SimpleOrderedMap)o , mcontext); - } - } - - List bucketList = (List) facetResult.get("buckets"); - numReturnedBuckets += bucketList.size(); - mergeBucketList(bucketList , mcontext); - - if (freq.numBuckets) { - Object nb = facetResult.get("numBuckets"); - if (nb != null) { - if (numBuckets == null) { - numBuckets = new FacetNumBucketsMerger(); - } - numBuckets.merge(nb , mcontext); - } - } - + return refinement; } - public void mergeBucketList(List bucketList, Context mcontext) { - for (SimpleOrderedMap bucketRes : bucketList) { - Comparable bucketVal = (Comparable)bucketRes.get("val"); - FacetBucket bucket = buckets.get(bucketVal); - if (bucket == null) { - bucket = newBucket(bucketVal); - buckets.put(bucketVal, bucket); + public Map getRefinement2(FacetMerger.Context mcontext, Collection refineTags) { + // TODO nocommit - partial results should turn off refining!!! + + boolean parentMissing = mcontext.bucketWasMissing(); + + // TODO: this is a redundant check for many types of facets... only do on field faceting + if (!parentMissing) { + // if parent bucket wasn't missing, check if this bucket was. + // this really only needs checking on certain buckets... (like terms facet) + boolean sawThisBucket = mcontext.getShardFlag(bucketNumber); + if (!sawThisBucket) { + mcontext.setBucketWasMissing(true); } - bucket.mergeBucket( bucketRes , mcontext ); - } - } - - public void sortBuckets() { - sortedBuckets = new ArrayList<>( buckets.values() ); - - Comparator comparator = null; - - final FacetField.SortDirection direction = freq.sortDirection; - final int sortMul = direction.getMultiplier(); - - if ("count".equals(freq.sortVariable)) { - comparator = (o1, o2) -> { - int v = -Long.compare(o1.count, o2.count) * sortMul; - return v == 0 ? 
o1.bucketValue.compareTo(o2.bucketValue) : v; - }; - Collections.sort(sortedBuckets, comparator); - } else if ("index".equals(freq.sortVariable)) { - comparator = (o1, o2) -> -o1.bucketValue.compareTo(o2.bucketValue) * sortMul; - Collections.sort(sortedBuckets, comparator); } else { - final String key = freq.sortVariable; - - /** - final FacetSortableMerger[] arr = new FacetSortableMerger[buckets.size()]; - final int[] index = new int[arr.length]; - int start = 0; - int nullStart = index.length; - int i=0; - for (FacetBucket bucket : buckets.values()) { - FacetMerger merger = bucket.getExistingMerger(key); - if (merger == null) { - index[--nullStart] = i; - } - if (merger != null) { - arr[start] = (FacetSortableMerger)merger; - index[start] = i; - start++; - } - i++; - } - - PrimUtils.sort(0, nullStart, index, new PrimUtils.IntComparator() { - @Override - public int compare(int a, int b) { - return arr[index[a]].compareTo(arr[index[b]], direction); - } - }); - **/ - - // timsort may do better here given that the lists may be partially sorted. - - List lst = new ArrayList(buckets.size()); - List nulls = new ArrayList(buckets.size()>>1); - for (int i=0; i o1.bucketValue.compareTo(o2.bucketValue)); - - ArrayList out = new ArrayList<>(buckets.size()); - for (SortVal sv : lst) { - out.add( sv.bucket ); - } - out.addAll(nulls); - sortedBuckets = out; - } - } - - @Override - public Object getMergedResult() { - SimpleOrderedMap result = new SimpleOrderedMap(); - - if (numBuckets != null) { - int removed = 0; - if (freq.mincount > 1) { - for (FacetBucket bucket : buckets.values()) { - if (bucket.count < freq.mincount) removed++; - } - } - result.add("numBuckets", ((Number)numBuckets.getMergedResult()).longValue() - removed); - - // TODO: we can further increase this estimate. 
- // If not sorting by count, use a simple ratio to scale - // If sorting by count desc, then add up the highest_possible_missing_count from each shard + // if parent bucket was missing, then we should be too + assert !mcontext.getShardFlag(bucketNumber); } - sortBuckets(); + Map refinement = null; - int first = (int)freq.offset; - int end = freq.limit >=0 ? first + (int) freq.limit : Integer.MAX_VALUE; - int last = Math.min(sortedBuckets.size(), end); - - List resultBuckets = new ArrayList<>(Math.max(0, (last - first))); - - /** this only works if there are no filters (like mincount) - for (int i=first; i(4); + if (bucketValue != null) { + refinement.put("_v", bucketValue); + } + refinement.put("_m",1); } - ***/ - // TODO: change effective offsets + limits at shards... + // TODO: listing things like sub-facets that have no field facets are redundant + // (we only need facet that have variable values) - int off = (int)freq.offset; - int lim = freq.limit >= 0 ? (int)freq.limit : Integer.MAX_VALUE; - for (FacetBucket bucket : sortedBuckets) { - if (bucket.getCount() < freq.mincount) { + for (Map.Entry sub : subs.entrySet()) { + if (refineTags != null && !refineTags.contains(sub.getKey())) { continue; } - - if (off > 0) { - --off; - continue; + Map subRef = sub.getValue().getRefinement(mcontext); + if (subRef != null) { + if (refinement == null) { + refinement = new HashMap<>(4); + } + refinement.put(sub.getKey(), subRef); } - - if (resultBuckets.size() >= lim) { - break; - } - - resultBuckets.add( bucket.getMergedBucket() ); } - result.add("buckets", resultBuckets); - if (missingBucket != null) { - result.add("missing", missingBucket.getMergedBucket()); - } - if (allBuckets != null) { - result.add("allBuckets", allBuckets.getMergedBucket()); - } - - return result; + // reset the "bucketMissing" flag on the way back out. 
+ mcontext.setBucketWasMissing(parentMissing); + return refinement; } - - private class FacetNumBucketsMerger extends FacetMerger { - long sumBuckets; - long shardsMissingSum; - long shardsTruncatedSum; - Set values; - - @Override - public void merge(Object facetResult, Context mcontext) { - SimpleOrderedMap map = (SimpleOrderedMap)facetResult; - long numBuckets = ((Number)map.get("numBuckets")).longValue(); - sumBuckets += numBuckets; - - List vals = (List)map.get("vals"); - if (vals != null) { - if (values == null) { - values = new HashSet<>(vals.size()*4); - } - values.addAll(vals); - if (numBuckets > values.size()) { - shardsTruncatedSum += numBuckets - values.size(); - } - } else { - shardsMissingSum += numBuckets; - } - } - - @Override - public Object getMergedResult() { - long exactCount = values == null ? 0 : values.size(); - return exactCount + shardsMissingSum + shardsTruncatedSum; - // TODO: reduce count by (at least) number of buckets that fail to hit mincount (after merging) - // that should make things match for most of the small tests at least - } - } } -class FacetRangeMerger extends FacetBucketMerger { - FacetBucket beforeBucket; - FacetBucket afterBucket; - FacetBucket betweenBucket; - - LinkedHashMap buckets = new LinkedHashMap(); - - - public FacetRangeMerger(FacetRange freq) { - super(freq); - } - - @Override - FacetMerger createFacetMerger(String key, Object val) { - return super.createFacetMerger(key, val); - } - - @Override - public void merge(Object facetResult, Context mcontext) { - merge((SimpleOrderedMap) facetResult , mcontext); - } - - public void merge(SimpleOrderedMap facetResult, Context mcontext) { - boolean all = freq.others.contains(FacetParams.FacetRangeOther.ALL); - - if (all || freq.others.contains(FacetParams.FacetRangeOther.BEFORE)) { - Object o = facetResult.get("before"); - if (o != null) { - if (beforeBucket == null) { - beforeBucket = newBucket(null); - } - beforeBucket.mergeBucket((SimpleOrderedMap)o, mcontext); - } - } 
- - if (all || freq.others.contains(FacetParams.FacetRangeOther.AFTER)) { - Object o = facetResult.get("after"); - if (o != null) { - if (afterBucket == null) { - afterBucket = newBucket(null); - } - afterBucket.mergeBucket((SimpleOrderedMap)o , mcontext); - } - } - - if (all || freq.others.contains(FacetParams.FacetRangeOther.BETWEEN)) { - Object o = facetResult.get("between"); - if (o != null) { - if (betweenBucket == null) { - betweenBucket = newBucket(null); - } - betweenBucket.mergeBucket((SimpleOrderedMap)o , mcontext); - } - } - - List bucketList = (List) facetResult.get("buckets"); - mergeBucketList(bucketList , mcontext); - } - - // TODO: share more merging with field faceting - public void mergeBucketList(List bucketList, Context mcontext) { - for (SimpleOrderedMap bucketRes : bucketList) { - Comparable bucketVal = (Comparable)bucketRes.get("val"); - FacetBucket bucket = buckets.get(bucketVal); - if (bucket == null) { - bucket = newBucket(bucketVal); - buckets.put(bucketVal, bucket); - } - bucket.mergeBucket( bucketRes , mcontext ); - } - } - - @Override - public Object getMergedResult() { - SimpleOrderedMap result = new SimpleOrderedMap(4); - - List resultBuckets = new ArrayList<>(buckets.size()); - - for (FacetBucket bucket : buckets.values()) { - if (bucket.getCount() < freq.mincount) { - continue; - } - resultBuckets.add( bucket.getMergedBucket() ); - } - - result.add("buckets", resultBuckets); - - if (beforeBucket != null) { - result.add("before", beforeBucket.getMergedBucket()); - } - if (afterBucket != null) { - result.add("after", afterBucket.getMergedBucket()); - } - if (betweenBucket != null) { - result.add("between", betweenBucket.getMergedBucket()); - } - return result; - - } -} diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java b/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java index 1b98de0ed4b..99f6fceb7e0 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java +++ 
b/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java @@ -36,7 +36,7 @@ import org.apache.solr.schema.TrieField; import org.apache.solr.search.DocSet; import org.apache.solr.util.DateMathParser; -public class FacetRange extends FacetRequest { +public class FacetRange extends FacetRequestSorted { String field; Object start; Object end; @@ -44,8 +44,12 @@ public class FacetRange extends FacetRequest { boolean hardend = false; EnumSet include; EnumSet others; - long mincount = 0; + { + // defaults + mincount = 0; + limit = -1; + } @Override public FacetProcessor createFacetProcessor(FacetContext fcontext) { diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetRangeMerger.java b/solr/core/src/java/org/apache/solr/search/facet/FacetRangeMerger.java new file mode 100644 index 00000000000..587b9195775 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetRangeMerger.java @@ -0,0 +1,123 @@ +package org.apache.solr.search.facet; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; + +import org.apache.solr.common.params.FacetParams; +import org.apache.solr.common.util.SimpleOrderedMap; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +public class FacetRangeMerger extends FacetRequestSortedMerger { + FacetBucket beforeBucket; + FacetBucket afterBucket; + FacetBucket betweenBucket; + + public FacetRangeMerger(FacetRange freq) { + super(freq); + } + + @Override + FacetMerger createFacetMerger(String key, Object val) { + return super.createFacetMerger(key, val); + } + + @Override + public void merge(Object facetResult, Context mcontext) { + merge((SimpleOrderedMap) facetResult , mcontext); + } + + @Override + public void sortBuckets() { + // TODO: mincount>0 will mess up order? + sortedBuckets = new ArrayList<>( buckets.values() ); + } + + @Override + public void finish(Context mcontext) { + // nothing to do + } + + public void merge(SimpleOrderedMap facetResult, Context mcontext) { + boolean all = freq.others.contains(FacetParams.FacetRangeOther.ALL); + + if (all || freq.others.contains(FacetParams.FacetRangeOther.BEFORE)) { + Object o = facetResult.get("before"); + if (o != null) { + if (beforeBucket == null) { + beforeBucket = newBucket(null, mcontext); + } + beforeBucket.mergeBucket((SimpleOrderedMap)o, mcontext); + } + } + + if (all || freq.others.contains(FacetParams.FacetRangeOther.AFTER)) { + Object o = facetResult.get("after"); + if (o != null) { + if (afterBucket == null) { + afterBucket = newBucket(null, mcontext); + } + afterBucket.mergeBucket((SimpleOrderedMap)o , mcontext); + } + } + + if (all || freq.others.contains(FacetParams.FacetRangeOther.BETWEEN)) { + Object o = facetResult.get("between"); + if (o != null) { + if (betweenBucket == null) { + betweenBucket = newBucket(null, mcontext); + } + betweenBucket.mergeBucket((SimpleOrderedMap)o , mcontext); + } + } + + List bucketList = (List) facetResult.get("buckets"); + mergeBucketList(bucketList , mcontext); + } + + + @Override + public Object getMergedResult() { + // TODO: use sortedBuckets + SimpleOrderedMap result = new SimpleOrderedMap(4); + + List resultBuckets = new ArrayList<>(buckets.size()); + + for (FacetBucket 
bucket : buckets.values()) { + if (bucket.getCount() < freq.mincount) { + continue; + } + resultBuckets.add( bucket.getMergedBucket() ); + } + + result.add("buckets", resultBuckets); + + if (beforeBucket != null) { + result.add("before", beforeBucket.getMergedBucket()); + } + if (afterBucket != null) { + result.add("after", afterBucket.getMergedBucket()); + } + if (betweenBucket != null) { + result.add("between", betweenBucket.getMergedBucket()); + } + return result; + + } +} diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java b/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java index 0446202022a..76d7d2a4f3f 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java @@ -35,10 +35,48 @@ import org.apache.solr.search.QueryContext; import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.search.SyntaxError; +import static org.apache.solr.search.facet.FacetRequest.RefineMethod.NONE; + public abstract class FacetRequest { + + public static enum SortDirection { + asc(-1) , + desc(1); + + private final int multiplier; + private SortDirection(int multiplier) { + this.multiplier = multiplier; + } + + // asc==-1, desc==1 + public int getMultiplier() { + return multiplier; + } + } + + public static enum RefineMethod { + NONE, + SIMPLE; + // NONE is distinct from null since we may want to know if refinement was explicitly turned off. + public static FacetRequest.RefineMethod fromObj(Object method) { + if (method == null) return null; + if (method instanceof Boolean) { + return ((Boolean)method) ? 
SIMPLE : NONE; + } + if ("simple".equals(method)) { + return SIMPLE; + } else if ("none".equals(method)) { + return NONE; + } else { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown RefineMethod method " + method); + } + } + } + + protected Map facetStats; // per-bucket statistics - protected Map subFacets; // list of facets + protected Map subFacets; // per-bucket sub-facets protected List filters; protected boolean processEmpty; protected Domain domain; @@ -64,6 +102,22 @@ public abstract class FacetRequest { return subFacets; } + /** Returns null if unset */ + public RefineMethod getRefineMethod() { + return null; + } + + public boolean doRefine() { + return !(getRefineMethod()==null || getRefineMethod()==NONE); + } + + /** Returns true if this facet can return just some of the facet buckets that match all the criteria. + * This is normally true only for facets with a limit. + */ + public boolean returnsPartial() { + return false; + } + public void addStat(String key, AggValueSource stat) { facetStats.put(key, stat); } @@ -541,6 +595,9 @@ class FacetFieldParser extends FacetParser { facet.method = FacetField.FacetMethod.fromString(getString(m, "method", null)); facet.cacheDf = (int)getLong(m, "cacheDf", facet.cacheDf); + // TODO: pull up to higher level? + facet.refine = FacetField.RefineMethod.fromObj(m.get("refine")); + facet.perSeg = (Boolean)m.get("perSeg"); // facet.sort may depend on a facet stat... 
@@ -562,18 +619,18 @@ class FacetFieldParser extends FacetParser { private void parseSort(Object sort) { if (sort == null) { facet.sortVariable = "count"; - facet.sortDirection = FacetField.SortDirection.desc; + facet.sortDirection = FacetRequest.SortDirection.desc; } else if (sort instanceof String) { String sortStr = (String)sort; if (sortStr.endsWith(" asc")) { facet.sortVariable = sortStr.substring(0, sortStr.length()-" asc".length()); - facet.sortDirection = FacetField.SortDirection.asc; + facet.sortDirection = FacetRequest.SortDirection.asc; } else if (sortStr.endsWith(" desc")) { facet.sortVariable = sortStr.substring(0, sortStr.length()-" desc".length()); - facet.sortDirection = FacetField.SortDirection.desc; + facet.sortDirection = FacetRequest.SortDirection.desc; } else { facet.sortVariable = sortStr; - facet.sortDirection = "index".equals(facet.sortVariable) ? FacetField.SortDirection.asc : FacetField.SortDirection.desc; // default direction for "index" is ascending + facet.sortDirection = "index".equals(facet.sortVariable) ? 
FacetRequest.SortDirection.asc : FacetRequest.SortDirection.desc; // default direction for "index" is ascending } } else { // sort : { myvar : 'desc' } @@ -583,7 +640,7 @@ class FacetFieldParser extends FacetParser { String k = entry.getKey(); Object v = entry.getValue(); facet.sortVariable = k; - facet.sortDirection = FacetField.SortDirection.valueOf(v.toString()); + facet.sortDirection = FacetRequest.SortDirection.valueOf(v.toString()); } } diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetRequestSortedMerger.java b/solr/core/src/java/org/apache/solr/search/facet/FacetRequestSortedMerger.java new file mode 100644 index 00000000000..955882d8167 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetRequestSortedMerger.java @@ -0,0 +1,234 @@ +package org.apache.solr.search.facet; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +import org.apache.solr.common.util.SimpleOrderedMap; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// base class for facets that create a list of buckets that can be sorted +abstract class FacetRequestSortedMerger extends FacetBucketMerger { + LinkedHashMap buckets = new LinkedHashMap<>(); + List sortedBuckets; + + public FacetRequestSortedMerger(FacetRequestT freq) { + super(freq); + } + + private static class SortVal implements Comparable { + FacetBucket bucket; + FacetSortableMerger merger; // make this class inner and access merger , direction in parent? + FacetRequest.SortDirection direction; + + @Override + public int compareTo(SortVal o) { + int c = -merger.compareTo(o.merger, direction) * direction.getMultiplier(); + return c == 0 ? bucket.bucketValue.compareTo(o.bucket.bucketValue) : c; + } + } + + public void mergeBucketList(List bucketList, Context mcontext) { + for (SimpleOrderedMap bucketRes : bucketList) { + Comparable bucketVal = (Comparable)bucketRes.get("val"); + FacetBucket bucket = buckets.get(bucketVal); + if (bucket == null) { + bucket = newBucket(bucketVal, mcontext); + buckets.put(bucketVal, bucket); + } + bucket.mergeBucket( bucketRes , mcontext ); + } + } + + public void sortBuckets() { + sortedBuckets = new ArrayList<>( buckets.values() ); + + Comparator comparator = null; + + final FacetRequest.SortDirection direction = freq.sortDirection; + final int sortMul = direction.getMultiplier(); + + if ("count".equals(freq.sortVariable)) { + comparator = (o1, o2) -> { + int v = -Long.compare(o1.count, o2.count) * sortMul; + return v == 0 ? 
o1.bucketValue.compareTo(o2.bucketValue) : v; + }; + Collections.sort(sortedBuckets, comparator); + } else if ("index".equals(freq.sortVariable)) { + comparator = (o1, o2) -> -o1.bucketValue.compareTo(o2.bucketValue) * sortMul; + Collections.sort(sortedBuckets, comparator); + } else { + final String key = freq.sortVariable; + + /** + final FacetSortableMerger[] arr = new FacetSortableMerger[buckets.size()]; + final int[] index = new int[arr.length]; + int start = 0; + int nullStart = index.length; + int i=0; + for (FacetBucket bucket : buckets.values()) { + FacetMerger merger = bucket.getExistingMerger(key); + if (merger == null) { + index[--nullStart] = i; + } + if (merger != null) { + arr[start] = (FacetSortableMerger)merger; + index[start] = i; + start++; + } + i++; + } + + PrimUtils.sort(0, nullStart, index, new PrimUtils.IntComparator() { + @Override + public int compare(int a, int b) { + return arr[index[a]].compareTo(arr[index[b]], direction); + } + }); + **/ + + + List lst = new ArrayList<>(buckets.size()); + List nulls = new ArrayList<>(buckets.size()>>1); + for (int i=0; i o1.bucketValue.compareTo(o2.bucketValue)); + + ArrayList out = new ArrayList<>(buckets.size()); + for (SortVal sv : lst) { + out.add( sv.bucket ); + } + out.addAll(nulls); + sortedBuckets = out; + } + } + + + @Override + public Map getRefinement(Context mcontext) { + // step 1) If this facet request has refining, then we need to fully request top buckets that were not seen by this shard. + // step 2) If this facet does not have refining, but some sub-facets do, we need to check/recurse those sub-facets in *every* top bucket. + // A combination of the two is possible and makes step 2 redundant for any buckets we fully requested in step 1. 
+ + Map refinement = null; + + Collection tags = mcontext.getSubsWithRefinement(freq); + if (tags.isEmpty() && !freq.doRefine()) { + // we don't have refining, and neither do our subs + return null; + } + + // Tags for sub facets that have partial facets somewhere in their children. + // If we are missing a bucket for this shard, we'll need to get the specific buckets that need refining. + Collection tagsWithPartial = mcontext.getSubsWithPartial(freq); + + boolean thisMissing = mcontext.bucketWasMissing(); + + int num = (int)(freq.offset + freq.limit); + int numBucketsToCheck = Math.min(buckets.size(), num); + + Collection bucketList; + if (buckets.size() < num) { + // no need to sort + // todo: but we may need to filter.... simplify by always sorting? + bucketList = buckets.values(); + } else { + // only sort once + if (sortedBuckets == null) { + sortBuckets(); // todo: make sure this filters buckets as well + } + bucketList = sortedBuckets; + } + + ArrayList leafBuckets = null; // "_l" missing buckets specified by bucket value only (no need to specify anything further) + ArrayList missingBuckets = null; // "_m" missing buckets that need to specify values for partial facets + ArrayList skipBuckets = null; // "_s" present buckets that we need to recurse into because children facets have refinement requirements + + for (FacetBucket bucket : bucketList) { + if (numBucketsToCheck-- <= 0) break; + // if this bucket is missing, + assert thisMissing == false || thisMissing == true && mcontext.getShardFlag(bucket.bucketNumber) == false; + boolean saw = !thisMissing && mcontext.getShardFlag(bucket.bucketNumber); + if (!saw) { + // we didn't see the bucket for this shard + Map bucketRefinement = null; + + // find facets that we need to fill in buckets for + if (!tagsWithPartial.isEmpty()) { + boolean prev = mcontext.setBucketWasMissing(true); + bucketRefinement = bucket.getRefinement(mcontext, tagsWithPartial); + mcontext.setBucketWasMissing(prev); + + if (bucketRefinement 
!= null) { + if (missingBuckets==null) missingBuckets = new ArrayList<>(); + missingBuckets.add(bucketRefinement); + } + } + + // if we didn't add to "_m" (missing), then we should add to "_l" (leaf missing) + if (bucketRefinement == null) { + if (leafBuckets == null) leafBuckets = new ArrayList<>(); + leafBuckets.add(bucket.bucketValue); + } + + } else if (!tags.isEmpty()) { + // we had this bucket, but we need to recurse to certain children that have refinements + Map bucketRefinement = bucket.getRefinement(mcontext, tagsWithPartial); + if (bucketRefinement != null) { + if (skipBuckets == null) skipBuckets = new ArrayList<>(); + skipBuckets.add(bucketRefinement); + } + } + + } + + // TODO: what if we don't need to refine any variable buckets, but we do need to contribute to numBuckets, missing, allBuckets, etc... + // because we were "missing". That will be handled at a higher level (i.e. we'll be in someone's missing bucket?) + // TODO: test with a sub-facet with a limit of 0 and something like a missing bucket + if (leafBuckets != null || missingBuckets != null || skipBuckets != null) { + refinement = new HashMap<>(3); + if (leafBuckets != null) refinement.put("_l",leafBuckets); + if (missingBuckets != null) refinement.put("_m", missingBuckets); + if (skipBuckets != null) refinement.put("_s", skipBuckets); + } + + return refinement; + } + + +} diff --git a/solr/core/src/java/org/apache/solr/search/facet/HLLAgg.java b/solr/core/src/java/org/apache/solr/search/facet/HLLAgg.java index 09436c1a708..89e23868bef 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/HLLAgg.java +++ b/solr/core/src/java/org/apache/solr/search/facet/HLLAgg.java @@ -99,7 +99,7 @@ public class HLLAgg extends StrAggValueSource { } @Override - public int compareTo(FacetSortableMerger other, FacetField.SortDirection direction) { + public int compareTo(FacetSortableMerger other, FacetRequest.SortDirection direction) { return Long.compare( getLong(), ((Merger)other).getLong() ); } } 
diff --git a/solr/core/src/java/org/apache/solr/search/facet/PercentileAgg.java b/solr/core/src/java/org/apache/solr/search/facet/PercentileAgg.java index 6285b3911ba..a1f44f0ab73 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/PercentileAgg.java +++ b/solr/core/src/java/org/apache/solr/search/facet/PercentileAgg.java @@ -207,7 +207,7 @@ public class PercentileAgg extends SimpleAggValueSource { } @Override - public int compareTo(FacetSortableMerger other, FacetField.SortDirection direction) { + public int compareTo(FacetSortableMerger other, FacetRequest.SortDirection direction) { return Double.compare(getSortVal(), ((Merger) other).getSortVal()); } diff --git a/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java b/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java index 261ed60c2d2..341bdaf803a 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java +++ b/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java @@ -113,7 +113,7 @@ public class UniqueAgg extends StrAggValueSource { } @Override - public int compareTo(FacetSortableMerger other, FacetField.SortDirection direction) { + public int compareTo(FacetSortableMerger other, FacetRequest.SortDirection direction) { return Long.compare( getLong(), ((Merger)other).getLong() ); } } diff --git a/solr/test-framework/src/java/org/apache/solr/JSONTestUtil.java b/solr/test-framework/src/java/org/apache/solr/JSONTestUtil.java index 634608b2bf4..f5b2ffb45f7 100644 --- a/solr/test-framework/src/java/org/apache/solr/JSONTestUtil.java +++ b/solr/test-framework/src/java/org/apache/solr/JSONTestUtil.java @@ -73,6 +73,19 @@ public class JSONTestUtil { return match(path, input, expected, delta); } + /** + * @param input Object structure to parse and test against + * @param pathAndExpected JSON path expression + '==' + expected value + * @param delta tolerance allowed in comparing float/double values + */ + public static String matchObj(Object input, String 
pathAndExpected, double delta) throws Exception { + int pos = pathAndExpected.indexOf("=="); + String path = pos>=0 ? pathAndExpected.substring(0,pos) : null; + String expected = pos>=0 ? pathAndExpected.substring(pos+2) : pathAndExpected; + Object expectObj = failRepeatedKeys ? new NoDupsObjectBuilder(new JSONParser(expected)).getVal() : ObjectBuilder.fromJSON(expected); + return matchObj(path, input, expectObj, delta); + } + /** * @param path JSON path expression * @param input JSON Structure to parse and test against