SOLR-9432: JSON Facet refactoring to support refinement

yonik 2016-08-23 12:54:49 -04:00
parent 61e1f095e9
commit 43ca5078bf
12 changed files with 997 additions and 392 deletions

View File

@@ -24,38 +24,45 @@ import org.apache.solr.common.SolrException;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
// Any type of facet request that generates a variable number of buckets
// and the ability to sort by those generated buckets.
abstract class FacetRequestSorted extends FacetRequest {
  long offset;
  long limit;
  long mincount;
  String sortVariable;
  SortDirection sortDirection;
  RefineMethod refine; // null, NONE, or SIMPLE

  @Override
  public RefineMethod getRefineMethod() {
    return refine;
  }

  @Override
  public boolean returnsPartial() {
    return limit > 0;
  }
}

public class FacetField extends FacetRequestSorted {
  String field;
  boolean missing;
  boolean allBuckets; // show cumulative stats across all buckets (this can be different than non-bucketed stats across all docs because of multi-valued docs)
  boolean numBuckets;
  String prefix;
  FacetMethod method;
  int cacheDf; // 0 means "default", -1 means "never cache"

  // experimental - force perSeg collection when using dv method, currently for testing purposes only.
  Boolean perSeg;

  {
    // defaults for FacetRequestSorted
    mincount = 1;
    limit = 10;
  }

(removed here: the old "public class FacetField extends FacetRequest" declaration, its offset/limit/mincount/sortVariable/sortDirection fields, now inherited from FacetRequestSorted, and the SortDirection enum, which this commit moves to FacetRequest)

  public enum FacetMethod {
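
A rough sketch of how the new refine option surfaces in a JSON Facet request (the facet name "top_cats" and the field "cat" below are illustrative placeholders, not taken from this commit):

  json.facet = {
    top_cats : {
      type   : terms,
      field  : cat,    // hypothetical field name
      limit  : 5,      // limit > 0 makes returnsPartial() true for FacetRequestSorted
      refine : true    // parsed via RefineMethod.fromObj; true->SIMPLE, false->NONE
    }
  }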

View File

@@ -0,0 +1,213 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.search.facet;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.solr.common.util.SimpleOrderedMap;
// TODO: refactor more out to base class
public class FacetFieldMerger extends FacetRequestSortedMerger<FacetField> {
FacetBucket missingBucket;
FacetBucket allBuckets;
FacetMerger numBuckets;
int[] numReturnedPerShard;
// LinkedHashMap<Object,FacetBucket> buckets = new LinkedHashMap<>();
// List<FacetBucket> sortedBuckets;
int numReturnedBuckets; // the number of buckets in the bucket lists returned from all of the shards
public FacetFieldMerger(FacetField freq) {
super(freq);
}
@Override
public void merge(Object facetResult, Context mcontext) {
if (numReturnedPerShard == null) {
numReturnedPerShard = new int[mcontext.numShards];
}
merge((SimpleOrderedMap)facetResult, mcontext);
}
protected void merge(SimpleOrderedMap facetResult, Context mcontext) {
if (freq.missing) {
Object o = facetResult.get("missing");
if (o != null) {
if (missingBucket == null) {
missingBucket = newBucket(null, mcontext);
}
missingBucket.mergeBucket((SimpleOrderedMap)o , mcontext);
}
}
if (freq.allBuckets) {
Object o = facetResult.get("allBuckets");
if (o != null) {
if (allBuckets == null) {
allBuckets = newBucket(null, mcontext);
}
allBuckets.mergeBucket((SimpleOrderedMap)o , mcontext);
}
}
List<SimpleOrderedMap> bucketList = (List<SimpleOrderedMap>) facetResult.get("buckets");
numReturnedPerShard[mcontext.shardNum] = bucketList.size();
numReturnedBuckets += bucketList.size();
mergeBucketList(bucketList , mcontext);
if (freq.numBuckets) {
Object nb = facetResult.get("numBuckets");
if (nb != null) {
if (numBuckets == null) {
numBuckets = new FacetNumBucketsMerger();
}
numBuckets.merge(nb , mcontext);
}
}
}
@Override
public Object getMergedResult() {
SimpleOrderedMap result = new SimpleOrderedMap();
if (numBuckets != null) {
int removed = 0;
if (freq.mincount > 1) {
for (FacetBucket bucket : buckets.values()) {
if (bucket.count < freq.mincount) removed++;
}
}
result.add("numBuckets", ((Number)numBuckets.getMergedResult()).longValue() - removed);
// TODO: we can further increase this estimate.
// If not sorting by count, use a simple ratio to scale
// If sorting by count desc, then add up the highest_possible_missing_count from each shard
}
sortBuckets();
int first = (int)freq.offset;
int end = freq.limit >=0 ? first + (int) freq.limit : Integer.MAX_VALUE;
int last = Math.min(sortedBuckets.size(), end);
List<SimpleOrderedMap> resultBuckets = new ArrayList<>(Math.max(0, (last - first)));
/** this only works if there are no filters (like mincount)
for (int i=first; i<last; i++) {
FacetBucket bucket = sortedBuckets.get(i);
resultBuckets.add( bucket.getMergedBucket() );
}
***/
// TODO: change effective offsets + limits at shards...
int off = (int)freq.offset;
int lim = freq.limit >= 0 ? (int)freq.limit : Integer.MAX_VALUE;
for (FacetBucket bucket : sortedBuckets) {
if (bucket.getCount() < freq.mincount) {
continue;
}
if (off > 0) {
--off;
continue;
}
if (resultBuckets.size() >= lim) {
break;
}
resultBuckets.add( bucket.getMergedBucket() );
}
result.add("buckets", resultBuckets);
if (missingBucket != null) {
result.add("missing", missingBucket.getMergedBucket());
}
if (allBuckets != null) {
result.add("allBuckets", allBuckets.getMergedBucket());
}
return result;
}
@Override
public void finish(Context mcontext) {
// TODO: check refine of subs?
// TODO: call subs each time with a shard/shardnum that is missing a bucket at this level?
// or pass a bit vector of shards w/ value???
// build up data structure and only then call the context (or whatever) to do the refinement?
// basically, only do at the top-level facet?
}
private class FacetNumBucketsMerger extends FacetMerger {
long sumBuckets;
long shardsMissingSum;
long shardsTruncatedSum;
Set<Object> values;
@Override
public void merge(Object facetResult, Context mcontext) {
SimpleOrderedMap map = (SimpleOrderedMap)facetResult;
long numBuckets = ((Number)map.get("numBuckets")).longValue();
sumBuckets += numBuckets;
List vals = (List)map.get("vals");
if (vals != null) {
if (values == null) {
values = new HashSet<>(vals.size()*4);
}
values.addAll(vals);
if (numBuckets > values.size()) {
shardsTruncatedSum += numBuckets - values.size();
}
} else {
shardsMissingSum += numBuckets;
}
}
@Override
public void finish(Context mcontext) {
// nothing to do
}
@Override
public Object getMergedResult() {
long exactCount = values == null ? 0 : values.size();
return exactCount + shardsMissingSum + shardsTruncatedSum;
// TODO: reduce count by (at least) number of buckets that fail to hit mincount (after merging)
// that should make things match for most of the small tests at least
}
}
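// Illustrative walk-through of the estimate in getMergedResult() above (made-up numbers):
// shard A reports numBuckets=100 with a "vals" sample of 10 values -> values.size()==10,
// shardsTruncatedSum += 100-10 == 90; shard B reports numBuckets=40 with no "vals" list
// -> shardsMissingSum += 40; merged estimate == 10 (exact) + 40 (missing) + 90 (truncated) == 140.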
}

View File

@@ -16,17 +16,131 @@
*/
package org.apache.solr.search.facet;
//
// The FacetMerger code is in the prototype stage, and this is the reason that
// many implementations are all in this file. They can be moved to separate
// files after the interfaces are locked down more.
//
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.Map;
import static org.apache.solr.search.facet.FacetRequest.RefineMethod.SIMPLE;
public abstract class FacetMerger {
public abstract void merge(Object facetResult, Context mcontext);
// FIXME
// public abstract Map<String,Object> getRefinement(Context mcontext);
public Map<String,Object> getRefinement(Context mcontext) {
return null;
}
public abstract void finish(Context mcontext);
public abstract Object getMergedResult();
// This class lets mergers know overall context such as what shard is being merged
// and what buckets have been seen by what shard.
public static class Context {
// FacetComponentState state; // todo: is this needed?
final int numShards;
private final BitSet sawShard = new BitSet(); // [bucket0_shard0, bucket0_shard1, bucket0_shard2, bucket1_shard0, bucket1_shard1, bucket1_shard2]
private Map<String,Integer> shardmap = new HashMap<>();
public Context(int numShards) {
this.numShards = numShards;
}
Object root; // per-shard response
int maxBucket; // the current max bucket across all bucket types... incremented as we encounter more
int shardNum = -1; // TODO: keep same mapping across multiple phases...
boolean bucketWasMissing;
public void newShard(String shard) {
Integer prev = shardmap.put(shard, ++shardNum);
assert prev == null;
this.bucketWasMissing = false;
}
public void setShard(String shard) {
this.shardNum = shardmap.get(shard);
}
public int getNewBucketNumber() {
return maxBucket++;
}
public void setShardFlag(int bucketNum) {
// rely on normal bitset expansion (uses a doubling strategy)
sawShard.set( bucketNum * numShards + shardNum );
}
public boolean getShardFlag(int bucketNum) {
return sawShard.get( bucketNum * numShards + shardNum );
}
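// Illustrative example of the bit layout (assuming numShards==3): when shard #1 merges
// bucket #2, setShardFlag(2) sets bit 2*3+1 == 7; a later getShardFlag(2) while merging
// shard #0 reads bit 2*3+0 == 6, so each (bucket, shard) pair gets its own flag.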
public boolean bucketWasMissing() {
return bucketWasMissing;
}
public boolean setBucketWasMissing(boolean newVal) {
boolean oldVal = bucketWasMissing();
bucketWasMissing = newVal;
return oldVal;
}
private Map<FacetRequest, Collection<String>> refineSubMap = new IdentityHashMap<>(4);
public Collection<String> getSubsWithRefinement(FacetRequest freq) {
if (freq.getSubFacets().isEmpty()) return Collections.emptyList();
Collection<String> subs = refineSubMap.get(freq);
if (subs != null) return subs;
for (Map.Entry<String,FacetRequest> entry : freq.subFacets.entrySet()) {
Collection<String> childSubs = getSubsWithRefinement(entry.getValue());
if (childSubs.size() > 0 || entry.getValue().getRefineMethod() == SIMPLE) {
if (subs == null) {
subs = new ArrayList<>(freq.getSubFacets().size());
}
subs.add(entry.getKey());
}
}
if (subs == null) {
subs = Collections.emptyList();
}
refineSubMap.put(freq, subs);
return subs;
}
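// Hypothetical example: given {a:{type:terms, facet:{b:{type:terms, refine:true}}}},
// getSubsWithRefinement(a's request) returns ["b"] since b refines; the recursion also
// includes a sub-facet whenever any of its own descendants refine.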
private Map<FacetRequest, Collection<String>> partialSubsMap = new IdentityHashMap<>(4);
public Collection<String> getSubsWithPartial(FacetRequest freq) {
if (freq.getSubFacets().isEmpty()) return Collections.emptyList();
Collection<String> subs = partialSubsMap.get(freq);
if (subs != null) return subs;
subs = null;
for (Map.Entry<String,FacetRequest> entry : freq.subFacets.entrySet()) {
Collection<String> childSubs = getSubsWithPartial(entry.getValue());
if (childSubs.size() > 0 || entry.getValue().returnsPartial()) {
if (subs == null) {
subs = new ArrayList<>(freq.getSubFacets().size());
}
subs.add(entry.getKey());
}
}
if (subs == null) {
subs = Collections.emptyList();
}
partialSubsMap.put(freq, subs);
return subs;
}
}
}

View File

@@ -18,6 +18,7 @@ package org.apache.solr.search.facet;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
@@ -29,7 +30,9 @@ import java.util.Set;
import org.apache.solr.client.solrj.SolrResponse;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.ShardParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
@@ -41,6 +44,7 @@ import org.apache.solr.handler.component.ShardResponse;
import org.apache.solr.search.QueryContext;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.util.RTimer;
import org.noggit.JSONUtil;
public class FacetModule extends SearchComponent {
@@ -55,6 +59,7 @@ public class FacetModule extends SearchComponent {
// Internal information passed down from the top level to shards for distributed faceting.
private final static String FACET_STATE = "_facet_";
private final static String FACET_REFINE = "refine";
public FacetComponentState getFacetComponentState(ResponseBuilder rb) {
@@ -149,12 +154,90 @@
}
private void clearFaceting(List<ShardRequest> outgoing) {
// turn off faceting for requests not marked as being for faceting refinements
for (ShardRequest sreq : outgoing) {
if ((sreq.purpose & PURPOSE_REFINE_JSON_FACETS) != 0) continue;
sreq.params.remove("json.facet"); // this just saves space... the presence of FACET_STATE really controls the faceting
sreq.params.remove(FACET_STATE);
}
}
@Override
public int distributedProcess(ResponseBuilder rb) throws IOException {
FacetComponentState facetState = getFacetComponentState(rb);
if (facetState == null) return ResponseBuilder.STAGE_DONE;
if (rb.stage != ResponseBuilder.STAGE_GET_FIELDS) {
return ResponseBuilder.STAGE_DONE;
}
// Check if there are any refinements possible
if (facetState.mcontext.getSubsWithRefinement(facetState.facetRequest).isEmpty()) {
clearFaceting(rb.outgoing);
return ResponseBuilder.STAGE_DONE;
}
// Overlap facet refinement requests (those shards that we need a count
// for particular facet values from), where possible, with
// the requests to get fields (because we know that is the
// only other required phase).
// We do this in distributedProcess so we can look at all of the
// requests in the outgoing queue at once.
assert rb.shards.length == facetState.mcontext.numShards;
for (String shard : rb.shards) {
facetState.mcontext.setShard(shard);
// shard-specific refinement
Map<String,Object> refinement = facetState.merger.getRefinement(facetState.mcontext);
if (refinement == null) continue;
boolean newRequest = false;
ShardRequest shardsRefineRequest = null;
// try to find a request that is already going out to that shard.
// If nshards becomes too great, we may want to move to hashing for
// better scalability.
for (ShardRequest sreq : rb.outgoing) {
if ( (sreq.purpose & (ShardRequest.PURPOSE_GET_FIELDS|ShardRequest.PURPOSE_REFINE_FACETS|ShardRequest.PURPOSE_REFINE_PIVOT_FACETS)) != 0
&& sreq.shards != null
&& sreq.shards.length == 1
&& sreq.shards[0].equals(shard))
{
shardsRefineRequest = sreq;
break;
}
}
if (shardsRefineRequest == null) {
// we didn't find any other suitable requests going out to that shard,
// so create one ourselves.
newRequest = true;
shardsRefineRequest = new ShardRequest();
shardsRefineRequest.shards = new String[] { shard };
shardsRefineRequest.params = new ModifiableSolrParams(rb.req.getParams());
// don't request any documents
shardsRefineRequest.params.remove(CommonParams.START);
shardsRefineRequest.params.set(CommonParams.ROWS, "0");
shardsRefineRequest.params.set(FacetParams.FACET, false);
}
shardsRefineRequest.purpose |= PURPOSE_REFINE_JSON_FACETS;
Map<String,Object> fstate = new HashMap<>(1);
fstate.put(FACET_REFINE, refinement);
String fstateString = JSONUtil.toJSON(fstate);
shardsRefineRequest.params.add(FACET_STATE, fstateString);
if (newRequest) {
rb.addRequest(this, shardsRefineRequest);
}
}
// clearFaceting(rb.outgoing);
return ResponseBuilder.STAGE_DONE;
}
@@ -165,11 +248,13 @@
if ((sreq.purpose & ShardRequest.PURPOSE_GET_TOP_IDS) != 0) {
sreq.purpose |= FacetModule.PURPOSE_GET_JSON_FACETS;
sreq.params.set(FACET_STATE, "{}");
sreq.params.set(FACET_STATE, "{}"); // The presence of FACET_STATE (_facet_) turns on json faceting
} else {
// turn off faceting on other requests
sreq.params.remove("json.facet");
/*** distributedProcess will need to use other requests for refinement
sreq.params.remove("json.facet"); // this just saves space... the presence of FACET_STATE really controls the faceting
sreq.params.remove(FACET_STATE);
**/
}
}
@@ -186,8 +271,11 @@
if (facet == null) continue;
if (facetState.merger == null) {
facetState.merger = facetState.facetRequest.createFacetMerger(facet);
facetState.mcontext = new FacetMerger.Context( sreq.responses.size() );
}
facetState.merger.merge(facet , new FacetMerger.Context());
facetState.mcontext.root = facet;
facetState.mcontext.newShard(shardRsp.getShard());
facetState.merger.merge(facet , facetState.mcontext);
}
}
@@ -199,6 +287,7 @@
if (facetState == null) return;
if (facetState.merger != null) {
// TODO: merge any refinements
rb.rsp.add("facets", facetState.merger.getMergedResult());
}
}
@@ -226,17 +315,23 @@ class FacetComponentState {
// Only used for distributed search
//
FacetMerger merger;
FacetMerger.Context mcontext;
}
// base class for facet functions that can be used in a sort
abstract class FacetSortableMerger extends FacetMerger {
public void prepareSort() {
}
@Override
public void finish(Context mcontext) {
// nothing to do for simple stats...
}
/** Return the normal comparison sort order. The sort direction is only to be used in special circumstances (such as making NaN sort
* last regardless of sort order.) Normal sorters do not need to pay attention to direction.
*/
public abstract int compareTo(FacetSortableMerger other, FacetField.SortDirection direction);
public abstract int compareTo(FacetSortableMerger other, FacetRequest.SortDirection direction);
}
abstract class FacetDoubleMerger extends FacetSortableMerger {
@@ -252,12 +347,12 @@ abstract class FacetDoubleMerger extends FacetSortableMerger {
@Override
public int compareTo(FacetSortableMerger other, FacetField.SortDirection direction) {
public int compareTo(FacetSortableMerger other, FacetRequest.SortDirection direction) {
return compare(getDouble(), ((FacetDoubleMerger)other).getDouble(), direction);
}
public static int compare(double a, double b, FacetField.SortDirection direction) {
public static int compare(double a, double b, FacetRequest.SortDirection direction) {
if (a < b) return -1;
if (a > b) return 1;
@@ -295,7 +390,7 @@ class FacetLongMerger extends FacetSortableMerger {
}
@Override
public int compareTo(FacetSortableMerger other, FacetField.SortDirection direction) {
public int compareTo(FacetSortableMerger other, FacetRequest.SortDirection direction) {
return Long.compare(val, ((FacetLongMerger)other).val);
}
}
@@ -304,15 +399,20 @@ class FacetLongMerger extends FacetSortableMerger {
// base class for facets that create buckets (and can hence have sub-facets)
abstract class FacetBucketMerger<FacetRequestT extends FacetRequest> extends FacetMerger {
FacetRequestT freq;
int bucketNumber;
public FacetBucketMerger(FacetRequestT freq) {
this.freq = freq;
}
/** Bucketval is the representative value for the bucket. Only applicable to terms and range queries to distinguish buckets. */
FacetBucket newBucket(Comparable bucketVal) {
return new FacetBucket(this, bucketVal, bucketNumber++);
FacetBucket newBucket(Comparable bucketVal, Context mcontext) {
return new FacetBucket(this, bucketVal, mcontext);
}
@Override
public Map<String, Object> getRefinement(Context mcontext) {
Collection<String> refineTags = mcontext.getSubsWithRefinement(freq);
return null; // FIXME
}
// do subs...
@@ -334,6 +434,7 @@ abstract class FacetBucketMerger<FacetRequestT extends FacetRequest> extends FacetMerger {
}
}
class FacetQueryMerger extends FacetBucketMerger<FacetQuery> {
FacetBucket bucket;
@@ -344,11 +445,32 @@ class FacetQueryMerger extends FacetBucketMerger<FacetQuery> {
@Override
public void merge(Object facet, Context mcontext) {
if (bucket == null) {
bucket = newBucket(null);
bucket = newBucket(null, mcontext);
}
bucket.mergeBucket((SimpleOrderedMap) facet, mcontext);
}
@Override
public Map<String, Object> getRefinement(Context mcontext) {
Collection<String> tags;
if (mcontext.bucketWasMissing()) {
// if this bucket was missing, we need to get all subfacets that have partials (that need to list values for refinement)
tags = mcontext.getSubsWithPartial(freq);
} else {
tags = mcontext.getSubsWithRefinement(freq);
}
Map<String,Object> refinement = bucket.getRefinement(mcontext, tags);
return refinement;
}
@Override
public void finish(Context mcontext) {
// FIXME we need to propagate!!!
}
@Override
public Object getMergedResult() {
return bucket.getMergedBucket();
@@ -360,15 +482,15 @@ class FacetQueryMerger extends FacetBucketMerger<FacetQuery> {
class FacetBucket {
final FacetBucketMerger parent;
final Comparable bucketValue;
final int bucketNumber; // this is just for internal correlation (the first bucket created is bucket 0, the next bucket 1, etc)
final int bucketNumber; // this is just for internal correlation (the first bucket created is bucket 0, the next bucket 1, across all field buckets)
long count;
Map<String, FacetMerger> subs;
public FacetBucket(FacetBucketMerger parent, Comparable bucketValue, int bucketNumber) {
public FacetBucket(FacetBucketMerger parent, Comparable bucketValue, FacetMerger.Context mcontext) {
this.parent = parent;
this.bucketValue = bucketValue;
this.bucketNumber = bucketNumber;
this.bucketNumber = mcontext.getNewBucketNumber(); // TODO: we don't need bucket numbers for all buckets...
}
public long getCount() {
@@ -403,6 +525,8 @@ class FacetBucket {
public void mergeBucket(SimpleOrderedMap bucket, FacetMerger.Context mcontext) {
// todo: for refinements, we want to recurse, but not re-do stats for intermediate buckets
mcontext.setShardFlag(bucketNumber);
// drive merging off the received bucket?
for (int i=0; i<bucket.size(); i++) {
String key = bucket.getName(i);
@@ -440,368 +564,84 @@
return out;
}
  public Map<String, Object> getRefinement(FacetMerger.Context mcontext, Collection<String> refineTags) {
    if (subs == null) {
      return null;
    }
    Map<String,Object> refinement = null;
    for (String tag : refineTags) {
      FacetMerger subMerger = subs.get(tag);
      if (subMerger != null) {
        Map<String,Object> subRef = subMerger.getRefinement(mcontext);
        if (subRef != null) {
          if (refinement == null) {
            refinement = new HashMap<>(refineTags.size());
          }
          refinement.put(tag, subRef);
        }
      }
    }
    return refinement;
  }

  public Map<String, Object> getRefinement2(FacetMerger.Context mcontext, Collection<String> refineTags) {
    // TODO nocommit - partial results should turn off refining!!!
    boolean parentMissing = mcontext.bucketWasMissing();

    // TODO: this is a redundant check for many types of facets... only do on field faceting
    if (!parentMissing) {
      // if parent bucket wasn't missing, check if this bucket was.
      // this really only needs checking on certain buckets... (like terms facet)
      boolean sawThisBucket = mcontext.getShardFlag(bucketNumber);
      if (!sawThisBucket) {
        mcontext.setBucketWasMissing(true);
      }
    } else {
      // if parent bucket was missing, then we should be too
      assert !mcontext.getShardFlag(bucketNumber);
    }

    Map<String,Object> refinement = null;

    if (!mcontext.bucketWasMissing()) {
      // this is just a pass-through bucket... see if there is anything to do at all
      if (subs == null || refineTags.isEmpty()) {
        return null;
      }
    } else {
      // for a missing bucket, go over all sub-facets
      refineTags = null;
      refinement = new HashMap<>(4);
      if (bucketValue != null) {
        refinement.put("_v", bucketValue);
      }
      refinement.put("_m", 1);
    }

    // TODO: listing things like sub-facets that have no field facets is redundant
    // (we only need facets that have variable values)

    for (Map.Entry<String,FacetMerger> sub : subs.entrySet()) {
      if (refineTags != null && !refineTags.contains(sub.getKey())) {
        continue;
      }
      Map<String,Object> subRef = sub.getValue().getRefinement(mcontext);
      if (subRef != null) {
        if (refinement == null) {
          refinement = new HashMap<>(4);
        }
        refinement.put(sub.getKey(), subRef);
      }
    }

    // reset the "bucketWasMissing" flag on the way back out.
    mcontext.setBucketWasMissing(parentMissing);
    return refinement;
  }
}

(removed here: the old in-file FacetFieldMerger class, its FacetNumBucketsMerger inner class, and the old FacetRangeMerger class; this commit moves them, largely unchanged, into the new FacetFieldMerger.java and FacetRangeMerger.java files shown elsewhere in this diff, with the shared bucket-list merging and sorting pulled up into the new FacetRequestSortedMerger.java)

View File

@@ -36,7 +36,7 @@ import org.apache.solr.schema.TrieField;
import org.apache.solr.search.DocSet;
import org.apache.solr.util.DateMathParser;
public class FacetRange extends FacetRequest {
public class FacetRange extends FacetRequestSorted {
String field;
Object start;
Object end;
@@ -44,8 +44,12 @@ public class FacetRange extends FacetRequest {
boolean hardend = false;
EnumSet<FacetParams.FacetRangeInclude> include;
EnumSet<FacetParams.FacetRangeOther> others;
long mincount = 0;
{
// defaults
mincount = 0;
limit = -1;
}
@Override
public FacetProcessor createFacetProcessor(FacetContext fcontext) {

View File

@@ -0,0 +1,123 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.search.facet;

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;

import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.util.SimpleOrderedMap;
public class FacetRangeMerger extends FacetRequestSortedMerger<FacetRange> {
FacetBucket beforeBucket;
FacetBucket afterBucket;
FacetBucket betweenBucket;
public FacetRangeMerger(FacetRange freq) {
super(freq);
}
@Override
FacetMerger createFacetMerger(String key, Object val) {
return super.createFacetMerger(key, val);
}
@Override
public void merge(Object facetResult, Context mcontext) {
merge((SimpleOrderedMap) facetResult , mcontext);
}
@Override
public void sortBuckets() {
// TODO: mincount>0 will mess up order?
sortedBuckets = new ArrayList<>( buckets.values() );
}
@Override
public void finish(Context mcontext) {
// nothing to do
}
public void merge(SimpleOrderedMap facetResult, Context mcontext) {
boolean all = freq.others.contains(FacetParams.FacetRangeOther.ALL);
if (all || freq.others.contains(FacetParams.FacetRangeOther.BEFORE)) {
Object o = facetResult.get("before");
if (o != null) {
if (beforeBucket == null) {
beforeBucket = newBucket(null, mcontext);
}
beforeBucket.mergeBucket((SimpleOrderedMap)o, mcontext);
}
}
if (all || freq.others.contains(FacetParams.FacetRangeOther.AFTER)) {
Object o = facetResult.get("after");
if (o != null) {
if (afterBucket == null) {
afterBucket = newBucket(null, mcontext);
}
afterBucket.mergeBucket((SimpleOrderedMap)o , mcontext);
}
}
if (all || freq.others.contains(FacetParams.FacetRangeOther.BETWEEN)) {
Object o = facetResult.get("between");
if (o != null) {
if (betweenBucket == null) {
betweenBucket = newBucket(null, mcontext);
}
betweenBucket.mergeBucket((SimpleOrderedMap)o , mcontext);
}
}
List<SimpleOrderedMap> bucketList = (List<SimpleOrderedMap>) facetResult.get("buckets");
mergeBucketList(bucketList , mcontext);
}
@Override
public Object getMergedResult() {
// TODO: use sortedBuckets
SimpleOrderedMap result = new SimpleOrderedMap(4);
List<SimpleOrderedMap> resultBuckets = new ArrayList<>(buckets.size());
for (FacetBucket bucket : buckets.values()) {
if (bucket.getCount() < freq.mincount) {
continue;
}
resultBuckets.add( bucket.getMergedBucket() );
}
result.add("buckets", resultBuckets);
if (beforeBucket != null) {
result.add("before", beforeBucket.getMergedBucket());
}
if (afterBucket != null) {
result.add("after", afterBucket.getMergedBucket());
}
if (betweenBucket != null) {
result.add("between", betweenBucket.getMergedBucket());
}
return result;
}
}

View File

@@ -35,10 +35,48 @@ import org.apache.solr.search.QueryContext;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SyntaxError;
import static org.apache.solr.search.facet.FacetRequest.RefineMethod.NONE;
public abstract class FacetRequest {
public static enum SortDirection {
asc(-1) ,
desc(1);
private final int multiplier;
private SortDirection(int multiplier) {
this.multiplier = multiplier;
}
// asc==-1, desc==1
public int getMultiplier() {
return multiplier;
}
}
public static enum RefineMethod {
NONE,
SIMPLE;
// NONE is distinct from null since we may want to know if refinement was explicitly turned off.
public static FacetRequest.RefineMethod fromObj(Object method) {
if (method == null) return null;
if (method instanceof Boolean) {
return ((Boolean)method) ? SIMPLE : NONE;
}
if ("simple".equals(method)) {
return SIMPLE;
} else if ("none".equals(method)) {
return NONE;
} else {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown RefineMethod method " + method);
}
}
}
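// In request terms (illustrative): refine:true -> SIMPLE, refine:false -> NONE,
// refine:"simple" -> SIMPLE, refine:"none" -> NONE, and an absent refine stays null,
// so "explicitly disabled" (NONE) is distinguishable from "never specified" (null).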
protected Map<String,AggValueSource> facetStats; // per-bucket statistics
protected Map<String,FacetRequest> subFacets; // list of facets
protected Map<String,FacetRequest> subFacets; // per-bucket sub-facets
protected List<String> filters;
protected boolean processEmpty;
protected Domain domain;
@@ -64,6 +102,22 @@ public abstract class FacetRequest {
return subFacets;
}
/** Returns null if unset */
public RefineMethod getRefineMethod() {
return null;
}
public boolean doRefine() {
return !(getRefineMethod()==null || getRefineMethod()==NONE);
}
/** Returns true if this facet can return just some of the facet buckets that match all the criteria.
* This is normally true only for facets with a limit.
*/
public boolean returnsPartial() {
return false;
}
public void addStat(String key, AggValueSource stat) {
facetStats.put(key, stat);
}
@@ -541,6 +595,9 @@ class FacetFieldParser extends FacetParser<FacetField> {
facet.method = FacetField.FacetMethod.fromString(getString(m, "method", null));
facet.cacheDf = (int)getLong(m, "cacheDf", facet.cacheDf);
// TODO: pull up to higher level?
facet.refine = FacetField.RefineMethod.fromObj(m.get("refine"));
facet.perSeg = (Boolean)m.get("perSeg");
// facet.sort may depend on a facet stat...
@@ -562,18 +619,18 @@ class FacetFieldParser extends FacetParser<FacetField> {
private void parseSort(Object sort) {
if (sort == null) {
facet.sortVariable = "count";
facet.sortDirection = FacetField.SortDirection.desc;
facet.sortDirection = FacetRequest.SortDirection.desc;
} else if (sort instanceof String) {
String sortStr = (String)sort;
if (sortStr.endsWith(" asc")) {
facet.sortVariable = sortStr.substring(0, sortStr.length()-" asc".length());
facet.sortDirection = FacetField.SortDirection.asc;
facet.sortDirection = FacetRequest.SortDirection.asc;
} else if (sortStr.endsWith(" desc")) {
facet.sortVariable = sortStr.substring(0, sortStr.length()-" desc".length());
facet.sortDirection = FacetField.SortDirection.desc;
facet.sortDirection = FacetRequest.SortDirection.desc;
} else {
facet.sortVariable = sortStr;
facet.sortDirection = "index".equals(facet.sortVariable) ? FacetField.SortDirection.asc : FacetField.SortDirection.desc; // default direction for "index" is ascending
facet.sortDirection = "index".equals(facet.sortVariable) ? FacetRequest.SortDirection.asc : FacetRequest.SortDirection.desc; // default direction for "index" is ascending
}
} else {
// sort : { myvar : 'desc' }
@@ -583,7 +640,7 @@ class FacetFieldParser extends FacetParser<FacetField> {
String k = entry.getKey();
Object v = entry.getValue();
facet.sortVariable = k;
facet.sortDirection = FacetField.SortDirection.valueOf(v.toString());
facet.sortDirection = FacetRequest.SortDirection.valueOf(v.toString());
}
}

View File

@@ -0,0 +1,234 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.search.facet;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import org.apache.solr.common.util.SimpleOrderedMap;
// base class for facets that create a list of buckets that can be sorted
abstract class FacetRequestSortedMerger<FacetRequestT extends FacetRequestSorted> extends FacetBucketMerger<FacetRequestT> {
LinkedHashMap<Object,FacetBucket> buckets = new LinkedHashMap<>();
List<FacetBucket> sortedBuckets;
public FacetRequestSortedMerger(FacetRequestT freq) {
super(freq);
}
private static class SortVal implements Comparable<SortVal> {
FacetBucket bucket;
FacetSortableMerger merger; // make this class inner and access merger , direction in parent?
FacetRequest.SortDirection direction;
@Override
public int compareTo(SortVal o) {
int c = -merger.compareTo(o.merger, direction) * direction.getMultiplier();
return c == 0 ? bucket.bucketValue.compareTo(o.bucket.bucketValue) : c;
}
}
public void mergeBucketList(List<SimpleOrderedMap> bucketList, Context mcontext) {
for (SimpleOrderedMap bucketRes : bucketList) {
Comparable bucketVal = (Comparable)bucketRes.get("val");
FacetBucket bucket = buckets.get(bucketVal);
if (bucket == null) {
bucket = newBucket(bucketVal, mcontext);
buckets.put(bucketVal, bucket);
}
bucket.mergeBucket( bucketRes , mcontext );
}
}
public void sortBuckets() {
sortedBuckets = new ArrayList<>( buckets.values() );
Comparator<FacetBucket> comparator = null;
final FacetRequest.SortDirection direction = freq.sortDirection;
final int sortMul = direction.getMultiplier();
if ("count".equals(freq.sortVariable)) {
comparator = (o1, o2) -> {
int v = -Long.compare(o1.count, o2.count) * sortMul;
return v == 0 ? o1.bucketValue.compareTo(o2.bucketValue) : v;
};
Collections.sort(sortedBuckets, comparator);
} else if ("index".equals(freq.sortVariable)) {
comparator = (o1, o2) -> -o1.bucketValue.compareTo(o2.bucketValue) * sortMul;
Collections.sort(sortedBuckets, comparator);
} else {
final String key = freq.sortVariable;
/**
final FacetSortableMerger[] arr = new FacetSortableMerger[buckets.size()];
final int[] index = new int[arr.length];
int start = 0;
int nullStart = index.length;
int i=0;
for (FacetBucket bucket : buckets.values()) {
FacetMerger merger = bucket.getExistingMerger(key);
if (merger == null) {
index[--nullStart] = i;
}
if (merger != null) {
arr[start] = (FacetSortableMerger)merger;
index[start] = i;
start++;
}
i++;
}
PrimUtils.sort(0, nullStart, index, new PrimUtils.IntComparator() {
@Override
public int compare(int a, int b) {
return arr[index[a]].compareTo(arr[index[b]], direction);
}
});
**/
List<SortVal> lst = new ArrayList<>(buckets.size());
List<FacetBucket> nulls = new ArrayList<>(buckets.size()>>1);
for (int i=0; i<sortedBuckets.size(); i++) {
FacetBucket bucket = sortedBuckets.get(i);
FacetMerger merger = bucket.getExistingMerger(key);
if (merger == null) {
nulls.add(bucket);
}
if (merger != null) {
SortVal sv = new SortVal();
sv.bucket = bucket;
sv.merger = (FacetSortableMerger)merger;
sv.direction = direction;
// sv.pos = i; // if we need position in the future...
lst.add(sv);
}
}
Collections.sort(lst);
Collections.sort(nulls, (o1, o2) -> o1.bucketValue.compareTo(o2.bucketValue));
ArrayList<FacetBucket> out = new ArrayList<>(buckets.size());
for (SortVal sv : lst) {
out.add( sv.bucket );
}
out.addAll(nulls);
sortedBuckets = out;
}
}
@Override
public Map<String, Object> getRefinement(Context mcontext) {
// step 1) If this facet request has refining, then we need to fully request top buckets that were not seen by this shard.
// step 2) If this facet does not have refining, but some sub-facets do, we need to check/recurse those sub-facets in *every* top bucket.
// A combination of the two is possible and makes step 2 redundant for any buckets we fully requested in step 1.
Map<String,Object> refinement = null;
Collection<String> tags = mcontext.getSubsWithRefinement(freq);
if (tags.isEmpty() && !freq.doRefine()) {
// we don't have refining, and neither do our subs
return null;
}
// Tags for sub facets that have partial facets somewhere in their children.
// If we are missing a bucket for this shard, we'll need to get the specific buckets that need refining.
Collection<String> tagsWithPartial = mcontext.getSubsWithPartial(freq);
boolean thisMissing = mcontext.bucketWasMissing();
int num = (int)(freq.offset + freq.limit);
int numBucketsToCheck = Math.min(buckets.size(), num);
Collection<FacetBucket> bucketList;
if (buckets.size() < num) {
// no need to sort
// todo: but we may need to filter.... simplify by always sorting?
bucketList = buckets.values();
} else {
// only sort once
if (sortedBuckets == null) {
sortBuckets(); // todo: make sure this filters buckets as well
}
bucketList = sortedBuckets;
}
ArrayList<Object> leafBuckets = null; // "_l" missing buckets specified by bucket value only (no need to specify anything further)
ArrayList<Object> missingBuckets = null; // "_m" missing buckets that need to specify values for partial facets
ArrayList<Object> skipBuckets = null; // "_s" present buckets that we need to recurse into because children facets have refinement requirements
for (FacetBucket bucket : bucketList) {
if (numBucketsToCheck-- <= 0) break;
// if this bucket is missing,
assert thisMissing == false || thisMissing == true && mcontext.getShardFlag(bucket.bucketNumber) == false;
boolean saw = !thisMissing && mcontext.getShardFlag(bucket.bucketNumber);
if (!saw) {
// we didn't see the bucket for this shard
Map<String,Object> bucketRefinement = null;
// find facets that we need to fill in buckets for
if (!tagsWithPartial.isEmpty()) {
boolean prev = mcontext.setBucketWasMissing(true);
bucketRefinement = bucket.getRefinement(mcontext, tagsWithPartial);
mcontext.setBucketWasMissing(prev);
if (bucketRefinement != null) {
if (missingBuckets==null) missingBuckets = new ArrayList<>();
missingBuckets.add(bucketRefinement);
}
}
// if we didn't add to "_m" (missing), then we should add to "_l" (leaf missing)
if (bucketRefinement == null) {
if (leafBuckets == null) leafBuckets = new ArrayList<>();
leafBuckets.add(bucket.bucketValue);
}
} else if (!tags.isEmpty()) {
// we had this bucket, but we need to recurse to certain children that have refinements
Map<String,Object> bucketRefinement = bucket.getRefinement(mcontext, tagsWithPartial);
if (bucketRefinement != null) {
if (skipBuckets == null) skipBuckets = new ArrayList<>();
skipBuckets.add(bucketRefinement);
}
}
}
// TODO: what if we don't need to refine any variable buckets, but we do need to contribute to numBuckets, missing, allBuckets, etc...
// because we were "missing". That will be handled at a higher level (i.e. we'll be in someone's missing bucket?)
// TODO: test with a sub-facet with a limit of 0 and something like a missing bucket
if (leafBuckets != null || missingBuckets != null || skipBuckets != null) {
refinement = new HashMap<>(3);
if (leafBuckets != null) refinement.put("_l",leafBuckets);
if (missingBuckets != null) refinement.put("_m", missingBuckets);
if (skipBuckets != null) refinement.put("_s", skipBuckets);
}
return refinement;
}
}
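
For illustration, a refinement request produced by this method might look roughly like the following (the facet tag "top_cats", the sub-facet tag "sub_tag", and the bucket values are assumed, not taken from this commit):

  { "top_cats" : {
      "_l" : [ "books", "movies" ],       // buckets this shard never returned; the value alone suffices
      "_m" : [ { "sub_tag" : { ... } } ], // missing buckets whose partial sub-facets must list values
      "_s" : [ { "sub_tag" : { ... } } ]  // buckets the shard did return, recursed for child refinement
  } }

FacetModule.distributedProcess wraps such a map as {"refine": ...} and ships it to the shard in the _facet_ parameter.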

View File

@@ -99,7 +99,7 @@ public class HLLAgg extends StrAggValueSource {
}
@Override
public int compareTo(FacetSortableMerger other, FacetField.SortDirection direction) {
public int compareTo(FacetSortableMerger other, FacetRequest.SortDirection direction) {
return Long.compare( getLong(), ((Merger)other).getLong() );
}
}

View File

@@ -207,7 +207,7 @@ public class PercentileAgg extends SimpleAggValueSource {
}
@Override
public int compareTo(FacetSortableMerger other, FacetField.SortDirection direction) {
public int compareTo(FacetSortableMerger other, FacetRequest.SortDirection direction) {
return Double.compare(getSortVal(), ((Merger) other).getSortVal());
}

View File

@@ -113,7 +113,7 @@ public class UniqueAgg extends StrAggValueSource {
}
@Override
public int compareTo(FacetSortableMerger other, FacetField.SortDirection direction) {
public int compareTo(FacetSortableMerger other, FacetRequest.SortDirection direction) {
return Long.compare( getLong(), ((Merger)other).getLong() );
}
}

View File

@@ -73,6 +73,19 @@ public class JSONTestUtil {
return match(path, input, expected, delta);
}
/**
* @param input Object structure to parse and test against
* @param pathAndExpected JSON path expression + '==' + expected value
* @param delta tolerance allowed in comparing float/double values
*/
public static String matchObj(Object input, String pathAndExpected, double delta) throws Exception {
int pos = pathAndExpected.indexOf("==");
String path = pos>=0 ? pathAndExpected.substring(0,pos) : null;
String expected = pos>=0 ? pathAndExpected.substring(pos+2) : pathAndExpected;
Object expectObj = failRepeatedKeys ? new NoDupsObjectBuilder(new JSONParser(expected)).getVal() : ObjectBuilder.fromJSON(expected);
return matchObj(path, input, expectObj, delta);
}
/**
* @param path JSON path expression
* @param input JSON Structure to parse and test against