SOLR-2894: Distributed query support for facet.pivot

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1617789 13f79535-47bb-0310-9956-ffa450edef68
Chris M. Hostetter 2014-08-13 18:23:53 +00:00
parent 7e4603a988
commit 910d467a93
23 changed files with 4423 additions and 564 deletions

View File

@@ -188,6 +188,8 @@ New Features
* SOLR-6304: JsonLoader should be able to flatten an input JSON to multiple docs (Noble Paul)

* SOLR-2894: Distributed query support for facet.pivot (Dan Cooper, Erik Hatcher, Chris Russell,
  Andrew Muldowney, Brett Lucey, Mark Miller, hossman)

Bug Fixes
----------------------

View File

@@ -0,0 +1,164 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.component;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.handler.component.FacetComponent.FacetBase;
/**
* Models a single instance of a "pivot" specified by a {@link FacetParams#FACET_PIVOT}
* param, which may contain multiple nested fields.
*
This class is also used to coordinate the refinement requests needed from various
shards when processing a distributed request
*/
public class PivotFacet extends FacetBase {
/**
* Local param used to indicate that refinements are required on a pivot. Should
also be used as the prefix for concatenating with the value to determine the
* name of the multi-valued param that will contain all of the values needed for
* refinement.
*/
public static final String REFINE_PARAM = "fpt";
// TODO: is this really needed? can't we just loop over 0<=i<rb.shards.length ?
public final BitSet knownShards = new BitSet();
private final Map<Integer, List<PivotFacetValue>> queuedRefinements = new HashMap<>();
// if null, then either we haven't collected any responses from shards
// or all the shards that have responded so far haven't had any values for the top
// field of this pivot. May be null forever if no doc in any shard has a value
// for the top field of the pivot
private PivotFacetField pivotFacetField;
public PivotFacet(ResponseBuilder rb, String facetStr) {
super(rb, FacetParams.FACET_PIVOT, facetStr);
}
/**
* Tracks that the specified shard needs to be asked to refine the specified
* {@link PivotFacetValue}
*
* @see #getQueuedRefinements
*/
public void addRefinement(int shardNumber, PivotFacetValue value) {
if (!queuedRefinements.containsKey(shardNumber)) {
queuedRefinements.put(shardNumber, new ArrayList<PivotFacetValue>());
}
queuedRefinements.get(shardNumber).add(value);
}
/**
* An immutable List of the {@link PivotFacetValue}s that need to be
* refined for this pivot. Once these refinements have been processed,
* the caller should clear them using {@link #removeAllRefinementsForShard}
*
* @see #addRefinement
* @see #removeAllRefinementsForShard
* @return a list of the values to refine, or an empty list.
*/
public List<PivotFacetValue> getQueuedRefinements(int shardNumber) {
List<PivotFacetValue> raw = queuedRefinements.get(shardNumber);
if (null == raw) {
raw = Collections.<PivotFacetValue>emptyList();
}
return Collections.<PivotFacetValue>unmodifiableList(raw);
}
/**
* Clears the list of queued refinements for the specified shard
*
* @see #addRefinement
* @see #getQueuedRefinements
*/
public void removeAllRefinementsForShard(int shardNumber) {
queuedRefinements.remove(shardNumber);
}
/**
* Indicates whether additional refinement requests are needed to flesh out the correct
counts for this pivot
*
* @see #getQueuedRefinements
*/
public boolean isRefinementsRequired() {
return ! queuedRefinements.isEmpty();
}
/**
* A recursive method for generating <code>NamedLists</code> for this pivot
* suitable for including in a pivot facet response to the original distributed request.
*
* @see PivotFacetField#trim
* @see PivotFacetField#convertToListOfNamedLists
*/
public List<NamedList<Object>> getTrimmedPivotsAsListOfNamedLists(ResponseBuilder rb) {
if (null == pivotFacetField) {
// no values in any shard for the top field of this pivot
return Collections.<NamedList<Object>>emptyList();
}
pivotFacetField.trim();
return pivotFacetField.convertToListOfNamedLists();
}
/**
* A recursive method for determining which {@link PivotFacetValue}s need to be
* refined for this pivot.
*
* @see PivotFacetField#queuePivotRefinementRequests
*/
public void queuePivotRefinementRequests() {
if (null == pivotFacetField) return; // NOOP
pivotFacetField.sort();
pivotFacetField.queuePivotRefinementRequests(this);
}
/**
* Recursively merges the response from the specified shard, tracking the known shards.
*
* @see PivotFacetField#contributeFromShard
* @see PivotFacetField#createFromListOfNamedLists
*/
public void mergeResponseFromShard(int shardNumber, ResponseBuilder rb, List<NamedList<Object>> response) {
knownShards.set(shardNumber);
if (pivotFacetField == null) {
pivotFacetField = PivotFacetField.createFromListOfNamedLists(shardNumber, rb, null, response);
} else {
pivotFacetField.contributeFromShard(shardNumber, rb, response);
}
}
public String toString() {
return "[" + facetStr + "] | " + this.getKey();
}
}
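
A minimal illustrative sketch (not part of this commit) of how a distributed
coordinator is expected to drive the refinement queue above: queue the
refinements, ask each shard for the values it is missing, then clear that
shard's queue. The driveRefinement method and the shardNumbers input are
hypothetical.

import java.util.List;

public class PivotRefinementSketch {
  static void driveRefinement(PivotFacet pivotFacet, int[] shardNumbers) {
    pivotFacet.queuePivotRefinementRequests();
    if (!pivotFacet.isRefinementsRequired()) {
      return; // no shard is missing any counts, the pivot is already exact
    }
    for (int shard : shardNumbers) {
      List<PivotFacetValue> vals = pivotFacet.getQueuedRefinements(shard);
      if (vals.isEmpty()) continue;
      // ... encode each value's getValuePath() into "fpt"-prefixed params and
      // send a ShardRequest with PURPOSE_REFINE_PIVOT_FACETS to that shard ...
      pivotFacet.removeAllRefinementsForShard(shard);
    }
  }
}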

View File

@@ -0,0 +1,386 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.component;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
/**
* Models a single field somewhere in a hierarchy of fields as part of a pivot facet.
* This pivot field contains {@link PivotFacetValue}s which may each contain a nested
* {@link PivotFacetField} child. This <code>PivotFacetField</code> may itself
* be a child of a {@link PivotFacetValue} parent.
*
* @see PivotFacetValue
* @see PivotFacetFieldValueCollection
*/
@SuppressWarnings("rawtypes")
public class PivotFacetField {
public final String field;
// null if this is a top level pivot,
// otherwise the value of the parent pivot we are nested under
public final PivotFacetValue parentValue;
public final PivotFacetFieldValueCollection valueCollection;
// Facet parameters relating to this field
private final int facetFieldLimit;
private final int facetFieldMinimumCount;
private final int facetFieldOffset;
private final String facetFieldSort;
private final Map<Integer, Integer> numberOfValuesContributedByShard = new HashMap<>();
private final Map<Integer, Integer> shardLowestCount = new HashMap<>();
private boolean needRefinementAtThisLevel = true;
private PivotFacetField(ResponseBuilder rb, PivotFacetValue parent, String fieldName) {
field = fieldName;
parentValue = parent;
// facet params
SolrParams parameters = rb.req.getParams();
facetFieldMinimumCount = parameters.getFieldInt(field, FacetParams.FACET_PIVOT_MINCOUNT, 1);
facetFieldOffset = parameters.getFieldInt(field, FacetParams.FACET_OFFSET, 0);
facetFieldLimit = parameters.getFieldInt(field, FacetParams.FACET_LIMIT, 100);
String defaultSort = (facetFieldLimit > 0) ? FacetParams.FACET_SORT_COUNT : FacetParams.FACET_SORT_INDEX;
facetFieldSort = parameters.getFieldParam(field, FacetParams.FACET_SORT, defaultSort);
valueCollection = new PivotFacetFieldValueCollection(facetFieldMinimumCount, facetFieldOffset, facetFieldLimit, facetFieldSort);
if ( (facetFieldLimit < 0) ||
// TODO: possible refinement issue if limit=0 & mincount=0 & missing=true
// (ie: we only want the missing count for this field)
(facetFieldLimit <= 0 && facetFieldMinimumCount == 0) ||
(facetFieldSort.equals(FacetParams.FACET_SORT_INDEX) && facetFieldMinimumCount <= 0)
) {
// in any of these cases, there's no need to refine this level of the pivot
needRefinementAtThisLevel = false;
}
}
/**
* A recursive method that walks up the tree of pivot fields/values to build
* a list of String representations of the values that lead down to this
* PivotFacetField.
*
* @return A mutable List of the pivot values leading down to this pivot field,
* will never be null but may contain nulls and may be empty if this is a top
* level pivot field
* @see PivotFacetValue#getValuePath
*/
public List<String> getValuePath() {
if (null != parentValue) {
return parentValue.getValuePath();
}
return new ArrayList<String>(3);
}
/**
* A recursive method to construct a new <code>PivotFacetField</code> object from
* the contents of the {@link NamedList}s provided by the specified shard, relative
* to a parent value (if this is not the top field in the pivot hierarchy)
*
* The associated child {@link PivotFacetValue}s will be recursively built as well.
*
* @see PivotFacetValue#createFromNamedList
* @param shardNumber the id of the shard that provided this data
* @param rb The response builder of the current request
* @param owner the parent value in the current pivot (may be null)
* @param pivotValues the data from the specified shard for this pivot field, may be null or empty
* @return the new PivotFacetField, null if pivotValues is null or empty.
*/
public static PivotFacetField createFromListOfNamedLists(int shardNumber, ResponseBuilder rb, PivotFacetValue owner, List<NamedList<Object>> pivotValues) {
if (null == pivotValues || pivotValues.size() <= 0) return null;
NamedList<Object> firstValue = pivotValues.get(0);
PivotFacetField createdPivotFacetField
= new PivotFacetField(rb, owner, PivotFacetHelper.getField(firstValue));
int lowestCount = Integer.MAX_VALUE;
for (NamedList<Object> pivotValue : pivotValues) {
lowestCount = Math.min(lowestCount, PivotFacetHelper.getCount(pivotValue));
PivotFacetValue newValue = PivotFacetValue.createFromNamedList
(shardNumber, rb, createdPivotFacetField, pivotValue);
createdPivotFacetField.valueCollection.add(newValue);
}
createdPivotFacetField.shardLowestCount.put(shardNumber, lowestCount);
createdPivotFacetField.numberOfValuesContributedByShard.put(shardNumber, pivotValues.size());
return createdPivotFacetField;
}
/**
* Destructive method that recursively prunes values from the data structure
* based on the counts for those values and the effective sort, mincount, limit,
* and offset being used for each field.
* <p>
* This method should only be called after all refinement is completed just prior
* calling {@link #convertToListOfNamedLists}
* </p>
*
* @see PivotFacet#getTrimmedPivotsAsListOfNamedLists
* @see PivotFacetFieldValueCollection#trim
*/
public void trim() {
// SOLR-6331...
//
// we can probably optimize the memory usage by trimming each level of the pivot once
// we know we've fully refined the values at that level
// (ie: fold this logic into refineNextLevelOfFacets)
this.valueCollection.trim();
}
/**
* Recursively sorts the collection of values associated with this field, and
* any sub-pivots those values have.
*
* @see FacetParams#FACET_SORT
* @see PivotFacetFieldValueCollection#sort
*/
public void sort() {
this.valueCollection.sort();
}
/**
* A recursive method for generating <code>NamedLists</code> from this field
* suitable for including in a pivot facet response to the original distributed request.
*/
public List<NamedList<Object>> convertToListOfNamedLists() {
List<NamedList<Object>> convertedPivotList = null;
if (valueCollection.size() > 0) {
convertedPivotList = new LinkedList<>();
for (PivotFacetValue pivot : valueCollection)
convertedPivotList.add(pivot.convertToNamedList());
}
return convertedPivotList;
}
/**
* A recursive method for determining which {@link PivotFacetValue}s need to be
* refined for this pivot.
*
* @see PivotFacet#queuePivotRefinementRequests
*/
public void queuePivotRefinementRequests(PivotFacet pf) {
if (needRefinementAtThisLevel && ! valueCollection.getExplicitValuesList().isEmpty()) {
if (FacetParams.FACET_SORT_COUNT.equals(facetFieldSort)) {
// we only need to refine values that are currently within our limit,
// or that might fall within our limit if we get increased counts from shards
// that didn't include this value the first time
final int indexOfCountThreshold
= Math.min(valueCollection.getExplicitValuesListSize(),
facetFieldOffset + facetFieldLimit) - 1;
final int countThreshold = valueCollection.getAt(indexOfCountThreshold).getCount();
int positionInResults = 0;
for (PivotFacetValue value : valueCollection.getExplicitValuesList()) {
if (positionInResults <= indexOfCountThreshold) {
// This element is within the top results, so we need to get information
// from all of the shards.
processDefiniteCandidateElement(pf, value);
} else {
// This element is not within the top results, but may still need to be refined.
processPossibleCandidateElement(pf, value, countThreshold);
}
positionInResults++;
}
} else { // FACET_SORT_INDEX
// everything needs to be refined to see what the per-shard mincount excluded
for (PivotFacetValue value : valueCollection.getExplicitValuesList()) {
processDefiniteCandidateElement(pf, value);
}
}
needRefinementAtThisLevel = false;
}
if ( pf.isRefinementsRequired() ) {
// if any refinements are needed, then we need to stop and wait to
// see how the picture may change before drilling down to child pivot fields
return;
} else {
// Since outstanding requests have been filled, then we can drill down
// to the next deeper level and check it.
refineNextLevelOfFacets(pf);
}
}
/**
* Adds refinement requests for the value for each shard that has not already contributed
* a count for this value.
*/
private void processDefiniteCandidateElement(PivotFacet pf, PivotFacetValue value) {
for (int shard = pf.knownShards.nextSetBit(0);
0 <= shard;
shard = pf.knownShards.nextSetBit(shard+1)) {
if ( ! value.shardHasContributed(shard) ) {
if ( // if we're doing index order, we need to refine anything
// (mincount may have excluded from a shard)
FacetParams.FACET_SORT_INDEX.equals(facetFieldSort)
// if we are doing count order, we need to refine if the limit was hit
// (if it was not, the shard doesn't have the value or it would have been returned already)
|| numberOfValuesContributedByShardWasLimitedByFacetFieldLimit(shard) ) {
pf.addRefinement(shard, value);
}
}
}
}
private boolean numberOfValuesContributedByShardWasLimitedByFacetFieldLimit(int shardNumber) {
return facetFieldLimit <= numberOfValuesContributedByShard(shardNumber);
}
private int numberOfValuesContributedByShard(final int shardNumber) {
return numberOfValuesContributedByShard.containsKey(shardNumber)
? numberOfValuesContributedByShard.get(shardNumber)
: 0;
}
/**
* Checks the {@link #lowestCountContributedbyShard} for each shard, combined with the
* counts we already know, to see if this value is a viable candidate --
* <b>Does not make sense when using {@link FacetParams#FACET_SORT_INDEX}</b>
*
* @see #processDefiniteCandidateElement
*/
private void processPossibleCandidateElement(PivotFacet pf, PivotFacetValue value,
final int refinementThreshold) {
assert FacetParams.FACET_SORT_COUNT.equals(facetFieldSort)
: "Method only makes sense when sorting by count";
int maxPossibleCountAfterRefinement = value.getCount();
for (int shard = pf.knownShards.nextSetBit(0);
0 <= shard;
shard = pf.knownShards.nextSetBit(shard+1)) {
if ( ! value.shardHasContributed(shard) ) {
maxPossibleCountAfterRefinement += lowestCountContributedbyShard(shard);
}
}
if (refinementThreshold <= maxPossibleCountAfterRefinement) {
processDefiniteCandidateElement(pf, value);
}
}
private int lowestCountContributedbyShard(int shardNumber) {
return (shardLowestCount.containsKey(shardNumber))
? shardLowestCount.get(shardNumber)
: 0;
}
private void refineNextLevelOfFacets(PivotFacet pf) {
List<PivotFacetValue> explicitValsToRefine
= valueCollection.getNextLevelValuesToRefine();
for (PivotFacetValue value : explicitValsToRefine) {
if (null != value.getChildPivot()) {
value.getChildPivot().queuePivotRefinementRequests(pf);
}
}
PivotFacetValue missing = this.valueCollection.getMissingValue();
if(null != missing && null != missing.getChildPivot()) {
missing.getChildPivot().queuePivotRefinementRequests(pf);
}
}
private void incrementShardValueCount(int shardNumber) {
if (!numberOfValuesContributedByShard.containsKey(shardNumber)) {
numberOfValuesContributedByShard.put(shardNumber, 1);
} else {
numberOfValuesContributedByShard.put(shardNumber, numberOfValuesContributedByShard.get(shardNumber)+1);
}
}
private void contributeValueFromShard(int shardNumber, ResponseBuilder rb, NamedList<Object> shardValue) {
incrementShardValueCount(shardNumber);
Comparable value = PivotFacetHelper.getValue(shardValue);
int count = PivotFacetHelper.getCount(shardValue);
// We're changing values so we must mark the collection as dirty
valueCollection.markDirty();
if ( ( !shardLowestCount.containsKey(shardNumber) )
|| shardLowestCount.get(shardNumber) > count) {
shardLowestCount.put(shardNumber, count);
}
PivotFacetValue facetValue = valueCollection.get(value);
if (null == facetValue) {
// never seen before, we need to create it from scratch
facetValue = PivotFacetValue.createFromNamedList(shardNumber, rb, this, shardValue);
this.valueCollection.add(facetValue);
} else {
facetValue.mergeContributionFromShard(shardNumber, rb, shardValue);
}
}
/**
* Recursively merges the contributions from the specified shard for each
* {@link PivotFacetValue} represented in the <code>response</code>.
*
* @see PivotFacetValue#mergeContributionFromShard
* @param shardNumber the id of the shard that provided this data
* @param rb The response builder of the current request
* @param response the data from the specified shard for this pivot field, may be null
*/
public void contributeFromShard(int shardNumber, ResponseBuilder rb, List<NamedList<Object>> response) {
if (null == response) return;
for (NamedList<Object> responseValue : response) {
contributeValueFromShard(shardNumber, rb, responseValue);
}
}
public String toString(){
return String.format(Locale.ROOT, "P:%s F:%s V:%s",
parentValue, field, valueCollection);
}
}
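
The arithmetic behind processDefiniteCandidateElement and
processPossibleCandidateElement can be seen in a small standalone sketch (all
numbers assumed): a value outside the top offset+limit results is only worth
refining if the shards that have not reported it could, at best, lift its
count past the count of the threshold value.

public class PossibleCandidateMath {
  public static void main(String[] args) {
    int countThreshold = 50;  // count of the last value inside offset+limit
    int currentCount = 30;    // count seen so far for the candidate value
    // shardLowestCount for the shards that have not contributed this value;
    // each is an upper bound on what that shard could still add
    int[] lowestCountFromMissingShards = {12, 9};
    int maxPossible = currentCount;
    for (int c : lowestCountFromMissingShards) {
      maxPossible += c;
    }
    // 30 + 12 + 9 = 51 >= 50, so the value is still a viable candidate
    System.out.println("refine? " + (countThreshold <= maxPossible));
  }
}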

View File

@@ -0,0 +1,342 @@
package org.apache.solr.handler.component;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import org.apache.solr.common.params.FacetParams;
/**
* Encapsulates a collection of {@link PivotFacetValue}s associated with a
* {@link PivotFacetField} with special tracking of a {@link PivotFacetValue}
* corresponding to the <code>null</code> value when {@link FacetParams#FACET_MISSING}
* is used.
*
* @see #markDirty
* @see PivotFacetField
* @see PivotFacetValue
*/
@SuppressWarnings("rawtypes")
public class PivotFacetFieldValueCollection implements Iterable<PivotFacetValue> {
private List<PivotFacetValue> explicitValues;
private PivotFacetValue missingValue;
private Map<Comparable, PivotFacetValue> valuesMap;
private boolean dirty = true;
//Facet parameters relating to this field
private final int facetFieldMinimumCount;
private final int facetFieldOffset;
private final int facetFieldLimit;
private final String facetFieldSort;
public PivotFacetFieldValueCollection(int minCount, int offset, int limit, String fieldSort){
this.explicitValues = new ArrayList<>();
this.valuesMap = new HashMap<>();
this.facetFieldMinimumCount = minCount;
this.facetFieldOffset = offset;
this.facetFieldLimit = limit;
this.facetFieldSort = fieldSort;
}
/**
* Indicates that the values in this collection have been modified by the caller.
*
* Any caller that manipulates the {@link PivotFacetValue}s contained in this collection
* must call this method after doing so.
*/
public void markDirty() {
dirty = true;
}
/**
* The {@link PivotFacetValue} corresponding to a value of
* <code>null</code> when {@link FacetParams#FACET_MISSING} is used.
*
* @return the appropriate <code>PivotFacetValue</code> object, may be null
* if "missing" is not in use, or if it does not meet the mincount.
*/
public PivotFacetValue getMissingValue(){
return missingValue;
}
/**
* Read-Only access to the Collection of {@link PivotFacetValue}s corresponding to
* non-missing values.
*
* @see #getMissingValue
*/
public List<PivotFacetValue> getExplicitValuesList() {
return Collections.unmodifiableList(explicitValues);
}
/**
* Size of {@link #getExplicitValuesList}
*/
public int getExplicitValuesListSize() {
return this.explicitValues.size();
}
/**
* Total number of {@link PivotFacetValue}s, including the "missing" value if used.
*
* @see #getMissingValue
* @see #getExplicitValuesList
*/
public int size() {
return this.getExplicitValuesListSize() + (this.missingValue == null ? 0 : 1);
}
/**
* Returns the appropriate sub-list of the explicit values that need to be refined,
* based on the {@link FacetParams#FACET_OFFSET} &amp; {@link FacetParams#FACET_LIMIT}
* for this field.
*
* @see #getExplicitValuesList
* @see List#subList
*/
public List<PivotFacetValue> getNextLevelValuesToRefine() {
final int numRefinableValues = getExplicitValuesListSize();
if (facetFieldOffset < numRefinableValues) {
final int offsetPlusCount = (facetFieldLimit >= 0)
? Math.min(facetFieldLimit + facetFieldOffset, numRefinableValues)
: numRefinableValues;
return getExplicitValuesList().subList(facetFieldOffset, offsetPlusCount);
} else {
return Collections.<PivotFacetValue>emptyList();
}
}
/**
* Fast lookup to retrieve a {@link PivotFacetValue} from this collection if it
* exists
*
* @param value of the <code>PivotFacetValue</code> to lookup, if
* <code>null</code> this returns the same as {@link #getMissingValue}
* @return the corresponding <code>PivotFacetValue</code> or null if there is
* no <code>PivotFacetValue</code> in this collection corresponding to
* the specified value.
*/
public PivotFacetValue get(Comparable value){
return valuesMap.get(value);
}
/**
* Fetches a {@link PivotFacetValue} from this collection via its index; may not
* be used to fetch the <code>PivotFacetValue</code> corresponding to the missing-value.
*
* @see #getExplicitValuesList
* @see List#get(int)
* @see #getMissingValue
*/
public PivotFacetValue getAt(int index){
return explicitValues.get(index);
}
/**
* Adds a {@link PivotFacetValue} to this collection -- callers must not use this
* method if a {@link PivotFacetValue} with the same value already exists in this collection
*/
public void add(PivotFacetValue pfValue) {
Comparable val = pfValue.getValue();
assert ! this.valuesMap.containsKey(val)
: "Must not add duplicate PivotFacetValue with redundent inner value";
dirty = true;
if(null == val) {
this.missingValue = pfValue;
} else {
this.explicitValues.add(pfValue);
}
this.valuesMap.put(val, pfValue);
}
/**
* Destructive method that recursively prunes values from the data structure
* based on the counts for those values and the effective sort, mincount, limit,
* and offset being used for each field.
* <p>
* This method should only be called after all refinement is completed.
* </p>
*
* @see PivotFacetField#trim
* @see PivotFacet#getTrimmedPivotsAsListOfNamedLists
*/
public void trim() { // NOTE: destructive
// TODO: see comment in PivotFacetField about potential optimization
// (ie: trim as we refine)
trimNonNullValues();
trimNullValue();
}
private void trimNullValue(){
if (missingValue == null) {
return;
}
if (missingValue.getCount() >= facetFieldMinimumCount){
if (null != missingValue.getChildPivot()) {
missingValue.getChildPivot().trim();
}
} else { // missing count less than mincount
missingValue = null;
}
}
private void trimNonNullValues(){
if (explicitValues != null && explicitValues.size() > 0) {
sort();
ArrayList<PivotFacetValue> trimmedValues = new ArrayList<>();
int facetsSkipped = 0;
for (PivotFacetValue pivotValue : explicitValues) {
if (pivotValue.getCount() >= facetFieldMinimumCount) {
if (facetsSkipped >= facetFieldOffset) {
trimmedValues.add(pivotValue);
if (pivotValue.getChildPivot() != null) {
pivotValue.getChildPivot().trim();
}
if (facetFieldLimit > 0 && trimmedValues.size() >= facetFieldLimit) {
break;
}
} else {
facetsSkipped++;
}
}
}
explicitValues = trimmedValues;
valuesMap.clear();
}
}
/**
* Sorts the collection and recursively sorts the collections associated with
* any sub-pivots.
*
* @see FacetParams#FACET_SORT
* @see PivotFacetField#sort
*/
public void sort() {
if (dirty) {
if (facetFieldSort.equals(FacetParams.FACET_SORT_COUNT)) {
Collections.sort(this.explicitValues, new PivotFacetCountComparator());
} else if (facetFieldSort.equals(FacetParams.FACET_SORT_INDEX)) {
Collections.sort(this.explicitValues, new PivotFacetValueComparator());
}
dirty = false;
}
for (PivotFacetValue value : this.explicitValues)
if (value.getChildPivot() != null) {
value.getChildPivot().sort();
}
if (missingValue != null && missingValue.getChildPivot() != null) {
missingValue.getChildPivot().sort();
}
}
/**
* Iterator over all elements in this Collection, including the result of
* {@link #getMissingValue} as the last element (if it exists)
*/
@Override
public Iterator<PivotFacetValue> iterator() {
Iterator<PivotFacetValue> it = new Iterator<PivotFacetValue>() {
private final Iterator<PivotFacetValue> valuesIterator = explicitValues.iterator();
private boolean shouldGiveMissingValue = (missingValue != null);
@Override
public boolean hasNext() {
return valuesIterator.hasNext() || shouldGiveMissingValue;
}
@Override
public PivotFacetValue next() {
if (valuesIterator.hasNext()) {
return valuesIterator.next();
}
//else
if(shouldGiveMissingValue){
shouldGiveMissingValue = false;
return missingValue;
}
return null;
}
@Override
public void remove() {
throw new UnsupportedOperationException("Can't remove from this iterator");
}
};
return it;
}
/** Sorts {@link PivotFacetValue} instances by their count */
public class PivotFacetCountComparator implements Comparator<PivotFacetValue> {
public int compare(PivotFacetValue left, PivotFacetValue right) {
int countCmp = right.getCount() - left.getCount();
return (0 != countCmp) ? countCmp :
compareWithNullLast(left.getValue(), right.getValue());
}
}
/** Sorts {@link PivotFacetValue} instances by their value */
public class PivotFacetValueComparator implements Comparator<PivotFacetValue> {
public int compare(PivotFacetValue left, PivotFacetValue right) {
return compareWithNullLast(left.getValue(), right.getValue());
}
}
/**
* A helper method for use in <code>Comparator</code> classes where object properties
* are <code>Comparable</code> but may be null.
*/
static int compareWithNullLast(final Comparable o1, final Comparable o2) {
if (null == o1) {
if (null == o2) {
return 0;
}
return 1; // o1 is null, o2 is not
}
if (null == o2) {
return -1; // o2 is null, o1 is not
}
return o1.compareTo(o2);
}
public String toString(){
return String.format(Locale.ROOT, "Values:%s | Missing:%s ", explicitValues, missingValue);
}
}
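
A small runnable sketch of the null handling in compareWithNullLast: under
both comparators above, the null (missing) value always sorts last. The helper
is copied here so the example stands alone.

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class NullLastSortSketch {
  @SuppressWarnings({"unchecked", "rawtypes"})
  static int compareWithNullLast(Comparable o1, Comparable o2) {
    if (null == o1) {
      return (null == o2) ? 0 : 1; // o1 is null, sorts after o2
    }
    if (null == o2) {
      return -1; // o2 is null, o1 sorts first
    }
    return o1.compareTo(o2);
  }
  public static void main(String[] args) {
    List<String> vals = new ArrayList<>();
    Collections.addAll(vals, "books", null, "apparel");
    vals.sort(NullLastSortSketch::compareWithNullLast);
    System.out.println(vals); // [apparel, books, null]
  }
}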

View File

@@ -17,254 +17,105 @@
package org.apache.solr.handler.component;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Deque;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.solr.common.SolrException;
import org.apache.solr.util.PivotListEntry;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.request.SimpleFacets;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.common.util.StrUtils;
/**
* @since solr 4.0
*/
public class PivotFacetHelper extends SimpleFacets
{
import java.util.ArrayList;
import java.util.List;
import java.util.Collections;
protected int minMatch;
public class PivotFacetHelper {
public PivotFacetHelper(SolrQueryRequest req, DocSet docs, SolrParams params, ResponseBuilder rb) {
super(req, docs, params, rb);
minMatch = params.getInt( FacetParams.FACET_PIVOT_MINCOUNT, 1 );
}
/**
* Encodes a value path as a string for the purposes of a refinement request
*
* @see PivotFacetValue#getValuePath
* @see #decodeRefinementValuePath
*/
public static String encodeRefinementValuePath(List<String> values) {
// HACK: prefix flag every value to account for empty string vs null
// NOTE: even if we didn't have to worry about nulls, smartSplit is stupid about
// pruning empty strings from the list
// "^" prefix = null
// "~" prefix = not null, may be empty string
public SimpleOrderedMap<List<NamedList<Object>>> process(String[] pivots) throws IOException {
if (!rb.doFacets || pivots == null)
return null;
assert null != values;
SimpleOrderedMap<List<NamedList<Object>>> pivotResponse = new SimpleOrderedMap<>();
for (String pivot : pivots) {
//ex: pivot == "features,cat" or even "{!ex=mytag}features,cat"
try {
this.parseParams(FacetParams.FACET_PIVOT, pivot);
} catch (SyntaxError e) {
throw new SolrException(ErrorCode.BAD_REQUEST, e);
// special case: empty list => empty string
if (values.isEmpty()) { return ""; }
StringBuilder out = new StringBuilder();
for (String val : values) {
if (null == val) {
out.append('^');
} else {
out.append('~');
StrUtils.appendEscapedTextToBuilder(out, val, ',');
}
pivot = facetValue;//facetValue potentially modified from parseParams()
String[] fields = pivot.split(",");
if( fields.length < 2 ) {
throw new SolrException( ErrorCode.BAD_REQUEST,
"Pivot Facet needs at least two fields: "+pivot );
}
String field = fields[0];
String subField = fields[1];
Deque<String> fnames = new LinkedList<>();
for( int i=fields.length-1; i>1; i-- ) {
fnames.push( fields[i] );
}
NamedList<Integer> superFacets = this.getTermCounts(field);
//super.key usually == pivot unless local-param 'key' used
pivotResponse.add(key, doPivots(superFacets, field, subField, fnames, docs));
out.append(',');
}
return pivotResponse;
out.deleteCharAt(out.length()-1); // prune the last separator
return out.toString();
// return StrUtils.join(values, ',');
}
/**
* Recursive function to do all the pivots
* Decodes a value path string specified for refinement.
*
* @see #encodeRefinementValuePath
*/
protected List<NamedList<Object>> doPivots(NamedList<Integer> superFacets,
String field, String subField, Deque<String> fnames,
DocSet docs) throws IOException
{
SolrIndexSearcher searcher = rb.req.getSearcher();
// TODO: optimize to avoid converting to an external string and then having to convert back to internal below
SchemaField sfield = searcher.getSchema().getField(field);
FieldType ftype = sfield.getType();
public static List<String> decodeRefinementValuePath(String valuePath) {
List <String> rawvals = StrUtils.splitSmart(valuePath, ",", true);
// special case: empty list => empty string
if (rawvals.isEmpty()) return rawvals;
String nextField = fnames.poll();
List<NamedList<Object>> values = new ArrayList<>( superFacets.size() );
for (Map.Entry<String, Integer> kv : superFacets) {
// Only sub-facet if parent facet has positive count - still may not be any values for the sub-field though
if (kv.getValue() >= minMatch) {
// may be null when using facet.missing
final String fieldValue = kv.getKey();
// don't reuse the same BytesRef each time since we will be
// constructing Term objects used in TermQueries that may be cached.
BytesRefBuilder termval = null;
SimpleOrderedMap<Object> pivot = new SimpleOrderedMap<>();
pivot.add( "field", field );
if (null == fieldValue) {
pivot.add( "value", null );
} else {
termval = new BytesRefBuilder();
ftype.readableToIndexed(fieldValue, termval);
pivot.add( "value", ftype.toObject(sfield, termval.get()) );
}
pivot.add( "count", kv.getValue() );
if( subField == null ) {
values.add( pivot );
}
else {
DocSet subset = null;
if ( null == termval ) {
DocSet hasVal = searcher.getDocSet
(new TermRangeQuery(field, null, null, false, false));
subset = docs.andNot(hasVal);
} else {
Query query = new TermQuery(new Term(field, termval.get()));
subset = searcher.getDocSet(query, docs);
}
super.docs = subset;//used by getTermCounts()
NamedList<Integer> nl = this.getTermCounts(subField);
if (nl.size() >= minMatch) {
pivot.add( "pivot", doPivots( nl, subField, nextField, fnames, subset) );
values.add( pivot ); // only add response if there are some counts
}
}
List<String> out = new ArrayList<String>(rawvals.size());
for (String raw : rawvals) {
assert 0 < raw.length();
if ('^' == raw.charAt(0)) {
assert 1 == raw.length();
out.add(null);
} else {
assert '~' == raw.charAt(0);
out.add(raw.substring(1));
}
}
// put the field back on the list
fnames.push( nextField );
return values;
return out;
}
/** @see PivotListEntry#VALUE */
public static Comparable getValue(NamedList<Object> pivotList) {
return (Comparable) PivotFacetHelper.retrieve(PivotListEntry.VALUE,
pivotList);
}
/** @see PivotListEntry#FIELD */
public static String getField(NamedList<Object> pivotList) {
return (String) PivotFacetHelper.retrieve(PivotListEntry.FIELD, pivotList);
}
/** @see PivotListEntry#COUNT */
public static Integer getCount(NamedList<Object> pivotList) {
return (Integer) PivotFacetHelper.retrieve(PivotListEntry.COUNT, pivotList);
}
/** @see PivotListEntry#PIVOT */
public static List<NamedList<Object>> getPivots(NamedList<Object> pivotList) {
int pivotIdx = pivotList.indexOf(PivotListEntry.PIVOT.getName(), 0);
if (pivotIdx > -1) {
return (List<NamedList<Object>>) pivotList.getVal(pivotIdx);
}
return null;
}
private static Object retrieve(PivotListEntry entryToGet, NamedList<Object> pivotList) {
return pivotList.get(entryToGet.getName(), entryToGet.getIndex());
}
// TODO: This is code from various patches to support distributed search.
// Some parts may be helpful for whoever implements distributed search.
//
// @Override
// public int distributedProcess(ResponseBuilder rb) throws IOException {
// if (!rb.doFacets) {
// return ResponseBuilder.STAGE_DONE;
// }
//
// if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS) {
// SolrParams params = rb.req.getParams();
// String[] pivots = params.getParams(FacetParams.FACET_PIVOT);
// for ( ShardRequest sreq : rb.outgoing ) {
// if (( sreq.purpose & ShardRequest.PURPOSE_GET_FIELDS ) != 0
// && sreq.shards != null && sreq.shards.length == 1 ) {
// sreq.params.set( FacetParams.FACET, "true" );
// sreq.params.set( FacetParams.FACET_PIVOT, pivots );
// sreq.params.set( FacetParams.FACET_PIVOT_MINCOUNT, 1 ); // keep this at 1 regardless so that it accumulates everything
// }
// }
// }
// return ResponseBuilder.STAGE_DONE;
// }
//
// @Override
// public void handleResponses(ResponseBuilder rb, ShardRequest sreq) {
// if (!rb.doFacets) return;
//
//
// if ((sreq.purpose & ShardRequest.PURPOSE_GET_FACETS)!=0) {
// SimpleOrderedMap<List<NamedList<Object>>> tf = rb._pivots;
// if ( null == tf ) {
// tf = new SimpleOrderedMap<List<NamedList<Object>>>();
// rb._pivots = tf;
// }
// for (ShardResponse srsp: sreq.responses) {
// int shardNum = rb.getShardNum(srsp.getShard());
//
// NamedList facet_counts = (NamedList)srsp.getSolrResponse().getResponse().get("facet_counts");
//
// // handle facet trees from shards
// SimpleOrderedMap<List<NamedList<Object>>> shard_pivots =
// (SimpleOrderedMap<List<NamedList<Object>>>)facet_counts.get( PIVOT_KEY );
//
// if ( shard_pivots != null ) {
// for (int j=0; j< shard_pivots.size(); j++) {
// // TODO -- accumulate the results from each shard
// // The following code worked to accumulate facets for an previous
// // two level patch... it is here for reference till someone can upgrade
// /**
// String shard_tree_name = (String) shard_pivots.getName( j );
// SimpleOrderedMap<NamedList> shard_tree = (SimpleOrderedMap<NamedList>)shard_pivots.getVal( j );
// SimpleOrderedMap<NamedList> facet_tree = tf.get( shard_tree_name );
// if ( null == facet_tree) {
// facet_tree = new SimpleOrderedMap<NamedList>();
// tf.add( shard_tree_name, facet_tree );
// }
//
// for( int o = 0; o < shard_tree.size() ; o++ ) {
// String shard_outer = (String) shard_tree.getName( o );
// NamedList shard_innerList = (NamedList) shard_tree.getVal( o );
// NamedList tree_innerList = (NamedList) facet_tree.get( shard_outer );
// if ( null == tree_innerList ) {
// tree_innerList = new NamedList();
// facet_tree.add( shard_outer, tree_innerList );
// }
//
// for ( int i = 0 ; i < shard_innerList.size() ; i++ ) {
// String shard_term = (String) shard_innerList.getName( i );
// long shard_count = ((Number) shard_innerList.getVal(i)).longValue();
// int tree_idx = tree_innerList.indexOf( shard_term, 0 );
//
// if ( -1 == tree_idx ) {
// tree_innerList.add( shard_term, shard_count );
// } else {
// long tree_count = ((Number) tree_innerList.getVal( tree_idx )).longValue();
// tree_innerList.setVal( tree_idx, shard_count + tree_count );
// }
// } // innerList loop
// } // outer loop
// **/
// } // each tree loop
// }
// }
// }
// return ;
// }
//
// @Override
// public void finishStage(ResponseBuilder rb) {
// if (!rb.doFacets || rb.stage != ResponseBuilder.STAGE_GET_FIELDS) return;
// // wait until STAGE_GET_FIELDS
// // so that "result" is already stored in the response (for aesthetics)
//
// SimpleOrderedMap<List<NamedList<Object>>> tf = rb._pivots;
//
// // get 'facet_counts' from the response
// NamedList facetCounts = (NamedList) rb.rsp.getValues().get("facet_counts");
// if (facetCounts == null) {
// facetCounts = new NamedList();
// rb.rsp.add("facet_counts", facetCounts);
// }
// facetCounts.add( PIVOT_KEY, tf );
// rb._pivots = null;
// }
//
// public String getDescription() {
// return "Handle Pivot (multi-level) Faceting";
// }
}
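
A usage sketch (assuming the Solr classes above are on the classpath) of the
refinement path encoding: "^" marks a null value, "~" prefixes a non-null
value (which may be the empty string), and the round trip is lossless.

import java.util.Arrays;
import java.util.List;
import org.apache.solr.handler.component.PivotFacetHelper;

public class RefinementPathSketch {
  public static void main(String[] args) {
    // a value path: "electronics" -> missing value -> empty string
    List<String> path = Arrays.asList("electronics", null, "");
    String encoded = PivotFacetHelper.encodeRefinementValuePath(path);
    System.out.println(encoded); // ~electronics,^,~
    List<String> decoded = PivotFacetHelper.decodeRefinementValuePath(encoded);
    System.out.println(path.equals(decoded)); // true
  }
}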

View File

@@ -0,0 +1,252 @@
package org.apache.solr.handler.component;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.FieldType;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.request.SimpleFacets;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.lucene.search.Query;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Deque;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
/**
* Processes all Pivot facet logic for a single node -- both non-distributed requests and per-shard requests
*/
public class PivotFacetProcessor extends SimpleFacets
{
protected SolrParams params;
public PivotFacetProcessor(SolrQueryRequest req, DocSet docs, SolrParams params, ResponseBuilder rb) {
super(req, docs, params, rb);
this.params = params;
}
/**
* Processes all of the specified {@link FacetParams#FACET_PIVOT} strings, generating
* a complete response tree for each pivot. The values in this response will either
* be the complete tree of fields and values for the specified pivot in the local index,
* or the requested refinements if the pivot params include the {@link PivotFacet#REFINE_PARAM}
*/
public SimpleOrderedMap<List<NamedList<Object>>> process(String[] pivots) throws IOException {
if (!rb.doFacets || pivots == null)
return null;
SimpleOrderedMap<List<NamedList<Object>>> pivotResponse = new SimpleOrderedMap<>();
for (String pivotList : pivots) {
try {
this.parseParams(FacetParams.FACET_PIVOT, pivotList);
} catch (SyntaxError e) {
throw new SolrException(ErrorCode.BAD_REQUEST, e);
}
List<String> pivotFields = StrUtils.splitSmart(facetValue, ",", true);
if( pivotFields.size() < 1 ) {
throw new SolrException( ErrorCode.BAD_REQUEST,
"Pivot Facet needs at least one field name: " + pivotList);
} else {
SolrIndexSearcher searcher = rb.req.getSearcher();
for (String fieldName : pivotFields) {
SchemaField sfield = searcher.getSchema().getField(fieldName);
if (sfield == null) {
throw new SolrException(ErrorCode.BAD_REQUEST, "\"" + fieldName + "\" is not a valid field name in pivot: " + pivotList);
}
}
}
//REFINEMENT
String fieldValueKey = localParams == null ? null : localParams.get(PivotFacet.REFINE_PARAM);
if(fieldValueKey != null ){
String[] refinementValuesByField = params.getParams(PivotFacet.REFINE_PARAM+fieldValueKey);
for(String refinements : refinementValuesByField){
pivotResponse.addAll(processSingle(pivotFields, refinements));
}
} else{
pivotResponse.addAll(processSingle(pivotFields, null));
}
}
return pivotResponse;
}
/**
* Process a single branch of refinement values for a specific pivot
* @param pivotFields the ordered list of fields in this pivot
* @param refinements the comma separated list of refinement values corresponding to each field in the pivot, or null if there are no refinements
*/
private SimpleOrderedMap<List<NamedList<Object>>> processSingle(List<String> pivotFields,
String refinements) throws IOException {
SolrIndexSearcher searcher = rb.req.getSearcher();
SimpleOrderedMap<List<NamedList<Object>>> pivotResponse = new SimpleOrderedMap<>();
String field = pivotFields.get(0);
SchemaField sfield = searcher.getSchema().getField(field);
Deque<String> fnames = new LinkedList<>();
for( int i = pivotFields.size()-1; i>1; i-- ) {
fnames.push( pivotFields.get(i) );
}
NamedList<Integer> facetCounts;
Deque<String> vnames = new LinkedList<>();
if (null != refinements) {
// All values, split by the field they should go to
List<String> refinementValuesByField
= PivotFacetHelper.decodeRefinementValuePath(refinements);
for( int i=refinementValuesByField.size()-1; i>0; i-- ) {
vnames.push(refinementValuesByField.get(i)); // only values [1] and onward; value [0] is handled below
}
String firstFieldsValues = refinementValuesByField.get(0);
facetCounts = new NamedList<Integer>();
facetCounts.add(firstFieldsValues,
getSubsetSize(this.docs, sfield, firstFieldsValues));
} else {
// no refinements needed
facetCounts = this.getTermCountsForPivots(field, this.docs);
}
if(pivotFields.size() > 1) {
String subField = pivotFields.get(1);
pivotResponse.add(key,
doPivots(facetCounts, field, subField, fnames, vnames, this.docs));
} else {
pivotResponse.add(key, doPivots(facetCounts, field, null, fnames, vnames, this.docs));
}
return pivotResponse;
}
/**
* Recursive function to compute all the pivot counts for the values under the specified field
*/
protected List<NamedList<Object>> doPivots(NamedList<Integer> superFacets,
String field, String subField, Deque<String> fnames,Deque<String> vnames,DocSet docs) throws IOException {
SolrIndexSearcher searcher = rb.req.getSearcher();
// TODO: optimize to avoid converting to an external string and then having to convert back to internal below
SchemaField sfield = searcher.getSchema().getField(field);
FieldType ftype = sfield.getType();
String nextField = fnames.poll();
// re-useable BytesRefBuilder for conversion of term values to Objects
BytesRefBuilder termval = new BytesRefBuilder();
List<NamedList<Object>> values = new ArrayList<>( superFacets.size() );
for (Map.Entry<String, Integer> kv : superFacets) {
// Only sub-facet if parent facet has positive count - still may not be any values for the sub-field though
if (kv.getValue() >= getMinCountForField(field)) {
final String fieldValue = kv.getKey();
SimpleOrderedMap<Object> pivot = new SimpleOrderedMap<>();
pivot.add( "field", field );
if (null == fieldValue) {
pivot.add( "value", null );
} else {
ftype.readableToIndexed(fieldValue, termval);
pivot.add( "value", ftype.toObject(sfield, termval.get()) );
}
pivot.add( "count", kv.getValue() );
DocSet subset = getSubset(docs, sfield, fieldValue);
if( subField != null ) {
NamedList<Integer> facetCounts;
if(!vnames.isEmpty()){
String val = vnames.pop();
facetCounts = new NamedList<Integer>();
facetCounts.add(val, getSubsetSize(subset,
searcher.getSchema().getField(subField),
val));
} else {
facetCounts = this.getTermCountsForPivots(subField, subset);
}
if (facetCounts.size() >= 1) {
pivot.add( "pivot", doPivots( facetCounts, subField, nextField, fnames, vnames, subset) );
}
}
values.add( pivot );
}
}
// put the field back on the list
fnames.push( nextField );
return values;
}
/**
* Given a base docset, computes the size of the subset of documents corresponding to the specified pivotValue
*
* @param base the set of documents to evaluate relative to
* @param field the field type used by the pivotValue
* @param pivotValue String representation of the value, may be null (ie: "missing")
*/
private int getSubsetSize(DocSet base, SchemaField field, String pivotValue) throws IOException {
FieldType ft = field.getType();
if ( null == pivotValue ) {
Query query = ft.getRangeQuery(null, field, null, null, false, false);
DocSet hasVal = searcher.getDocSet(query);
return base.andNotSize(hasVal);
} else {
Query query = ft.getFieldQuery(null, field, pivotValue);
return searcher.numDocs(query, base);
}
}
/**
* Given a base docset, computes the subset of documents corresponding to the specified pivotValue
*
* @param base the set of documents to evaluate relative to
* @param field the field type used by the pivotValue
* @param pivotValue String representation of the value, may be null (ie: "missing")
*/
private DocSet getSubset(DocSet base, SchemaField field, String pivotValue) throws IOException {
FieldType ft = field.getType();
if ( null == pivotValue ) {
Query query = ft.getRangeQuery(null, field, null, null, false, false);
DocSet hasVal = searcher.getDocSet(query);
return base.andNot(hasVal);
} else {
Query query = ft.getFieldQuery(null, field, pivotValue);
return searcher.getDocSet(query, base);
}
}
private int getMinCountForField(String fieldname){
return params.getFieldInt(fieldname, FacetParams.FACET_PIVOT_MINCOUNT, 1);
}
}
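
An illustrative sketch (param values assumed) of the per-shard refinement
request shape that process(...) expects: the fpt local param carries a key,
and the matching fpt<key> params carry one encoded value path per queued
refinement.

import org.apache.solr.common.params.ModifiableSolrParams;

public class RefinementRequestSketch {
  public static void main(String[] args) {
    ModifiableSolrParams p = new ModifiableSolrParams();
    p.add("facet", "true");
    // the {!fpt=0} local param tells PivotFacetProcessor this is a refinement
    p.add("facet.pivot", "{!fpt=0}cat,inStock");
    // one encoded value path per value queued for refinement on this shard
    p.add("fpt0", "~electronics");
    p.add("fpt0", "~books");
    System.out.println(p); // prints all of the params as a query string
  }
}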

View File

@@ -0,0 +1,206 @@
package org.apache.solr.handler.component;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.BitSet;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.schema.TrieDateField;
import org.apache.solr.util.PivotListEntry;
/**
* Models a single (value, count) pair that will exist in the collection of values for a
* {@link PivotFacetField} parent. This <code>PivotFacetValue</code> may itself have a
* nested {@link PivotFacetField} child
*
* @see PivotFacetField
* @see PivotFacetFieldValueCollection
*/
@SuppressWarnings("rawtypes")
public class PivotFacetValue {
private final BitSet sourceShards = new BitSet();
private final PivotFacetField parentPivot;
private final Comparable value;
// child can't be final, circular ref on construction
private PivotFacetField childPivot = null;
private int count; // mutable
private PivotFacetValue(PivotFacetField parent, Comparable val) {
this.parentPivot = parent;
this.value = val;
}
/**
* The value of the associated field modeled by this <code>PivotFacetValue</code>.
* May be null if this <code>PivotFacetValue</code> models the count for docs
* "missing" the field value.
*
* @see FacetParams#FACET_MISSING
*/
public Comparable getValue() { return value; }
/** The count corresponding to the value modeled by this <code>PivotFacetValue</code> */
public int getCount() { return count; }
/**
* The {@link PivotFacetField} corresponding to the nested child pivot for this
* <code>PivotFacetValue</code>. May be null if this object is the leaf of a pivot.
*/
public PivotFacetField getChildPivot() { return childPivot; }
/**
* A recursive method that walks up the tree of pivot fields/values to build
* a list of the String representations of the values that lead down to this
* PivotFacetValue.
*
* @return a mutable List of the pivot value Strings leading down to and including
* this pivot value, will never be null but may contain nulls
* @see PivotFacetField#getValuePath
*/
public List<String> getValuePath() {
List<String> out = parentPivot.getValuePath();
// Note: this code doesn't play nice with custom FieldTypes -- see SOLR-6330
if (null == value) {
out.add(null);
} else if (value instanceof Date) {
out.add(TrieDateField.formatExternal((Date) value));
} else {
out.add(value.toString());
}
return out;
}
/**
* A recursive method to construct a new <code>PivotFacetValue</code> object from
* the contents of the {@link NamedList} provided by the specified shard, relative
* to the specified field.
*
* If the <code>NamedList</code> contains data for a child {@link PivotFacetField},
* it will be recursively built as well.
*
* @see PivotFacetField#createFromListOfNamedLists
* @param shardNumber the id of the shard that provided this data
* @param rb The response builder of the current request
* @param parentField the parent field in the current pivot associated with this value
* @param pivotData the data from the specified shard for this pivot value
*/
@SuppressWarnings("unchecked")
public static PivotFacetValue createFromNamedList(int shardNumber, ResponseBuilder rb, PivotFacetField parentField, NamedList<Object> pivotData) {
Comparable pivotVal = null;
int pivotCount = 0;
List<NamedList<Object>> childPivotData = null;
for (int i = 0; i < pivotData.size(); i++) {
String key = pivotData.getName(i);
Object value = pivotData.getVal(i);
PivotListEntry entry = PivotListEntry.get(key);
switch (entry) {
case VALUE:
pivotVal = (Comparable)value;
break;
case FIELD:
assert parentField.field.equals(value)
: "Parent Field mismatch: " + parentField.field + "!=" + value;
break;
case COUNT:
pivotCount = (Integer)value;
break;
case PIVOT:
childPivotData = (List<NamedList<Object>>)value;
break;
default:
throw new RuntimeException("PivotListEntry contains unaccounted for item: " + entry);
}
}
PivotFacetValue newPivotFacet = new PivotFacetValue(parentField, pivotVal);
newPivotFacet.count = pivotCount;
newPivotFacet.sourceShards.set(shardNumber);
newPivotFacet.childPivot = PivotFacetField.createFromListOfNamedLists(shardNumber, rb, newPivotFacet, childPivotData);
return newPivotFacet;
}
/**
* A <b>NON-Recursive</b> method indicating if the specified shard has already
* contributed to the count for this value.
*/
public boolean shardHasContributed(int shardNum) {
return sourceShards.get(shardNum);
}
/**
* A recursive method for generating a NamedList from this value suitable for
* including in a pivot facet response to the original distributed request.
*
* @see PivotFacetField#convertToListOfNamedLists
*/
public NamedList<Object> convertToNamedList() {
NamedList<Object> newList = new SimpleOrderedMap<>();
newList.add(PivotListEntry.FIELD.getName(), parentPivot.field);
newList.add(PivotListEntry.VALUE.getName(), value);
newList.add(PivotListEntry.COUNT.getName(), count);
if (childPivot != null && childPivot.convertToListOfNamedLists() != null) {
newList.add(PivotListEntry.PIVOT.getName(), childPivot.convertToListOfNamedLists());
}
return newList;
}
/**
* Merges in the count contribution from the specified shard for this value.
* This method is recursive if the shard data includes sub-pivots.
*
* @see PivotFacetField#contributeFromShard
* @see PivotFacetField#createFromListOfNamedLists
*/
public void mergeContributionFromShard(int shardNumber, ResponseBuilder rb, NamedList<Object> value) {
assert null != value : "can't merge in null data";
if (!shardHasContributed(shardNumber)) {
sourceShards.set(shardNumber);
count += PivotFacetHelper.getCount(value);
}
List<NamedList<Object>> shardChildPivots = PivotFacetHelper.getPivots(value);
// sub pivot -- we may not have seen this yet depending on refinement
if (null == childPivot) {
childPivot = PivotFacetField.createFromListOfNamedLists(shardNumber, rb, this, shardChildPivots);
} else {
childPivot.contributeFromShard(shardNumber, rb, shardChildPivots);
}
}
public String toString(){
return String.format(Locale.ROOT, "F:%s V:%s Co:%d Ch?:%s",
parentPivot.field, value, count, (this.childPivot !=null));
}
}
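
For reference, a sketch of the per-value NamedList shape consumed by
createFromNamedList and produced by convertToNamedList above: ordered field,
value, and count entries, with an optional nested pivot entry.

import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;

public class PivotShardDataSketch {
  public static void main(String[] args) {
    NamedList<Object> entry = new SimpleOrderedMap<>();
    entry.add("field", "cat");
    entry.add("value", "electronics");
    entry.add("count", 42);
    // an optional "pivot" entry would hold the child List<NamedList<Object>>
    System.out.println(entry); // {field=cat,value=electronics,count=42}
  }
}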

View File

@@ -39,6 +39,7 @@ public class ShardRequest {
public final static int PURPOSE_GET_TERMS =0x400;
public final static int PURPOSE_GET_TOP_GROUPS =0x800;
public final static int PURPOSE_GET_MLT_RESULTS =0x1000;
public final static int PURPOSE_REFINE_PIVOT_FACETS =0x2000;
public int purpose; // the purpose of this request
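
Purpose values are bit flags, so a minimal sketch of how a component would
mark and test the new flag on a request (assuming the default ShardRequest
constructor) looks like this:

import org.apache.solr.handler.component.ShardRequest;

public class PurposeFlagSketch {
  public static void main(String[] args) {
    ShardRequest sreq = new ShardRequest();
    sreq.purpose |= ShardRequest.PURPOSE_REFINE_PIVOT_FACETS;
    // bitwise AND tests membership without disturbing the other flags
    System.out.println((sreq.purpose & ShardRequest.PURPOSE_REFINE_PIVOT_FACETS) != 0); // true
  }
}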

View File

@@ -332,15 +332,45 @@ public class SimpleFacets {
ENUM, FC, FCS;
}
/**
* Term counts for use in pivot faceting that respects the appropriate mincount
* @see FacetParams#FACET_PIVOT_MINCOUNT
*/
public NamedList<Integer> getTermCountsForPivots(String field, DocSet docs) throws IOException {
Integer mincount = params.getFieldInt(field, FacetParams.FACET_PIVOT_MINCOUNT, 1);
return getTermCounts(field, mincount, docs);
}
/**
* Term counts for use in field faceting that respects the appropriate mincount
*
* @see FacetParams#FACET_MINCOUNT
*/
public NamedList<Integer> getTermCounts(String field) throws IOException {
return getTermCounts(field, this.docs);
}
/**
* Term counts for use in field faceting that respects the appropriate mincount
*
* @see FacetParams#FACET_MINCOUNT
*/
public NamedList<Integer> getTermCounts(String field, DocSet base) throws IOException {
Integer mincount = params.getFieldInt(field, FacetParams.FACET_MINCOUNT);
return getTermCounts(field, mincount, base);
}
/**
* Term counts for use in field faceting that respects the specified mincount -
* if mincount is null, the "zeros" param is consulted for the appropriate backcompat
* default
*
* @see FacetParams#FACET_ZEROS
*/
private NamedList<Integer> getTermCounts(String field, Integer mincount, DocSet base) throws IOException {
int offset = params.getFieldInt(field, FacetParams.FACET_OFFSET, 0);
int limit = params.getFieldInt(field, FacetParams.FACET_LIMIT, 100);
if (limit == 0) return new NamedList<>();
Integer mincount = params.getFieldInt(field, FacetParams.FACET_MINCOUNT);
if (mincount==null) {
Boolean zeros = params.getFieldBool(field, FacetParams.FACET_ZEROS);
// mincount = (zeros!=null && zeros) ? 0 : 1;
@@ -554,7 +584,8 @@ public class SimpleFacets {
try {
NamedList<Object> result = new SimpleOrderedMap<>();
if(termList != null) {
result.add(workerKey, getListedTermCounts(workerFacetValue, termList, workerBase));
List<String> terms = StrUtils.splitSmart(termList, ",", true);
result.add(workerKey, getListedTermCounts(workerFacetValue, workerBase, terms));
} else {
result.add(workerKey, getTermCounts(workerFacetValue, workerBase));
}
@@ -597,13 +628,25 @@ public class SimpleFacets {
}
/**
* Computes the term-&gt;count counts for the specified termList relative to the base docset.
* @param field the name of the field to compute term counts against
* @param termList a comma separated (and backslash escaped) list of term values (in the specified field) to compute the counts for
* @see StrUtils#splitSmart
*/
private NamedList<Integer> getListedTermCounts(String field, String termList) throws IOException {
return getListedTermCounts(field, termList, this.docs);
List<String> terms = StrUtils.splitSmart(termList, ",", true);
return getListedTermCounts(field, this.docs, terms);
}
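// Illustrative note, not part of the patch: splitSmart with decode=true honors
// backslash escaping, so a termList of bbc,microsoft\,emea should produce the
// two terms "bbc" and "microsoft,emea" rather than three.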
private NamedList getListedTermCounts(String field, String termList, DocSet base) throws IOException {
/**
* Computes the term-&gt;count counts for the specified term values relative to the specified base docset.
* @param field the name of the field to compute term counts against
* @param base the docset to compute term counts relative to
* @param terms a list of term values (in the specified field) to compute the counts for
*/
protected NamedList<Integer> getListedTermCounts(String field, DocSet base, List<String> terms) throws IOException {
FieldType ft = searcher.getSchema().getFieldType(field);
List<String> terms = StrUtils.splitSmart(termList, ",", true);
NamedList<Integer> res = new NamedList<>();
for (String term : terms) {
String internal = ft.toInternal(term);

View File

@@ -0,0 +1,62 @@
package org.apache.solr.util;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.Locale;
/**
* Enum for modeling the elements of a (nested) pivot entry as expressed in a NamedList
*/
public enum PivotListEntry {
FIELD(0),
VALUE(1),
COUNT(2),
PIVOT(3);
// we could just use the ordinal(), but safer to be very explicit
private final int index;
private PivotListEntry(int index) {
this.index = index;
}
/**
* Case-insensitive lookup of PivotListEntry by name
* @see #getName
*/
public static PivotListEntry get(String name) {
return PivotListEntry.valueOf(name.toUpperCase(Locale.ROOT));
}
/**
* Name of this entry when used in response
* @see #get
*/
public String getName() {
return name().toLowerCase(Locale.ROOT);
}
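// Illustrative note, not part of the patch: get and getName are inverses, e.g.
//   PivotListEntry.get("count").getName()  -> "count"
//   PivotListEntry.get("COUNT")            -> PivotListEntry.COUNT (lookup is case-insensitive)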
/**
* Index of this entry when used in response
*/
public int getIndex() {
return index;
}
}

View File

@@ -31,7 +31,6 @@ import static org.apache.solr.common.params.CursorMarkParams.CURSOR_MARK_START;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.TrieDateField;
import org.apache.solr.search.CursorMark; //jdoc
import org.noggit.ObjectBuilder;
@@ -39,12 +38,10 @@ import org.noggit.ObjectBuilder;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Collection;
import java.util.Collections;
import java.util.Locale;
import java.util.Map;
import java.util.UUID;
@@ -619,11 +616,6 @@ public class CursorPagingTest extends SolrTestCaseJ4 {
return 0 != TestUtil.nextInt(random(), 0, 30);
}
/** returns likely most (1/10) of the time, otherwise unlikely */
private static Object skewed(Object likely, Object unlikely) {
return (0 == TestUtil.nextInt(random(), 0, 9)) ? unlikely : likely;
}
/**
* An immutable list of the fields in the schema that can be used for sorting,
* deterministically random order.
@@ -898,7 +890,7 @@ public class CursorPagingTest extends SolrTestCaseJ4 {
1.0D / random().nextInt(37)));
}
if (useField()) {
doc.addField("str", skewed(randomUsableUnicodeString(),
doc.addField("str", skewed(randomXmlUsableUnicodeString(),
TestUtil.randomSimpleString(random(), 1, 1)));
}
if (useField()) {
@@ -908,8 +900,7 @@ public class CursorPagingTest extends SolrTestCaseJ4 {
doc.addField("bin", ByteBuffer.wrap(randBytes));
}
if (useField()) {
doc.addField("date", skewed(randomDate(),
dateWithRandomSecondOn2010_10_31_at_10_31()));
doc.addField("date", skewed(randomDate(), randomSkewedDate()));
}
if (useField()) {
doc.addField("uuid", UUID.randomUUID().toString());
@@ -949,28 +940,6 @@ public class CursorPagingTest extends SolrTestCaseJ4 {
}
}
/**
* We want "realistic" unicode strings beyond simple ascii, but because our
* updates use XML we need to ensure we don't get characters from the "Specials" code block.
*/
private static String randomUsableUnicodeString() {
String result = TestUtil.randomRealisticUnicodeString(random());
if (result.matches(".*\\p{InSpecials}.*")) {
// oh well
result = TestUtil.randomSimpleString(random());
}
return result;
}
private static String randomDate() {
return TrieDateField.formatExternal(new Date(random().nextLong()));
}
private static String dateWithRandomSecondOn2010_10_31_at_10_31() {
return String.format(Locale.ROOT, "2010-10-31T10:31:%02d.000Z",
TestUtil.nextInt(random(), 0, 59));
}
private static final String[] currencies = { "USD", "EUR", "NOK" };
public static String randomCurrency() {

View File

@@ -61,6 +61,7 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
@Override
public void doTest() throws Exception {
QueryResponse rsp = null;
int backupStress = stress; // make a copy so we can restore
@@ -174,6 +175,13 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
// a facet query to test out chars out of the ascii range
query("q","*:*", "rows",0, "facet","true", "facet.query","{!term f=foo_s}international\u00ff\u01ff\u2222\u3333");
// simple field facet on date fields
rsp = query("q","*:*", "rows",0, "facet","true", "facet.field", tdate_a);
assertEquals(1, rsp.getFacetFields().size());
rsp = query("q","*:*", "rows",0, "facet","true",
"facet.field", tdate_b, "facet.field", tdate_a);
assertEquals(2, rsp.getFacetFields().size());
// simple date facet on one field
query("q","*:*", "rows",100, "facet","true",
@@ -337,7 +345,7 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
q.set("q", "*:*");
q.set(ShardParams.SHARDS_INFO, true);
setDistributedParams(q);
QueryResponse rsp = queryServer(q);
rsp = queryServer(q);
NamedList<?> sinfo = (NamedList<?>) rsp.getResponse().get(ShardParams.SHARDS_INFO);
String shards = getShardsString();
int cnt = StringUtils.countMatches(shards, ",")+1;

View File

@@ -0,0 +1,530 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.cloud;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.SolrTestCaseJ4.SuppressSSL;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.PivotField;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.schema.TrieDateField;
import static org.apache.solr.common.params.FacetParams.*;
import org.apache.commons.lang.StringUtils;
import org.junit.BeforeClass;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Arrays;
import java.util.Set;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Date;
/**
* <p>
* Randomized testing of Pivot Faceting using SolrCloud.
* </p>
* <p>
* After indexing a bunch of random docs, picks some random fields to pivot facet on,
* and then confirms that the resulting counts match the results of filtering on those
* values. This gives us strong assertions on the correctness of the total counts for
* each pivot value, but no assertions that the correct "top" counts were chosen.
* </p>
* <p>
* NOTE: this test ignores the control collection and only deals with the
* CloudSolrServer - this is because the randomized field values make it very easy for
* the term stats to miss values even with the overrequest.
* (because so many values will tie for "1"). What we care about here is
* that the counts we get back are correct and match what we get when filtering on those
* constraints.
* </p>
*/
@SuppressSSL // Too Slow
public class TestCloudPivotFacet extends AbstractFullDistribZkTestBase {
public static Logger log = LoggerFactory.getLogger(TestCloudPivotFacet.class);
// param used by test purely for tracing & validation
private static String TRACE_MIN = "_test_min";
// param used by test purely for tracing & validation
private static String TRACE_MISS = "_test_miss";
// param used by test purely for tracing & validation
private static String TRACE_SORT = "_test_sort";
/**
* Controls the odds of any given doc having a value in any given field -- as this gets lower,
* the counts for "facet.missing" pivots should increase.
* @see #useField()
*/
private static int useFieldRandomizedFactor = -1;
@BeforeClass
public static void initUseFieldRandomizedFactor() {
useFieldRandomizedFactor = TestUtil.nextInt(random(), 2, 30);
log.info("init'ing useFieldRandomizedFactor = {}", useFieldRandomizedFactor);
}
@Override
public void doTest() throws Exception {
handle.clear();
handle.put("QTime", SKIPVAL);
handle.put("timestamp", SKIPVAL);
final Set<String> fieldNameSet = new HashSet<>();
// build up a randomized index
final int numDocs = atLeast(500);
log.info("numDocs: {}", numDocs);
for (int i = 1; i <= numDocs; i++) {
SolrInputDocument doc = buildRandomDocument(i);
// not efficient, but it guarantees that even if people change buildRandomDocument
// we'll always have the full list of fields w/o needing to keep code in sync
fieldNameSet.addAll(doc.getFieldNames());
cloudClient.add(doc);
}
cloudClient.commit();
fieldNameSet.remove("id");
assertTrue("WTF, bogus field exists?", fieldNameSet.add("bogus_not_in_any_doc_s"));
final String[] fieldNames = fieldNameSet.toArray(new String[fieldNameSet.size()]);
Arrays.sort(fieldNames); // need determinism for buildRandomPivot calls
for (int i = 0; i < 5; i++) {
String q = "*:*";
if (random().nextBoolean()) {
q = "id:[* TO " + TestUtil.nextInt(random(),300,numDocs) + "]";
}
ModifiableSolrParams baseP = params("rows", "0", "q", q);
if (random().nextBoolean()) {
baseP.add("fq", "id:[* TO " + TestUtil.nextInt(random(),200,numDocs) + "]");
}
ModifiableSolrParams pivotP = params(FACET,"true",
FACET_PIVOT, buildRandomPivot(fieldNames));
if (random().nextBoolean()) {
pivotP.add(FACET_PIVOT, buildRandomPivot(fieldNames));
}
// keep limit low - lots of unique values, and lots of depth in pivots
pivotP.add(FACET_LIMIT, ""+TestUtil.nextInt(random(),1,17));
// sometimes use an offset
if (random().nextBoolean()) {
pivotP.add(FACET_OFFSET, ""+TestUtil.nextInt(random(),0,7));
}
if (random().nextBoolean()) {
String min = ""+TestUtil.nextInt(random(),0,numDocs+10);
pivotP.add(FACET_PIVOT_MINCOUNT, min);
// trace param for validation
baseP.add(TRACE_MIN, min);
}
if (random().nextBoolean()) {
String missing = ""+random().nextBoolean();
pivotP.add(FACET_MISSING, missing);
// trace param for validation
baseP.add(TRACE_MISS, missing);
}
if (random().nextBoolean()) {
String sort = random().nextBoolean() ? "index" : "count";
pivotP.add(FACET_SORT, sort);
// trace param for validation
baseP.add(TRACE_SORT, sort);
}
// overrequest
//
// NOTE: since this test focuses on accuracy of refinement, and doesn't do
// control collection comparisons, there isn't a lot of need for excessive
// overrequesting -- we focus here on trying to exercise the various edge cases
// involved as different values are used with overrequest
if (0 == TestUtil.nextInt(random(),0,4)) {
// we want a decent chance of no overrequest at all
pivotP.add(FACET_OVERREQUEST_COUNT, "0");
pivotP.add(FACET_OVERREQUEST_RATIO, "0");
} else {
if (random().nextBoolean()) {
pivotP.add(FACET_OVERREQUEST_COUNT, ""+TestUtil.nextInt(random(),0,5));
}
if (random().nextBoolean()) {
// sometimes give a ratio less than 1, code should be smart enough to deal
float ratio = 0.5F + random().nextFloat();
// sometimes go negative
if (random().nextBoolean()) {
ratio *= -1;
}
pivotP.add(FACET_OVERREQUEST_RATIO, ""+ratio);
}
}
assertPivotCountsAreCorrect(baseP, pivotP);
}
}
/**
* Given some query params, executes the request against the cloudClient and
* then walks the pivot facet values in the response, treating each one as a
* filter query to assert the pivot counts are correct.
*/
private void assertPivotCountsAreCorrect(SolrParams baseParams,
SolrParams pivotParams)
throws SolrServerException {
SolrParams initParams = SolrParams.wrapAppended(pivotParams, baseParams);
log.info("Doing full run: {}", initParams);
countNumFoundChecks = 0;
NamedList<List<PivotField>> pivots = null;
try {
QueryResponse initResponse = cloudClient.query(initParams);
pivots = initResponse.getFacetPivot();
assertNotNull(initParams + " has null pivots?", pivots);
assertEquals(initParams + " num pivots",
initParams.getParams("facet.pivot").length, pivots.size());
} catch (Exception e) {
throw new RuntimeException("init query failed: " + initParams + ": " +
e.getMessage(), e);
}
try {
for (Map.Entry<String,List<PivotField>> pivot : pivots) {
final String pivotKey = pivot.getKey();
// :HACK: for counting the max possible pivot depth
final int maxDepth = 1 + pivotKey.length() - pivotKey.replace(",","").length();
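// e.g. (illustrative): "place_s,company_t" has one comma, so maxDepth == 2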
assertTraceOk(pivotKey, baseParams, pivot.getValue());
// NOTE: we can't make any assumptions/assertions about the number of
// constraints here because of the random data - which means if pivoting is
// completely broken and there are no constraints this loop could be a No-Op
// but in that case we just have to trust that DistributedFacetPivotTest
// will catch it.
for (PivotField constraint : pivot.getValue()) {
int depth = assertPivotCountsAreCorrect(pivotKey, baseParams, constraint);
// we can't assert that the depth reached is the same as the depth requested
// because the fq and/or mincount may have pruned the tree too much
assertTrue("went too deep: "+depth+": " + pivotKey + " ==> " + pivot,
depth <= maxDepth);
}
}
} catch (AssertionError e) {
throw new AssertionError(initParams + " ==> " + e.getMessage(), e);
} finally {
log.info("Ending full run (countNumFoundChecks={}): {}",
countNumFoundChecks, initParams);
}
}
/**
* Recursive helper method for asserting that pivot constraint counts match
* results when filtering on those constraints. Returns the recursive depth reached
* (for sanity checking)
*/
private int assertPivotCountsAreCorrect(String pivotName,
SolrParams baseParams,
PivotField constraint)
throws SolrServerException {
SolrParams p = SolrParams.wrapAppended(baseParams,
params("fq", buildFilter(constraint)));
List<PivotField> subPivots = null;
try {
assertNumFound(pivotName, constraint.getCount(), p);
subPivots = constraint.getPivot();
} catch (Exception e) {
throw new RuntimeException(pivotName + ": count query failed: " + p + ": " +
e.getMessage(), e);
}
int depth = 0;
if (null != subPivots) {
assertTraceOk(pivotName, baseParams, subPivots);
for (PivotField subPivot : subPivots) {
depth = assertPivotCountsAreCorrect(pivotName, p, subPivot);
}
}
return depth + 1;
}
/**
* Verify that the PivotFields we're looking at don't violate any of the expected
* behaviors based on the <code>TRACE_*</code> params found in the base params
*/
private void assertTraceOk(String pivotName, SolrParams baseParams, List<PivotField> constraints) {
if (null == constraints || 0 == constraints.size()) {
return;
}
final int maxIdx = constraints.size() - 1;
final int min = baseParams.getInt(TRACE_MIN, -1);
final boolean expectMissing = baseParams.getBool(TRACE_MISS, false);
final boolean checkCount = "count".equals(baseParams.get(TRACE_SORT, "count"));
int prevCount = Integer.MAX_VALUE;
for (int i = 0; i <= maxIdx; i++) {
final PivotField constraint = constraints.get(i);
final int count = constraint.getCount();
if (0 < min) {
assertTrue(pivotName + ": val #"+i +" of " + maxIdx +
": count("+count+") < facet.mincount("+min+"): " + constraint,
min <= count);
}
// missing value must always come last, but only if facet.missing was used
// and may not exist at all (mincount, none missing for this sub-facet, etc...)
if ((i < maxIdx) || (!expectMissing)) {
assertNotNull(pivotName + ": val #"+i +" of " + maxIdx +
" has null value: " + constraint,
constraint.getValue());
}
// if we are expecting count based sort, then the count of each constraint
// must be lt-or-eq the count that came before -- or it must be the last value and
// be "missing"
if (checkCount) {
assertTrue(pivotName + ": val #"+i +" of" + maxIdx +
": count("+count+") > prevCount("+prevCount+"): " + constraint,
((count <= prevCount)
|| (expectMissing && i == maxIdx && null == constraint.getValue())));
prevCount = count;
}
}
}
/**
* Given a PivotField constraint, generate a query for the field+value
* for use in an <code>fq</code> to verify the constraint count
*/
private static String buildFilter(PivotField constraint) {
Object value = constraint.getValue();
if (null == value) {
// facet.missing, exclude any indexed term
return "-" + constraint.getField() + ":[* TO *]";
}
// otherwise, build up a term filter...
String prefix = "{!term f=" + constraint.getField() + "}";
if (value instanceof Date) {
return prefix + TrieDateField.formatExternal((Date)value);
} else {
return prefix + value;
}
}
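// Illustrative examples, not part of the patch, of the filters produced:
//   value "cardiff" on place_s  -> "{!term f=place_s}cardiff"
//   null value (facet.missing)  -> "-place_s:[* TO *]"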
/**
* Creates a random facet.pivot param string using some of the specified fieldNames
*/
private static String buildRandomPivot(String[] fieldNames) {
final int depth = TestUtil.nextInt(random(), 1, 3);
String [] fields = new String[depth];
for (int i = 0; i < depth; i++) {
// yes this means we might use the same field twice
// makes it a robust test (especially for multi-valued fields)
fields[i] = fieldNames[TestUtil.nextInt(random(),0,fieldNames.length-1)];
}
return StringUtils.join(fields, ",");
}
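// Illustrative note, not part of the patch: a generated param might look like
// "pivot_b,dense_pivot_y_s1,pivot_b" -- repeating a field is deliberately allowed.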
/**
* Creates a document with randomized field values, some of which will be missing,
* some of which will be multi-valued (per the schema) and some of which will be
* skewed so that small subsets of the ranges will be more common (resulting in an
* increased likelihood of duplicate values)
*
* @see #buildRandomPivot
*/
private static SolrInputDocument buildRandomDocument(int id) {
SolrInputDocument doc = sdoc("id", id);
// most fields are in most docs
// if field is in a doc, then "skewed" chance val is from a dense range
// (hopefully with lots of duplication)
for (String prefix : new String[] { "pivot_i", "pivot_ti" }) {
if (useField()) {
doc.addField(prefix+"1", skewed(TestUtil.nextInt(random(), 20, 50),
random().nextInt()));
}
if (useField()) {
int numMulti = atLeast(1);
while (0 < numMulti--) {
doc.addField(prefix, skewed(TestUtil.nextInt(random(), 20, 50),
random().nextInt()));
}
}
}
for (String prefix : new String[] { "pivot_l", "pivot_tl" }) {
if (useField()) {
doc.addField(prefix+"1", skewed(TestUtil.nextInt(random(), 5000, 5100),
random().nextLong()));
}
if (useField()) {
int numMulti = atLeast(1);
while (0 < numMulti--) {
doc.addField(prefix, skewed(TestUtil.nextInt(random(), 5000, 5100),
random().nextLong()));
}
}
}
for (String prefix : new String[] { "pivot_f", "pivot_tf" }) {
if (useField()) {
doc.addField(prefix+"1", skewed(1.0F / random().nextInt(13),
random().nextFloat() * random().nextInt()));
}
if (useField()) {
int numMulti = atLeast(1);
while (0 < numMulti--) {
doc.addField(prefix, skewed(1.0F / random().nextInt(13),
random().nextFloat() * random().nextInt()));
}
}
}
for (String prefix : new String[] { "pivot_d", "pivot_td" }) {
if (useField()) {
doc.addField(prefix+"1", skewed(1.0D / random().nextInt(19),
random().nextDouble() * random().nextInt()));
}
if (useField()) {
int numMulti = atLeast(1);
while (0 < numMulti--) {
doc.addField(prefix, skewed(1.0D / random().nextInt(19),
random().nextDouble() * random().nextInt()));
}
}
}
for (String prefix : new String[] { "pivot_dt", "pivot_tdt" }) {
if (useField()) {
doc.addField(prefix+"1", skewed(randomSkewedDate(), randomDate()));
}
if (useField()) {
int numMulti = atLeast(1);
while (0 < numMulti--) {
doc.addField(prefix, skewed(randomSkewedDate(), randomDate()));
}
}
}
{
String prefix = "pivot_b";
if (useField()) {
doc.addField(prefix+"1", random().nextBoolean() ? "t" : "f");
}
if (useField()) {
int numMulti = atLeast(1);
while (0 < numMulti--) {
doc.addField(prefix, random().nextBoolean() ? "t" : "f");
}
}
}
for (String prefix : new String[] { "pivot_x_s", "pivot_y_s", "pivot_z_s"}) {
if (useField()) {
doc.addField(prefix+"1", skewed(TestUtil.randomSimpleString(random(), 1, 1),
randomXmlUsableUnicodeString()));
}
if (useField()) {
int numMulti = atLeast(1);
while (0 < numMulti--) {
doc.addField(prefix, skewed(TestUtil.randomSimpleString(random(), 1, 1),
randomXmlUsableUnicodeString()));
}
}
}
//
// for the remaining fields, make every doc have a value in a dense range
//
for (String prefix : new String[] { "dense_pivot_x_s", "dense_pivot_y_s" }) {
if (useField()) {
doc.addField(prefix+"1", TestUtil.randomSimpleString(random(), 1, 1));
}
if (useField()) {
int numMulti = atLeast(1);
while (0 < numMulti--) {
doc.addField(prefix, TestUtil.randomSimpleString(random(), 1, 1));
}
}
}
for (String prefix : new String[] { "dense_pivot_i", "dense_pivot_ti" }) {
if (useField()) {
doc.addField(prefix+"1", TestUtil.nextInt(random(), 20, 50));
}
if (useField()) {
int numMulti = atLeast(1);
while (0 < numMulti--) {
doc.addField(prefix, TestUtil.nextInt(random(), 20, 50));
}
}
}
return doc;
}
/**
* Similar to usually() but we want it to happen just as often regardless
* of test multiplier and nightly status
*
* @see #useFieldRandomizedFactor
*/
private static boolean useField() {
assert 0 < useFieldRandomizedFactor;
return 0 != TestUtil.nextInt(random(), 0, useFieldRandomizedFactor);
}
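// Illustrative note, not part of the patch: TestUtil.nextInt is inclusive of both
// endpoints, so useField() returns true with probability N/(N+1) where
// N == useFieldRandomizedFactor; e.g. N=9 puts a field in roughly 90% of docs.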
/**
* Asserts the number of docs matching the SolrParams against the cloudClient
*/
private void assertNumFound(String msg, int expected, SolrParams p)
throws SolrServerException {
countNumFoundChecks++;
SolrParams params = SolrParams.wrapDefaults(params("rows","0"), p);
assertEquals(msg + ": " + params,
expected, cloudClient.query(params).getResults().getNumFound());
}
/**
* @see #assertNumFound
* @see #assertPivotCountsAreCorrect(SolrParams,SolrParams)
*/
private int countNumFoundChecks = 0;
}

View File

@@ -0,0 +1,762 @@
package org.apache.solr.handler.component;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.Date;
import java.util.List;
import java.io.IOException;
import org.apache.solr.BaseDistributedSearchTestCase;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.PivotField;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.SolrParams;
import junit.framework.AssertionFailedError;
public class DistributedFacetPivotLargeTest extends BaseDistributedSearchTestCase {
public static final String SPECIAL = "";
public DistributedFacetPivotLargeTest() {
this.fixShardCount = true;
this.shardCount = 4; // we leave one empty as an edge case
}
@Override
public void doTest() throws Exception {
this.stress = 0;
handle.clear();
handle.put("QTime", SKIPVAL);
handle.put("timestamp", SKIPVAL);
handle.put("maxScore", SKIPVAL);
setupDistributedPivotFacetDocuments();
QueryResponse rsp = null;
List<PivotField> pivots = null;
PivotField firstInt = null;
PivotField firstBool = null;
PivotField firstDate = null;
PivotField firstPlace = null;
PivotField firstCompany = null;
// basic check w/ limit & default sort (count)
rsp = query( "q", "*:*",
"rows", "0",
"facet","true",
"facet.pivot","place_s,company_t",
FacetParams.FACET_LIMIT, "12");
pivots = rsp.getFacetPivot().get("place_s,company_t");
assertEquals(12, pivots.size());
firstPlace = pivots.get(0);
assertPivot("place_s", "cardiff", 257, firstPlace);
assertPivot("company_t", "bbc", 101, firstPlace.getPivot().get(0));
// Microsoft will come back wrong if refinement was not done correctly
assertPivot("company_t", "microsoft", 56, firstPlace.getPivot().get(1));
// trivial mincount=0 check
rsp = query( "q", "does_not_exist_s:foo",
"rows", "0",
"facet","true",
"facet.pivot","company_t",
FacetParams.FACET_LIMIT, "10",
FacetParams.FACET_PIVOT_MINCOUNT,"0");
pivots = rsp.getFacetPivot().get("company_t");
assertEquals(10, pivots.size());
for (PivotField p : pivots) {
assertEquals(0, p.getCount());
}
// sanity check limit=0 w/ mincount=0 & missing=true
//
// SOLR-6328: doesn't work for single node, so can't work for distrib either (yet)
//
// PivotFacetField's init of needRefinementAtThisLevel is flagged as needing potential change
//
// rsp = query( "q", "*:*",
// "rows", "0",
// "facet","true",
// "f.company_t.facet.limit", "10",
// "facet.pivot","special_s,bogus_s,company_t",
// "facet.missing", "true",
// FacetParams.FACET_LIMIT, "0",
// FacetParams.FACET_PIVOT_MINCOUNT,"0");
// pivots = rsp.getFacetPivot().get("special_s,bogus_s,company_t");
// assertEquals(1, pivots.size()); // only the missing
// assertPivot("special_s", null, docNumber - 5, pivots.get(0)); // 5 docs w/special_s
// assertEquals(pivots.toString(), 1, pivots.get(0).getPivot());
// assertPivot("bogus_s", null, docNumber, pivots.get(0).getPivot().get(0));
// // TODO: some asserts on company results
// basic check w/ default sort, limit, & mincount==0
rsp = query( "q", "*:*",
"rows", "0",
"facet","true",
"facet.pivot","place_s,company_t",
FacetParams.FACET_LIMIT, "50",
FacetParams.FACET_PIVOT_MINCOUNT,"0");
pivots = rsp.getFacetPivot().get("place_s,company_t");
assertEquals(50, pivots.size());
firstPlace = pivots.get(0);
assertPivot("place_s", "cardiff", 257, firstPlace);
assertPivot("company_t", "bbc", 101, firstPlace.getPivot().get(0));
// Microsoft will come back wrong if refinement was not done correctly
assertPivot("company_t", "microsoft", 56, firstPlace.getPivot().get(1));
// sort=index + offset + limit w/ some variables
for (SolrParams variableParams :
new SolrParams[] { // both variations should work just as well
// defaults
params(),
// force refinement
params(FacetParams.FACET_OVERREQUEST_RATIO, "1",
FacetParams.FACET_OVERREQUEST_COUNT, "0") }) {
SolrParams p = SolrParams.wrapDefaults( params( "q", "*:*",
"rows", "0",
"facet","true",
"facet.sort","index",
"f.place_s.facet.limit", "20",
"f.place_s.facet.offset", "40",
"facet.pivot", "place_s,company_t"),
variableParams );
try {
rsp = query( p );
pivots = rsp.getFacetPivot().get("place_s,company_t");
assertEquals(20, pivots.size()); // limit
for (int i = 0; i < 10; i++) {
PivotField place = pivots.get(i);
assertTrue(place.toString(), place.getValue().toString().endsWith("placeholder"));
assertEquals(3, place.getPivot().size());
assertPivot("company_t", "bbc", 6, place.getPivot().get(0));
assertPivot("company_t", "microsoft", 6, place.getPivot().get(1));
assertPivot("company_t", "polecat", 6, place.getPivot().get(2));
}
assertPivot("place_s", "cardiff", 257, pivots.get(10));
assertPivot("place_s", "krakaw", 1, pivots.get(11));
assertPivot("place_s", "medical staffing network holdings, inc.", 51, pivots.get(12));
for (int i = 13; i < 20; i++) {
PivotField place = pivots.get(i);
assertTrue(place.toString(), place.getValue().toString().startsWith("placeholder"));
assertEquals(1, place.getPivot().size());
PivotField company = place.getPivot().get(0);
assertTrue(company.toString(), company.getValue().toString().startsWith("compholder"));
assertEquals(company.toString(), 1, company.getCount());
}
} catch (AssertionFailedError ae) {
throw new AssertionError(ae.getMessage() + " <== " + p.toString(), ae);
}
}
// sort=index + mincount=0
//
// SOLR-6329: facet.pivot.mincount=0 doesn't work well with distrib
//
// broken honda
//
// This is tricky, here's what i think is happening....
// - "company:honda" only exists on twoShard, and only w/ "place:cardiff"
// - twoShard has no other places in its docs
// - twoShard can't return any other places w/ honda as a count=0 sub-value
// - if we refined all other companies places, would twoShard return honda==0 ?
// ... but there's no refinement since mincount==0
// - would it even matter
//
// should we remove the refinement short circuit?
//
// rsp = query( params( "q", "*:*",
// "rows", "0",
// "facet","true",
// "facet.sort","index",
// "f.place_s.facet.limit", "20",
// "f.place_s.facet.offset", "40",
// FacetParams.FACET_PIVOT_MINCOUNT,"0",
// "facet.pivot", "place_s,company_t") );
// // TODO: more asserts
//
//
// really trivial demonstration of the above problem
//
// rsp = query( params( "q", "*:*",
// "rows", "0",
// "facet","true",
// FacetParams.FACET_PIVOT_MINCOUNT,"0",
// "facet.pivot", "top_s,sub_s") );
// basic check w/ limit & index sort
for (SolrParams facetParams :
// results should be the same regardless of whether local params are used
new SolrParams[] {
// Broken: SOLR-6193
// params("facet.pivot","{!facet.limit=4 facet.sort=index}place_s,company_t"),
// params("facet.pivot","{!facet.sort=index}place_s,company_t",
// FacetParams.FACET_LIMIT, "4"),
params("facet.pivot","place_s,company_t",
FacetParams.FACET_LIMIT, "4",
"facet.sort", "index") }) {
SolrParams p = SolrParams.wrapDefaults( params( "q", "*:*",
"rows", "0",
"facet","true"),
facetParams );
try {
rsp = query( p );
pivots = rsp.getFacetPivot().get("place_s,company_t");
assertEquals(4, pivots.size());
firstPlace = pivots.get(0);
assertPivot("place_s", "0placeholder", 6, firstPlace);
firstCompany = firstPlace.getPivot().get(0);
assertPivot("company_t", "bbc", 6, firstCompany);
} catch (AssertionFailedError ae) {
throw new AssertionError(ae.getMessage() + " <== " + p.toString(), ae);
}
}
// Pivot Faceting (combined with Field Faceting)
for (SolrParams facetParams :
// with and w/o an excluded fq
// (either way, facet results should be the same)
new SolrParams[] {
params("facet.pivot","place_s,company_t",
"facet.field","place_s"),
params("facet.pivot","{!ex=ok}place_s,company_t",
"facet.field","{!ex=ok}place_s",
"fq","{!tag=ok}place_s:cardiff"),
params("facet.pivot","{!ex=pl,co}place_s,company_t",
"fq","{!tag=pl}place_s:cardiff",
"fq","{!tag=co}company_t:bbc") }) {
// default order (count)
rsp = query( SolrParams.wrapDefaults(params("q", "*:*",
"rows", "0",
"facet","true",
FacetParams.FACET_LIMIT, "4"),
facetParams) );
pivots = rsp.getFacetPivot().get("place_s,company_t");
assertEquals(4, pivots.size());
firstPlace = pivots.get(0);
assertPivot("place_s", "cardiff", 257, firstPlace);
assertEquals(4, firstPlace.getPivot().size());
firstCompany = firstPlace.getPivot().get(0);
assertPivot("company_t", "bbc", 101, firstCompany);
// Index Order
rsp = query( SolrParams.wrapDefaults(params("q", "*:*",
"rows", "0",
"facet","true",
FacetParams.FACET_LIMIT, "4",
"facet.sort", "index"),
facetParams) );
pivots = rsp.getFacetPivot().get("place_s,company_t");
assertEquals(4, pivots.size());
firstPlace = pivots.get(0);
assertPivot("place_s", "0placeholder", 6, firstPlace);
assertEquals(3, firstPlace.getPivot().size()); // num vals in data < limit==3
firstCompany = firstPlace.getPivot().get(0);
assertPivot("company_t", "bbc", 6, firstCompany);
// Field level limits
rsp = query( SolrParams.wrapDefaults(params("q", "*:*",
"rows", "0",
"facet","true",
"f.place_s.facet.limit","2",
"f.company_t.facet.limit","4"),
facetParams) );
pivots = rsp.getFacetPivot().get("place_s,company_t");
assertEquals(2, pivots.size());
firstPlace = pivots.get(0);
assertPivot("place_s", "cardiff", 257, firstPlace);
assertEquals(4, firstPlace.getPivot().size());
firstCompany = firstPlace.getPivot().get(0);
assertPivot("company_t", "bbc", 101, firstCompany);
}
// Pivot Faceting Count w/fq (not excluded)
rsp = query( "q", "*:*",
"rows", "0",
"fq","place_s:cardiff",
"facet","true",
"facet.pivot","place_s,company_t",
FacetParams.FACET_LIMIT, "4");
pivots = rsp.getFacetPivot().get("place_s,company_t");
assertEquals(1, pivots.size());
firstPlace = pivots.get(0);
assertPivot("place_s", "cardiff", 257, firstPlace);
assertEquals(4, firstPlace.getPivot().size());
firstCompany = firstPlace.getPivot().get(0);
assertPivot("company_t", "bbc", 101, firstCompany);
// Same Pivot - one with exclusion and one w/o
rsp = query( "q", "*:*",
"rows", "0",
"fq","{!tag=ff}pay_i:[2000 TO *]",
"facet","true",
"facet.pivot","{!key=filt}place_s,company_t",
"facet.pivot","{!key=nofilt ex=ff}place_s,company_t",
FacetParams.FACET_LIMIT, "4");
pivots = rsp.getFacetPivot().get("filt");
assertEquals(4, pivots.size());
firstPlace = pivots.get(0);
assertPivot("place_s", "cardiff", 105, firstPlace);
assertEquals(4, firstPlace.getPivot().size());
assertPivot("company_t", "bbc", 101, firstPlace.getPivot().get(0));
assertPivot("company_t", "microsoft", 54, firstPlace.getPivot().get(1));
//
pivots = rsp.getFacetPivot().get("nofilt");
assertEquals(4, pivots.size());
firstPlace = pivots.get(0);
assertPivot("place_s", "cardiff", 257, firstPlace);
assertEquals(4, firstPlace.getPivot().size());
assertPivot("company_t", "bbc", 101, firstPlace.getPivot().get(0));
assertPivot("company_t", "microsoft", 56, firstPlace.getPivot().get(1));
// Same Pivot - one in default (count) order and one in index order
//
// Broken: SOLR-6193 - the facet.sort localparam isn't being picked up correctly
//
// rsp = query( "q", "*:*",
// "rows", "0",
// "facet","true",
// "fq","pay_i:[2000 TO *]",
// "facet.pivot","{!key=sc}place_s,company_t",
// "facet.pivot","{!key=si facet.sort=index}place_s,company_t",
// FacetParams.FACET_LIMIT, "4");
// pivots = rsp.getFacetPivot().get("sc");
// assertEquals(4, pivots.size());
// firstPlace = pivots.get(0);
// assertPivot("place_s", "cardiff", 105, firstPlace);
// assertEquals(4, firstPlace.getPivot().size());
// assertPivot("company_t", "bbc", 101, firstPlace.getPivot().get(0));
// assertPivot("company_t", "microsoft", 54, firstPlace.getPivot().get(1));
// //
// pivots = rsp.getFacetPivot().get("si");
// assertEquals(4, pivots.size());
// firstPlace = pivots.get(0);
// assertPivot("place_s", "0placeholder", 6, firstPlace);
// assertEquals(3, firstPlace.getPivot().size()); // only 3 in the data < facet.limit
// assertPivot("company_t", "bbc", 6, firstPlace.getPivot().get(0));
// assertPivot("company_t", "microsoft", 6, firstPlace.getPivot().get(1));
// Field level limits and small offset
rsp = query( "q", "*:*",
"rows", "0",
"facet","true",
"facet.pivot","place_s,company_t",
"f.place_s.facet.limit","2",
"f.company_t.facet.limit","4",
"facet.offset","1");
pivots = rsp.getFacetPivot().get("place_s,company_t");
assertEquals(2, pivots.size());
firstPlace = pivots.get(0);
assertPivot("place_s", "medical staffing network holdings, inc.", 51, firstPlace);
assertEquals(2, firstPlace.getPivot().size()); // num vals in data < limit==4
firstCompany = firstPlace.getPivot().get(0);
assertPivot("company_t", "bbc", 50, firstCompany);
// Field level offsets and limit
rsp = query( "q", "*:*",
"rows", "0",
"fq","{!tag=pl}place_s:cardiff",
"facet","true",
"facet.pivot","{!ex=pl}place_s,company_t",
"f.place_s.facet.offset","1",
"f.company_t.facet.offset","2",
FacetParams.FACET_LIMIT, "4");
pivots = rsp.getFacetPivot().get("place_s,company_t");
assertEquals(4, pivots.size());
firstPlace = pivots.get(0);
assertPivot("place_s", "medical staffing network holdings, inc.", 51, firstPlace);
assertEquals(1, firstPlace.getPivot().size()); // num vals in data < limit==4
firstCompany = firstPlace.getPivot().get(0);
assertPivot("company_t", "polecat", 50, firstCompany);
// datetime
rsp = query( "q", "*:*",
"rows", "0",
"facet","true",
"facet.pivot","hiredate_dt,place_s,company_t",
"f.hiredate_dt.facet.limit","2",
"f.hiredate_dt.facet.offset","1",
FacetParams.FACET_LIMIT, "4");
pivots = rsp.getFacetPivot().get("hiredate_dt,place_s,company_t");
assertEquals(2, pivots.size());
firstDate = pivots.get(0); // 2012-09-01T12:30:00Z
assertPivot("hiredate_dt", new Date(1346502600000L), 200, firstDate);
assertEquals(1, firstDate.getPivot().size()); // num vals in data < limit==4
firstPlace = firstDate.getPivot().get(0);
assertPivot("place_s", "cardiff", 200, firstPlace);
assertEquals(4, firstPlace.getPivot().size());
firstCompany = firstPlace.getPivot().get(0);
assertPivot("company_t", "bbc", 50, firstCompany);
// int
rsp = query( "q", "*:*",
"rows", "0",
"facet","true",
"facet.pivot","pay_i,place_s,company_t",
"f.pay_i.facet.limit","2",
"f.pay_i.facet.offset","1",
FacetParams.FACET_LIMIT, "4");
pivots = rsp.getFacetPivot().get("pay_i,place_s,company_t");
assertEquals(2, pivots.size());
firstInt = pivots.get(0);
assertPivot("pay_i", 2000, 50, firstInt);
assertEquals(4, firstInt.getPivot().size());
firstPlace = firstInt.getPivot().get(0);
assertPivot("place_s", "0placeholder", 1, firstPlace);
assertEquals(3, firstPlace.getPivot().size());
firstCompany = firstPlace.getPivot().get(0);
assertPivot("company_t", "bbc", 1, firstCompany);
// boolean
rsp = query( "q", "*:*",
"rows", "0",
"facet","true",
"facet.pivot","real_b,place_s,company_t",
"f.real_b.facet.missing","true",
"f.real_b.facet.limit","2",
FacetParams.FACET_LIMIT, "4");
pivots = rsp.getFacetPivot().get("real_b,place_s,company_t");
assertEquals(3, pivots.size());
firstBool = pivots.get(0);
assertPivot("real_b", false, 300, firstBool);
assertEquals(4, firstBool.getPivot().size());
firstPlace = firstBool.getPivot().get(0);
assertPivot("place_s", "0placeholder", 6, firstPlace);
assertEquals(3, firstPlace.getPivot().size());
firstCompany = firstPlace.getPivot().get(0);
assertPivot("company_t", "bbc", 6, firstCompany);
// bogus fields
rsp = query( "q", "*:*",
"rows", "0",
"facet","true",
"facet.pivot","doesntexist_t,neitherdoi_i",
FacetParams.FACET_LIMIT, "4");
pivots = rsp.getFacetPivot().get("doesntexist_t,neitherdoi_i");
assertEquals(0, pivots.size());
// bogus fields with facet.missing
rsp = query( "q", "*:*",
"rows", "0",
"facet","true",
"facet.pivot","doesntexist_t,neitherdoi_i",
"facet.missing", "true",
FacetParams.FACET_LIMIT, "4");
pivots = rsp.getFacetPivot().get("doesntexist_t,neitherdoi_i");
assertEquals(1, pivots.size());
assertPivot("doesntexist_t", null, docNumber, pivots.get(0));
assertEquals(1, pivots.get(0).getPivot().size());
assertPivot("neitherdoi_i", null, docNumber, pivots.get(0).getPivot().get(0));
// Negative facet limit
for (SolrParams facetParams :
// results should be the same regardless of whether facet.limit is global,
// a local param, or specified as a per-field override for both fields
new SolrParams[] {
params(FacetParams.FACET_LIMIT, "-1",
"facet.pivot","place_s,company_t"),
// Broken: SOLR-6193
// params("facet.pivot","{!facet.limit=-1}place_s,company_t"),
params("f.place_s.facet.limit", "-1",
"f.company_t.facet.limit", "-1",
"facet.pivot","place_s,company_t") }) {
SolrParams p = SolrParams.wrapDefaults( params( "q", "*:*",
"rows", "0",
"facet","true",
"facet.sort", "count" ),
facetParams);
try {
rsp = query( p );
pivots = rsp.getFacetPivot().get("place_s,company_t");
assertEquals(103, pivots.size());
firstPlace = pivots.get(0);
assertPivot("place_s", "cardiff", 257, firstPlace);
assertEquals(54, firstPlace.getPivot().size());
firstCompany = firstPlace.getPivot().get(0);
assertPivot("company_t","bbc", 101, firstCompany);
} catch (AssertionFailedError ae) {
throw new AssertionError(ae.getMessage() + " <== " + p.toString(), ae);
}
}
// Negative per-field facet limit (outer)
for (SolrParams facetParams :
// results should be the same regardless of whether per-field facet.limit is
// a global or a local param
new SolrParams[] {
// Broken: SOLR-6193
// params( "facet.pivot","{!f.id.facet.limit=-1}place_s,id" ),
params( "facet.pivot","place_s,id",
"f.id.facet.limit", "-1") }) {
SolrParams p = SolrParams.wrapDefaults( params( "q", "*:*",
"rows", "0",
"facet","true",
"facet.sort", "count" ),
facetParams);
try {
rsp = query( p );
pivots = rsp.getFacetPivot().get("place_s,id");
assertEquals(100, pivots.size()); // default
firstPlace = pivots.get(0);
assertPivot("place_s", "cardiff", 257, firstPlace);
assertEquals(257, firstPlace.getPivot().size());
} catch (AssertionFailedError ae) {
throw new AssertionError(ae.getMessage() + " <== " + p.toString(), ae);
}
}
// Negative per-field facet limit (inner)
for (SolrParams facetParams :
// results should be the same regardless of whether per-field facet.limit is
// a global or a local param
new SolrParams[] {
// Broken: SOLR-6193
// params( "facet.pivot","{!f.place_s.facet.limit=-1}place_s,id" ),
params( "facet.pivot","place_s,id",
"f.place_s.facet.limit", "-1") }) {
SolrParams p = SolrParams.wrapDefaults( params( "q", "*:*",
"rows", "0",
"facet","true",
"facet.sort", "count" ),
facetParams);
try {
rsp = query( p );
pivots = rsp.getFacetPivot().get("place_s,id");
assertEquals(103, pivots.size());
firstPlace = pivots.get(0);
assertPivot("place_s", "cardiff", 257, firstPlace);
assertEquals(100, firstPlace.getPivot().size()); // default
} catch (AssertionFailedError ae) {
throw new AssertionError(ae.getMessage() + " <== " + p.toString(), ae);
}
}
// Mincount + facet.pivot 2 different ways (swap field order)
rsp = query( "q", "*:*",
"rows", "0",
"facet","true",
"facet.pivot","place_s,company_t",
"facet.pivot","company_t,place_s",
FacetParams.FACET_PIVOT_MINCOUNT,"6");
pivots = rsp.getFacetPivot().get("place_s,company_t");
assertEquals(52, pivots.size());
firstPlace = pivots.get(0);
assertPivot("place_s", "cardiff", 257, firstPlace);
assertEquals(4, firstPlace.getPivot().size());
firstCompany = firstPlace.getPivot().get(0);
assertPivot("company_t", "bbc", 101, firstCompany);
//
pivots = rsp.getFacetPivot().get("company_t,place_s");
assertEquals(4, pivots.size());
firstCompany = pivots.get(0);
assertPivot("company_t", "bbc", 451, firstCompany);
assertEquals(52, firstCompany.getPivot().size());
firstPlace = firstCompany.getPivot().get(0);
assertPivot("place_s", "cardiff", 101, firstPlace);
// refine on SPECIAL empty string
rsp = query( "q", "*:*",
"fq", "-place_s:0placeholder",
"rows", "0",
"facet","true",
"facet.limit","1",
FacetParams.FACET_OVERREQUEST_RATIO, "0", // force refinement
FacetParams.FACET_OVERREQUEST_COUNT, "1", // force refinement
"facet.pivot","special_s,company_t");
assertEquals(docNumber - 6, rsp.getResults().getNumFound()); // all docs but 0place
pivots = rsp.getFacetPivot().get("special_s,company_t");
assertEquals(1, pivots.size());
firstPlace = pivots.get(0);
assertPivot("special_s", SPECIAL, 3, firstPlace);
assertEquals(1, firstPlace.getPivot().size());
firstCompany = firstPlace.getPivot().get(0);
assertPivot("company_t", "microsoft", 2, firstCompany);
// TODO test "company_t,special_s" as well
// refine on SPECIAL empty string & facet.missing
// Also proves refinement on non-top elements occurs and allows them to get into the top
rsp = query( "q", "*:*",
"fq", "-place_s:0placeholder",
"rows", "0",
"facet","true",
"facet.limit","1",
"facet.missing","true",
FacetParams.FACET_OVERREQUEST_RATIO, "0", // force refinement
FacetParams.FACET_OVERREQUEST_COUNT, "2", // force refinement
"facet.pivot","special_s,company_t");
assertEquals(docNumber - 6, rsp.getResults().getNumFound()); // all docs but 0place
pivots = rsp.getFacetPivot().get("special_s,company_t");
assertEquals(2, pivots.size());
firstPlace = pivots.get(0);
assertPivot("special_s", SPECIAL, 3, firstPlace);
assertEquals(1, firstPlace.getPivot().size());
firstCompany = firstPlace.getPivot().get(0);
assertPivot("company_t", "microsoft", 2, firstCompany);
// last is "missing" val
assertPivot("special_s", null, docNumber -6 -3 -2, pivots.get(1)); // -0place -SPECIAL -xxx
// forced refinement on facet.missing
rsp = query( "q", "*:*",
"rows", "0",
"facet","true",
"f.bogus_x_s.facet.missing","true",
"f.bogus_y_s.facet.missing","true",
"facet.pivot","bogus_x_s,place_s,bogus_y_s,company_t",
FacetParams.FACET_LIMIT, "12");
pivots = rsp.getFacetPivot().get("bogus_x_s,place_s,bogus_y_s,company_t");
assertEquals(1, pivots.size()); // just the missing value for bogus_x_s
assertPivot("bogus_x_s", null, docNumber, pivots.get(0));
pivots = pivots.get(0).getPivot();
assertEquals(12, pivots.size()); // places
firstPlace = pivots.get(0);
assertPivot("place_s", "cardiff", 257, firstPlace);
assertEquals(1, firstPlace.getPivot().size()); // just the missing value for bogus_y_s
assertPivot("bogus_y_s", null, 257, firstPlace.getPivot().get(0));
assertPivot("company_t", "bbc", 101, firstPlace.getPivot().get(0).getPivot().get(0));
// Microsoft will come back wrong if refinement was not done correctly
assertPivot("company_t", "microsoft", 56, firstPlace.getPivot().get(0).getPivot().get(1));
// Overrequesting a lot
this.query( "q", "*:*",
"rows", "0",
"facet", "true",
"facet.pivot","place_s,company_t",
FacetParams.FACET_OVERREQUEST_RATIO, "10",
FacetParams.FACET_OVERREQUEST_COUNT, "100");
// Overrequesting off
this.query( "q", "*:*",
"rows", "0",
"facet", "true",
"facet.pivot","place_s,company_t",
FacetParams.FACET_OVERREQUEST_RATIO, "0",
FacetParams.FACET_OVERREQUEST_COUNT, "0");
}
/**
* asserts that the actual PivotField matches the expected criteria
*/
private void assertPivot(String field, Object value, int count, // int numKids,
PivotField actual) {
assertEquals("FIELD: " + actual.toString(), field, actual.getField());
assertEquals("VALUE: " + actual.toString(), value, actual.getValue());
assertEquals("COUNT: " + actual.toString(), count, actual.getCount());
// TODO: add arg && assert on number of kids
//assertEquals("#KIDS: " + actual.toString(), numKids, actual.getPivot().size());
}
private void setupDistributedPivotFacetDocuments() throws Exception{
//Clear docs
del("*:*");
commit();
final int maxDocs = 50;
final SolrServer zeroShard = clients.get(0);
final SolrServer oneShard = clients.get(1);
final SolrServer twoShard = clients.get(2);
final SolrServer threeShard = clients.get(3); // edge case: never gets any matching docs
for(Integer i=0;i<maxDocs;i++){//50 entries
addPivotDoc(zeroShard, "id", getDocNum(), "place_s", "cardiff", "company_t", "microsoft polecat bbc","pay_i",2400,"hiredate_dt", "2012-07-01T12:30:00Z","real_b","true");
addPivotDoc(zeroShard, "id", getDocNum(), "place_s", "medical staffing network holdings, inc.", "company_t", "microsoft polecat bbc","pay_i",2400,"hiredate_dt", "2012-07-01T12:30:00Z");
addPivotDoc(oneShard, "id", getDocNum(), "place_s", "placeholder"+i, "company_t", "compHolder"+i,"pay_i",24*i,"hiredate_dt", "2012-08-01T12:30:00Z");
addPivotDoc(twoShard, "id", getDocNum(), "place_s", "cardiff", "company_t", "bbc honda","pay_i",2400,"hiredate_dt", "2012-09-01T12:30:00Z","real_b","true");
addPivotDoc(twoShard, "id", getDocNum(), "place_s", "cardiff", "company_t", "compHolder"+i,"pay_i",22*i,"hiredate_dt", "2012-09-01T12:30:00Z","real_b","true");
addPivotDoc(twoShard, "id", getDocNum(), "place_s", "cardiff", "company_t", "compHolder"+i,"pay_i",21*i,"hiredate_dt", "2012-09-01T12:30:00Z","real_b","true");
addPivotDoc(twoShard, "id", getDocNum(), "place_s", "cardiff", "company_t", "compHolder"+i,"pay_i",20*i,"hiredate_dt", "2012-09-01T12:30:00Z","real_b","true");
//For the filler content
//Fifty places with 6 results each
addPivotDoc(oneShard, "id", getDocNum(), "place_s", i+"placeholder", "company_t", "microsoft polecat bbc","pay_i",2400,"hiredate_dt", "2012-10-01T12:30:00Z","real_b","false");
addPivotDoc(oneShard, "id", getDocNum(), "place_s", i+"placeholder", "company_t", "microsoft polecat bbc","pay_i",3100,"hiredate_dt", "2012-10-01T12:30:00Z","real_b","false");
addPivotDoc(oneShard, "id", getDocNum(), "place_s", i+"placeholder", "company_t", "microsoft polecat bbc","pay_i",3400,"hiredate_dt", "2012-10-01T12:30:00Z","real_b","false");
addPivotDoc(oneShard, "id", getDocNum(), "place_s", i+"placeholder", "company_t", "microsoft polecat bbc","pay_i",5400,"hiredate_dt", "2012-10-01T12:30:00Z","real_b","false");
addPivotDoc(oneShard, "id", getDocNum(), "place_s", i+"placeholder", "company_t", "microsoft polecat bbc","pay_i",6400,"hiredate_dt", "2012-10-01T12:30:00Z","real_b","false");
addPivotDoc(oneShard, "id", getDocNum(), "place_s", i+"placeholder", "company_t", "microsoft polecat bbc","pay_i",2000,"hiredate_dt", "2012-10-01T12:30:00Z","real_b","false");
}
addPivotDoc(oneShard, "id", getDocNum(), "place_s", "cardiff", "company_t", "microsoft","pay_i",4367,"hiredate_dt", "2012-11-01T12:30:00Z");
addPivotDoc(oneShard, "id", getDocNum(), "place_s", "cardiff", "company_t", "microsoft bbc","pay_i",8742,"hiredate_dt", "2012-11-01T12:30:00Z");
addPivotDoc(oneShard, "id", getDocNum(), "place_s", "cardiff", "company_t", "microsoft polecat","pay_i",5824,"hiredate_dt", "2012-11-01T12:30:00Z");
addPivotDoc(oneShard, "id", getDocNum(), "place_s", "cardiff", "company_t", "microsoft ","pay_i",6539,"hiredate_dt", "2012-11-01T12:30:00Z");
addPivotDoc(oneShard, "id", getDocNum(), "place_s", "medical staffing network holdings, inc.", "company_t", "microsoft ","pay_i",6539,"hiredate_dt", "2012-11-01T12:30:00Z", "special_s", "xxx");
addPivotDoc(oneShard, "id", getDocNum(), "place_s", "cardiff", "company_t", "polecat","pay_i",4352,"hiredate_dt", "2012-1-01T12:30:00Z", "special_s", "xxx");
addPivotDoc(oneShard, "id", getDocNum(), "place_s", "krakaw", "company_t", "polecat","pay_i",4352,"hiredate_dt", "2012-11-01T12:30:00Z", "special_s", SPECIAL);
addPivotDoc(twoShard, "id", getDocNum(), "place_s", "cardiff", "company_t", "microsoft","pay_i",12,"hiredate_dt", "2012-11-01T12:30:00Z", "special_s", SPECIAL);
addPivotDoc(twoShard, "id", getDocNum(), "place_s", "cardiff", "company_t", "microsoft","pay_i",543,"hiredate_dt", "2012-11-01T12:30:00Z", "special_s", SPECIAL);
// two really trivial documents, unrelated to the rest of the tests,
// for the purpose of demoing the problem with mincount=0
addPivotDoc(oneShard, "id", getDocNum(), "top_s", "aaa", "sub_s", "bbb" );
addPivotDoc(twoShard, "id", getDocNum(), "top_s", "xxx", "sub_s", "yyy" );
commit();
assertEquals("shard #3 should never have any docs",
0, threeShard.query(params("q", "*:*")).getResults().getNumFound());
}
/**
* Builds up a SolrInputDocument using the specified fields, then adds it to the
* specified client as well as the control client
* @see #indexDoc(SolrServer,SolrParams,SolrInputDocument...)
* @see #sdoc
*/
private void addPivotDoc(SolrServer client, Object... fields)
throws IOException, SolrServerException {
indexDoc(client, params(), sdoc(fields));
}
private int docNumber = 0;
public int getDocNum(){
docNumber++;
return docNumber;
}
}

View File

@@ -0,0 +1,289 @@
package org.apache.solr.handler.component;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.Date;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.io.IOException;
import org.apache.solr.BaseDistributedSearchTestCase;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.PivotField;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.ModifiableSolrParams;
/**
* test demonstrating how overrequesting helps find top terms in the "long tail"
* of shards that don't have even distributions of terms (something that can be common
* in cases of custom sharding -- even if you don't know that there is a correlation
* between the property you are sharding on and the property you are faceting on).
*
* NOTE: This test ignores the control collection (in single node mode, there is no
* need for the overrequesting, all the data is local -- so comparisons with it wouldn't
* be valid in the cases we are testing here)
*/
public class DistributedFacetPivotLongTailTest extends BaseDistributedSearchTestCase {
public DistributedFacetPivotLongTailTest(){
this.fixShardCount = true;
this.shardCount = 3;
}
private int docNumber = 0;
public int getDocNum() {
docNumber++;
return docNumber;
}
@Override
public void doTest() throws Exception {
final SolrServer shard0 = clients.get(0);
final SolrServer shard1 = clients.get(1);
final SolrServer shard2 = clients.get(2);
// the 5 top foo_s terms have 100 docs each on every shard
for (int i = 0; i < 100; i++) {
for (int j = 0; j < 5; j++) {
shard0.add(sdoc("id", getDocNum(), "foo_s", "aaa"+j));
shard1.add(sdoc("id", getDocNum(), "foo_s", "aaa"+j));
shard2.add(sdoc("id", getDocNum(), "foo_s", "aaa"+j));
}
}
// 20 foo_s terms that come in "second" with 50 docs each
// on both shard0 & shard1 ("bbb_")
for (int i = 0; i < 50; i++) {
for (int j = 0; j < 20; j++) {
shard0.add(sdoc("id", getDocNum(), "foo_s", "bbb"+j));
shard1.add(sdoc("id", getDocNum(), "foo_s", "bbb"+j));
}
// distracting term appears only on shard2, 50 times
shard2.add(sdoc("id", getDocNum(), "foo_s", "junkA"));
}
// put "bbb0" on shard2 exactly once to sanity check refinement
shard2.add(sdoc("id", getDocNum(), "foo_s", "bbb0"));
// long 'tail' foo_s term appears in 45 docs on every shard
// foo_s:tail is the only term with bar_s sub-pivot terms
for (int i = 0; i < 45; i++) {
// for sub-pivot, shard0 & shard1 have 6 docs each for "tailB"
// but the top 5 terms are ccc(0-4) -- 7 on each shard
// (4 docs each have junk terms)
String sub_term = (i < 35) ? "ccc"+(i % 5) : ((i < 41) ? "tailB" : "junkA");
shard0.add(sdoc("id", getDocNum(), "foo_s", "tail", "bar_s", sub_term));
shard1.add(sdoc("id", getDocNum(), "foo_s", "tail", "bar_s", sub_term));
// shard2's top 5 sub-pivot terms are junk terms that only it has, with 8 docs each
// and 5 docs that use "tailB"
sub_term = (i < 40) ? "junkB"+(i % 5) : "tailB";
shard2.add(sdoc("id", getDocNum(), "foo_s", "tail", "bar_s", sub_term));
}
// really long tail of uncommon foo_s terms on shard2
for (int i = 0; i < 30; i++) {
shard2.add(sdoc("id", getDocNum(), "foo_s", "zzz"+i));
}
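// Expected aggregate counts used in the assertions below, derived from the data above:
// "tail" = 45 docs x 3 shards = 135; "tailB" = 6 (shard0) + 6 (shard1) + 5 (shard2) = 17;
// each "ccc" term = 7 (shard0) + 7 (shard1) = 14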
commit();
SolrParams req = params( "q", "*:*",
"distrib", "false",
"facet", "true",
"facet.limit", "10",
"facet.pivot", "foo_s,bar_s");
// sanity check that our expectations about each shard (non-distrib) are correct
PivotField pivot = null;
List<PivotField> pivots = null;
List<PivotField>[] shardPivots = new List[3];
shardPivots[0] = shard0.query( req ).getFacetPivot().get("foo_s,bar_s");
shardPivots[1] = shard1.query( req ).getFacetPivot().get("foo_s,bar_s");
shardPivots[2] = shard2.query( req ).getFacetPivot().get("foo_s,bar_s");
// top 5 same on all shards
for (int i = 0; i < 3; i++) {
assertEquals(10, shardPivots[i].size());
for (int j = 0; j < 5; j++) {
pivot = shardPivots[i].get(j);
assertEquals(pivot.toString(), "aaa"+j, pivot.getValue());
assertEquals(pivot.toString(), 100, pivot.getCount());
}
}
// top 6-10 same on shard0 & shard11
for (int i = 0; i < 2; i++) {
for (int j = 5; j < 10; j++) {
pivot = shardPivots[i].get(j);
assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("bbb"));
assertEquals(pivot.toString(), 50, pivot.getCount());
}
}
// 6-10 on shard2
assertEquals("junkA", shardPivots[2].get(5).getValue());
assertEquals(50, shardPivots[2].get(5).getCount());
assertEquals("tail", shardPivots[2].get(6).getValue());
assertEquals(45, shardPivots[2].get(6).getCount());
assertEquals("bbb0", shardPivots[2].get(7).getValue());
assertEquals(1, shardPivots[2].get(7).getCount());
for (int j = 8; j < 10; j++) {
pivot = shardPivots[2].get(j);
assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("zzz"));
assertEquals(pivot.toString(), 1, pivot.getCount());
}
// check the sub-pivots of "tail" on shard2
pivots = shardPivots[2].get(6).getPivot();
assertEquals(6, pivots.size());
for (int j = 0; j < 5; j++) {
pivot = pivots.get(j);
assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("junkB"));
assertEquals(pivot.toString(), 8, pivot.getCount());
}
pivot = pivots.get(5);
assertEquals("tailB", pivot.getValue());
assertEquals(5, pivot.getCount());
// if we disable overrequesting, we don't find the long tail
pivots = queryServer( params( "q", "*:*",
"shards", getShardsString(),
FacetParams.FACET_OVERREQUEST_COUNT, "0",
FacetParams.FACET_OVERREQUEST_RATIO, "0",
"facet", "true",
"facet.limit", "6",
"facet.pivot", "foo_s,bar_s" )
).getFacetPivot().get("foo_s,bar_s");
assertEquals(6, pivots.size());
for (int i = 0; i < 5; i++) {
pivot = pivots.get(i);
assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("aaa"));
assertEquals(pivot.toString(), 300, pivot.getCount());
}
// even w/o the long tail, we should have still asked shard2 to refine bbb0
assertTrue(pivots.get(5).toString(), pivots.get(5).getValue().equals("bbb0"));
assertEquals(pivots.get(5).toString(), 101, pivots.get(5).getCount());
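// (101 = 50 docs on shard0 + 50 on shard1 + the 1 doc refined from shard2)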
// with default overrequesting, we should find the correct top 6 including
// long tail and top sub-pivots
// (even if we disable overrequesting on the sub-pivot)
for (ModifiableSolrParams q : new ModifiableSolrParams[] {
params(),
params("f.bar_s.facet.overrequest.ratio","0",
"f.bar_s.facet.overrequest.count","0") }) {
q.add( params( "q", "*:*",
"shards", getShardsString(),
"facet", "true",
"facet.limit", "6",
"facet.pivot", "foo_s,bar_s" ));
pivots = queryServer( q ).getFacetPivot().get("foo_s,bar_s");
assertEquals(6, pivots.size());
for (int i = 0; i < 5; i++) {
pivot = pivots.get(i);
assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("aaa"));
assertEquals(pivot.toString(), 300, pivot.getCount());
}
pivot = pivots.get(5);
assertEquals(pivot.toString(), "tail", pivot.getValue());
assertEquals(pivot.toString(), 135, pivot.getCount());
// check the sub pivots
pivots = pivot.getPivot();
assertEquals(6, pivots.size());
pivot = pivots.get(0);
assertEquals(pivot.toString(), "tailB", pivot.getValue());
assertEquals(pivot.toString(), 17, pivot.getCount());
for (int i = 1; i < 6; i++) { // ccc(0-4)
pivot = pivots.get(i);
assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("ccc"));
assertEquals(pivot.toString(), 14, pivot.getCount());
}
}
// if we lower the facet.limit on the sub-pivot, overrequesting should still ensure
// that we get the correct top5 including "tailB"
pivots = queryServer( params( "q", "*:*",
"shards", getShardsString(),
"facet", "true",
"facet.limit", "6",
"f.bar_s.facet.limit", "5",
"facet.pivot", "foo_s,bar_s" )
).getFacetPivot().get("foo_s,bar_s");
assertEquals(6, pivots.size());
for (int i = 0; i < 5; i++) {
pivot = pivots.get(i);
assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("aaa"));
assertEquals(pivot.toString(), 300, pivot.getCount());
}
pivot = pivots.get(5);
assertEquals(pivot.toString(), "tail", pivot.getValue());
assertEquals(pivot.toString(), 135, pivot.getCount());
// check the sub pivots
pivots = pivot.getPivot();
assertEquals(5, pivots.size());
pivot = pivots.get(0);
assertEquals(pivot.toString(), "tailB", pivot.getValue());
assertEquals(pivot.toString(), 17, pivot.getCount());
for (int i = 1; i < 5; i++) { // ccc(0-3)
pivot = pivots.get(i);
assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("ccc"));
assertEquals(pivot.toString(), 14, pivot.getCount());
}
// however with a lower limit and overrequesting disabled,
// we're going to miss out on tailB
pivots = queryServer( params( "q", "*:*",
"shards", getShardsString(),
"facet", "true",
"facet.limit", "6",
"f.bar_s.facet.overrequest.ratio", "0",
"f.bar_s.facet.overrequest.count", "0",
"f.bar_s.facet.limit", "5",
"facet.pivot", "foo_s,bar_s" )
).getFacetPivot().get("foo_s,bar_s");
assertEquals(6, pivots.size());
for (int i = 0; i < 5; i++) {
pivot = pivots.get(i);
assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("aaa"));
assertEquals(pivot.toString(), 300, pivot.getCount());
}
pivot = pivots.get(5);
assertEquals(pivot.toString(), "tail", pivot.getValue());
assertEquals(pivot.toString(), 135, pivot.getCount());
// check the sub pivots
pivots = pivot.getPivot();
assertEquals(5, pivots.size());
for (int i = 0; i < 5; i++) { // ccc(0-4)
pivot = pivots.get(i);
assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("ccc"));
assertEquals(pivot.toString(), 14, pivot.getCount());
}
}
}

View File

@ -0,0 +1,439 @@
package org.apache.solr.handler.component;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import org.apache.solr.BaseDistributedSearchTestCase;
import org.apache.solr.client.solrj.response.PivotField;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import junit.framework.AssertionFailedError;
public class DistributedFacetPivotSmallTest extends BaseDistributedSearchTestCase {
public DistributedFacetPivotSmallTest() {
this.fixShardCount = true;
this.shardCount = 4;
}
@Override
public void doTest() throws Exception {
del("*:*");
// NOTE: we use the literal (4 character) string "null" as a company name
// to help ensure there aren't any bugs where the literal string is treated as if it
// were a true NULL value.
index(id, 19, "place_t", "cardiff dublin", "company_t", "microsoft polecat");
index(id, 20, "place_t", "dublin", "company_t", "polecat microsoft null");
index(id, 21, "place_t", "london la dublin", "company_t",
"microsoft fujitsu null polecat");
index(id, 22, "place_t", "krakow london cardiff", "company_t",
"polecat null bbc");
index(id, 23, "place_t", "london", "company_t", "");
index(id, 24, "place_t", "la", "company_t", "");
index(id, 25, "company_t", "microsoft polecat null fujitsu null bbc");
index(id, 26, "place_t", "krakow", "company_t", "null");
index(id, 27, "place_t", "krakow cardiff dublin london la", "company_t",
"null microsoft polecat bbc fujitsu");
index(id, 28, "place_t", "cork", "company_t",
"fujitsu rte");
commit();
handle.clear();
handle.put("QTime", SKIPVAL);
handle.put("timestamp", SKIPVAL);
handle.put("maxScore", SKIPVAL);
final ModifiableSolrParams params = new ModifiableSolrParams();
setDistributedParams(params);
params.add("q", "*:*");
params.add("facet", "true");
params.add("facet.pivot", "place_t,company_t");
QueryResponse rsp = queryServer(params);
List<PivotField> expectedPlacePivots = new UnorderedEqualityArrayList<PivotField>();
List<PivotField> expectedCardiffPivots = new UnorderedEqualityArrayList<PivotField>();
expectedCardiffPivots.add(new ComparablePivotField("company_t", "microsoft", 2, null));
expectedCardiffPivots.add(new ComparablePivotField("company_t", "null", 2, null));
expectedCardiffPivots.add(new ComparablePivotField("company_t", "bbc", 2, null));
expectedCardiffPivots.add(new ComparablePivotField("company_t", "polecat", 3, null));
expectedCardiffPivots.add(new ComparablePivotField("company_t", "fujitsu", 1, null));
List<PivotField> expectedDublinPivots = new UnorderedEqualityArrayList<PivotField>();
expectedDublinPivots.add(new ComparablePivotField("company_t", "polecat", 4, null));
expectedDublinPivots.add(new ComparablePivotField("company_t", "microsoft", 4, null));
expectedDublinPivots.add(new ComparablePivotField("company_t", "null", 3, null));
expectedDublinPivots.add(new ComparablePivotField("company_t", "fujitsu", 2, null));
expectedDublinPivots.add(new ComparablePivotField("company_t", "bbc", 1, null));
List<PivotField> expectedLondonPivots = new UnorderedEqualityArrayList<PivotField>();
expectedLondonPivots.add(new ComparablePivotField("company_t", "polecat", 3, null));
expectedLondonPivots.add(new ComparablePivotField("company_t", "microsoft", 2, null));
expectedLondonPivots.add(new ComparablePivotField("company_t", "fujitsu", 2, null));
expectedLondonPivots.add(new ComparablePivotField("company_t", "null", 3,null));
expectedLondonPivots.add(new ComparablePivotField("company_t", "bbc", 2, null));
List<PivotField> expectedLAPivots = new UnorderedEqualityArrayList<PivotField>();
expectedLAPivots.add(new ComparablePivotField("company_t", "microsoft", 2,null));
expectedLAPivots.add(new ComparablePivotField("company_t", "fujitsu", 2,null));
expectedLAPivots.add(new ComparablePivotField("company_t", "null", 2, null));
expectedLAPivots.add(new ComparablePivotField("company_t", "bbc", 1, null));
expectedLAPivots.add(new ComparablePivotField("company_t", "polecat", 2,null));
List<PivotField> expectedKrakowPivots = new UnorderedEqualityArrayList<PivotField>();
expectedKrakowPivots.add(new ComparablePivotField("company_t", "polecat",2, null));
expectedKrakowPivots.add(new ComparablePivotField("company_t", "bbc", 2, null));
expectedKrakowPivots.add(new ComparablePivotField("company_t", "null", 3,null));
expectedKrakowPivots.add(new ComparablePivotField("company_t", "fujitsu", 1, null));
expectedKrakowPivots.add(new ComparablePivotField("company_t", "microsoft", 1, null));
List<PivotField> expectedCorkPivots = new UnorderedEqualityArrayList<PivotField>();
expectedCorkPivots.add(new ComparablePivotField("company_t", "fujitsu", 1, null));
expectedCorkPivots.add(new ComparablePivotField("company_t", "rte", 1, null));
expectedPlacePivots.add(new ComparablePivotField("place_t", "dublin", 4, expectedDublinPivots));
expectedPlacePivots.add(new ComparablePivotField("place_t", "cardiff", 3, expectedCardiffPivots));
expectedPlacePivots.add(new ComparablePivotField("place_t", "london", 4, expectedLondonPivots));
expectedPlacePivots.add(new ComparablePivotField("place_t", "la", 3, expectedLAPivots));
expectedPlacePivots.add(new ComparablePivotField("place_t", "krakow", 3, expectedKrakowPivots));
expectedPlacePivots.add(new ComparablePivotField("place_t", "cork", 1, expectedCorkPivots));
List<PivotField> placePivots = rsp.getFacetPivot().get("place_t,company_t");
// Useful for spotting errors: orders the lists and does a toString() equality check
testOrderedPivotsStringEquality(expectedPlacePivots, placePivots);
assertEquals(expectedPlacePivots, placePivots);
// Test sorting by count
params.set(FacetParams.FACET_SORT, FacetParams.FACET_SORT_COUNT);
rsp = queryServer(params);
placePivots = rsp.getFacetPivot().get("place_t,company_t");
testCountSorting(placePivots);
// Test limit
params.set(FacetParams.FACET_LIMIT, 2);
rsp = queryServer(params);
expectedPlacePivots = new UnorderedEqualityArrayList<PivotField>();
expectedDublinPivots = new UnorderedEqualityArrayList<PivotField>();
expectedDublinPivots.add(new ComparablePivotField("company_t", "polecat",
4, null));
expectedDublinPivots.add(new ComparablePivotField("company_t", "microsoft",
4, null));
expectedLondonPivots = new UnorderedEqualityArrayList<PivotField>();
expectedLondonPivots.add(new ComparablePivotField("company_t", "null", 3,
null));
expectedLondonPivots.add(new ComparablePivotField("company_t", "polecat", 3,
null));
expectedPlacePivots.add(new ComparablePivotField("place_t", "dublin", 4,
expectedDublinPivots));
expectedPlacePivots.add(new ComparablePivotField("place_t", "london", 4,
expectedLondonPivots));
placePivots = rsp.getFacetPivot().get("place_t,company_t");
assertEquals(expectedPlacePivots, placePivots);
// Test individual facet.limit values
params.remove(FacetParams.FACET_LIMIT);
params.set("f.place_t." + FacetParams.FACET_LIMIT, 1);
params.set("f.company_t." + FacetParams.FACET_LIMIT, 4);
rsp = queryServer(params);
expectedPlacePivots = new UnorderedEqualityArrayList<PivotField>();
expectedDublinPivots = new UnorderedEqualityArrayList<PivotField>();
expectedDublinPivots.add(new ComparablePivotField("company_t", "microsoft",4, null));
expectedDublinPivots.add(new ComparablePivotField("company_t", "polecat",4, null));
expectedDublinPivots.add(new ComparablePivotField("company_t", "null",3, null));
expectedDublinPivots.add(new ComparablePivotField("company_t", "fujitsu",2, null));
expectedLondonPivots = new UnorderedEqualityArrayList<PivotField>();
expectedLondonPivots.add(new ComparablePivotField("company_t", "null", 3, null));
expectedLondonPivots.add(new ComparablePivotField("company_t", "polecat", 3, null));
expectedLondonPivots.add(new ComparablePivotField("company_t", "bbc", 2, null));
expectedLondonPivots.add(new ComparablePivotField("company_t", "fujitsu", 2, null));
expectedCardiffPivots = new UnorderedEqualityArrayList<PivotField>();
expectedCardiffPivots.add(new ComparablePivotField("company_t", "polecat", 3, null));
expectedKrakowPivots = new UnorderedEqualityArrayList<PivotField>();
expectedKrakowPivots.add(new ComparablePivotField("company_t", "null", 3, null));
expectedLAPivots = new UnorderedEqualityArrayList<PivotField>();
expectedLAPivots.add(new ComparablePivotField("company_t", "fujitsu", 2, null));
expectedCorkPivots = new UnorderedEqualityArrayList<PivotField>();
expectedCorkPivots.add(new ComparablePivotField("company_t", "fujitsu", 1, null));
expectedPlacePivots.add(new ComparablePivotField("place_t", "dublin", 4, expectedDublinPivots));
placePivots = rsp.getFacetPivot().get("place_t,company_t");
assertEquals(expectedPlacePivots, placePivots);
params.remove("f.company_t." + FacetParams.FACET_LIMIT);
params.remove("f.place_t." + FacetParams.FACET_LIMIT);
params.set(FacetParams.FACET_LIMIT, 2);
// Test facet.missing=true with diff sorts
index("id",777); // NOTE: id=25 has no place as well
commit();
SolrParams missingA = params( "q", "*:*",
"rows", "0",
"facet","true",
"facet.pivot","place_t,company_t",
// default facet.sort
FacetParams.FACET_MISSING, "true" );
SolrParams missingB = SolrParams.wrapDefaults(missingA,
params(FacetParams.FACET_LIMIT, "4",
"facet.sort", "index"));
for (SolrParams p : new SolrParams[] { missingA, missingB }) {
// in either case, the last pivot option should be the same
rsp = query( p );
placePivots = rsp.getFacetPivot().get("place_t,company_t");
assertTrue("not enough values for pivot: " + p + " => " + placePivots,
1 < placePivots.size());
PivotField missing = placePivots.get(placePivots.size()-1);
assertNull("not the missing place value: " + p, missing.getValue());
assertEquals("wrong missing place count: " + p, 2, missing.getCount());
assertTrue("not enough sub-pivots for missing place: "+ p +" => " + missing.getPivot(),
1 < missing.getPivot().size());
missing = missing.getPivot().get(missing.getPivot().size()-1);
assertNull("not the missing company value: " + p, missing.getValue());
assertEquals("wrong missing company count: " + p, 1, missing.getCount());
assertNull("company shouldn't have sub-pivots: " + p, missing.getPivot());
}
// sort=index + mincount + limit
for (SolrParams variableParams : new SolrParams[] {
// we should get the same results regardless of overrequest
params("facet.overrequest.count","0",
"facet.overrequest.ratio","0"),
params() }) {
SolrParams p = SolrParams.wrapDefaults( params( "q", "*:*",
"rows", "0",
"facet","true",
"facet.pivot","company_t",
"facet.sort", "index",
"facet.pivot.mincount", "4",
"facet.limit", "4"),
variableParams );
try {
List<PivotField> pivots = query( p ).getFacetPivot().get("company_t");
assertEquals(4, pivots.size());
assertEquals("fujitsu", pivots.get(0).getValue());
assertEquals(4, pivots.get(0).getCount());
assertEquals("microsoft", pivots.get(1).getValue());
assertEquals(5, pivots.get(1).getCount());
assertEquals("null", pivots.get(2).getValue());
assertEquals(6, pivots.get(2).getCount());
assertEquals("polecat", pivots.get(3).getValue());
assertEquals(6, pivots.get(3).getCount());
} catch (AssertionFailedError ae) {
throw new AssertionError(ae.getMessage() + " <== " + p.toString(), ae);
}
}
// sort=index + mincount + limit + offset
for (SolrParams variableParams : new SolrParams[] {
// we should get the same results regardless of overrequest
params("facet.overrequest.count","0",
"facet.overrequest.ratio","0"),
params() }) {
SolrParams p = SolrParams.wrapDefaults( params( "q", "*:*",
"rows", "0",
"facet","true",
"facet.pivot","company_t",
"facet.sort", "index",
"facet.pivot.mincount", "4",
"facet.offset", "1",
"facet.limit", "4"),
variableParams );
try {
List<PivotField> pivots = query( p ).getFacetPivot().get("company_t");
assertEquals(3, pivots.size()); // asked for 4, but not enough terms meet the mincount
assertEquals("microsoft", pivots.get(0).getValue());
assertEquals(5, pivots.get(0).getCount());
assertEquals("null", pivots.get(1).getValue());
assertEquals(6, pivots.get(1).getCount());
assertEquals("polecat", pivots.get(2).getValue());
assertEquals(6, pivots.get(2).getCount());
} catch (AssertionFailedError ae) {
throw new AssertionError(ae.getMessage() + " <== " + p.toString(), ae);
}
}
// sort=index + mincount + limit + offset (more permutations)
for (SolrParams variableParams : new SolrParams[] {
// all of these combinations should result in the same first value
params("facet.pivot.mincount", "4",
"facet.offset", "2"),
params("facet.pivot.mincount", "5",
"facet.offset", "1"),
params("facet.pivot.mincount", "6",
"facet.offset", "0" ) }) {
SolrParams p = SolrParams.wrapDefaults( params( "q", "*:*",
"rows", "0",
"facet","true",
"facet.limit","1",
"facet.sort","index",
"facet.overrequest.ratio","0",
"facet.pivot", "company_t"),
variableParams );
try {
List<PivotField> pivots = query( p ).getFacetPivot().get("company_t");
assertEquals(1, pivots.size());
assertEquals(pivots.toString(), "null", pivots.get(0).getValue());
assertEquals(pivots.toString(), 6, pivots.get(0).getCount());
} catch (AssertionFailedError ae) {
throw new AssertionError(ae.getMessage() + " <== " + p.toString(), ae);
}
}
}
// Useful for spotting errors: orders the lists and does a toString() equality check
private void testOrderedPivotsStringEquality(
List<PivotField> expectedPlacePivots, List<PivotField> placePivots) {
Collections.sort(expectedPlacePivots, new PivotFieldComparator());
for (PivotField expectedPivot : expectedPlacePivots) {
if (expectedPivot.getPivot() != null) {
Collections.sort(expectedPivot.getPivot(), new PivotFieldComparator());
}
}
Collections.sort(placePivots, new PivotFieldComparator());
for (PivotField pivot : placePivots) {
if (pivot.getPivot() != null) {
Collections.sort(pivot.getPivot(), new PivotFieldComparator());
}
}
assertEquals(expectedPlacePivots.toString(), placePivots.toString());
}
private void testCountSorting(List<PivotField> pivots) {
Integer lastCount = null;
for (PivotField pivot : pivots) {
if (lastCount != null) {
assertTrue(pivot.getCount() <= lastCount);
}
lastCount = pivot.getCount();
if (pivot.getPivot() != null) {
testCountSorting(pivot.getPivot());
}
}
}
public static class ComparablePivotField extends PivotField {
public ComparablePivotField(String f, Object v, int count,
List<PivotField> pivot) {
super(f,v,count,pivot);
}
@Override
public boolean equals(Object obj) {
if (this == obj) return true;
if (obj == null) return false;
if (!obj.getClass().isAssignableFrom(PivotField.class)) return false;
PivotField other = (PivotField) obj;
if (getCount() != other.getCount()) return false;
if (getField() == null) {
if (other.getField() != null) return false;
} else if (!getField().equals(other.getField())) return false;
if (getPivot() == null) {
if (other.getPivot() != null) return false;
} else if (!getPivot().equals(other.getPivot())) return false;
if (getValue() == null) {
if (other.getValue() != null) return false;
} else if (!getValue().equals(other.getValue())) return false;
return true;
}
}
public static class UnorderedEqualityArrayList<T> extends ArrayList<T> {
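// NOTE: equality here is a size check plus containment of each element of the
// other list, so duplicate multiplicity is not verified -- sufficient for these
// tests, where pivot values within a level are distinct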
@Override
public boolean equals(Object o) {
boolean equal = false;
if (o instanceof ArrayList) {
List<?> otherList = (List<?>) o;
if (size() == otherList.size()) {
equal = true;
for (Object objectInOtherList : otherList) {
if (!contains(objectInOtherList)) {
equal = false;
}
}
}
}
return equal;
}
public int indexOf(Object o) {
for (int i = 0; i < size(); i++) {
if (get(i).equals(o)) {
return i;
}
}
return -1;
}
}
public class PivotFieldComparator implements Comparator<PivotField> {
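// NOTE: sorts by count descending, breaking ties by value descending
// (both comparisons are o2-vs-o1)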
@Override
public int compare(PivotField o1, PivotField o2) {
Integer compare = (Integer.valueOf(o2.getCount())).compareTo(Integer
.valueOf(o1.getCount()));
if (compare == 0) {
compare = ((String) o2.getValue()).compareTo((String) o1.getValue());
}
return compare;
}
}
}

View File

@ -0,0 +1,118 @@
package org.apache.solr.handler.component;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.handler.component.PivotFacetField;
import org.apache.lucene.util.TestUtil;
import java.util.List;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.Collections;
/**
* A lightweight test of various helper methods used in pivot faceting
*
**/
public class TestPivotHelperCode extends SolrTestCaseJ4{
/**
* test refinement encoding/decoding matches specific expected encoded values
* @see PivotFacetHelper#encodeRefinementValuePath
* @see PivotFacetHelper#decodeRefinementValuePath
*/
public void testRefinementStringEncodingWhiteBox() {
// trivial example with some basci escaping of an embedded comma
assertBiDirectionalEncoding(strs("foo,bar","yak","zat"), "~foo\\,bar,~yak,~zat");
// simple single valued case
assertBiDirectionalEncoding( strs("foo"), "~foo");
// special case: empty list
assertBiDirectionalEncoding(strs(), "");
// special case: single element list containing empty string
assertBiDirectionalEncoding(strs(""), "~");
// special case: single element list containing null
assertBiDirectionalEncoding(strs((String)null), "^");
// mix of empty strings & null with other values
assertBiDirectionalEncoding(strs("", "foo", "", "", null, "bar"),
"~,~foo,~,~,^,~bar");
}
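// A minimal sketch of the encoding exercised above, inferred from the expected
// values (an assumption, not the actual PivotFacetHelper implementation):
// each value is prefixed with '~' (or encoded as a bare '^' when null), any
// embedded '\' or ',' is backslash-escaped, and the results are joined with ','
// -- so decoding is presumably a split on unescaped commas plus prefix-stripping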
/**
* test that refinement encoding/decoding of random sets of values can be round tripped,
* w/o worrying about what the actual encoding looks like
*
* @see PivotFacetHelper#encodeRefinementValuePath
* @see PivotFacetHelper#decodeRefinementValuePath
*/
public void testRefinementStringEncodingBlockBoxRoundTrip() {
// random data: we should be able to round trip any set of random strings
final int numIters = atLeast(100);
for (int i = 0; i < numIters; i++) {
final int numStrs = atLeast(1);
List<String> data = new ArrayList<String>(numStrs);
for (int j = 0; j < numStrs; j++) {
// :TODO: mix in nulls
data.add(TestUtil.randomUnicodeString(random()));
}
String encoded = PivotFacetHelper.encodeRefinementValuePath(data);
List<String> decoded = PivotFacetHelper.decodeRefinementValuePath(encoded);
assertEquals(data, decoded);
}
}
private void assertBiDirectionalEncoding(List<String> data, String encoded) {
assertEquals(data, PivotFacetHelper.decodeRefinementValuePath(encoded));
assertEquals(encoded, PivotFacetHelper.encodeRefinementValuePath(data));
}
public void testCompareWithNullLast() throws Exception {
Long a = random().nextLong();
Long b = random().nextLong();
assertEquals(a.compareTo(b), PivotFacetFieldValueCollection.compareWithNullLast(a, b));
assertEquals(b.compareTo(a), PivotFacetFieldValueCollection.compareWithNullLast(b, a));
Long bb = new Long(b.longValue());
assertEquals(0, PivotFacetFieldValueCollection.compareWithNullLast(b, bb));
assertEquals(0, PivotFacetFieldValueCollection.compareWithNullLast(null, null));
assertTrue( PivotFacetFieldValueCollection.compareWithNullLast(a, null) < 0 );
assertTrue( PivotFacetFieldValueCollection.compareWithNullLast(b, null) < 0 );
assertTrue( 0 < PivotFacetFieldValueCollection.compareWithNullLast(null, a) );
assertTrue( 0 < PivotFacetFieldValueCollection.compareWithNullLast(null, b) );
}
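// A minimal sketch of the semantics asserted above (an assumption, not the
// actual implementation): nulls sort after every non-null value.
//
//   static <T extends Comparable<T>> int compareWithNullLast(T a, T b) {
//     if (a == null) return (b == null) ? 0 : 1;  // null == null, null > non-null
//     if (b == null) return -1;                   // non-null < null
//     return a.compareTo(b);                      // natural order otherwise
//   }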
private List<String> strs(String... strs) {
return Arrays.<String>asList(strs);
}
}

View File

@ -17,6 +17,7 @@
package org.apache.solr.util;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
@ -31,6 +32,29 @@ import org.junit.Assert;
*
*/
public class TestUtils extends LuceneTestCase {
public void testJoin() {
assertEquals("a|b|c", StrUtils.join(Arrays.asList("a","b","c"), '|'));
assertEquals("a,b,c", StrUtils.join(Arrays.asList("a","b","c"), ','));
assertEquals("a\\,b,c", StrUtils.join(Arrays.asList("a,b","c"), ','));
assertEquals("a,b|c", StrUtils.join(Arrays.asList("a,b","c"), '|'));
assertEquals("a\\\\b|c", StrUtils.join(Arrays.asList("a\\b","c"), '|'));
}
public void testEscapeTextWithSeparator() {
assertEquals("a", StrUtils.escapeTextWithSeparator("a", '|'));
assertEquals("a", StrUtils.escapeTextWithSeparator("a", ','));
assertEquals("a\\|b", StrUtils.escapeTextWithSeparator("a|b", '|'));
assertEquals("a|b", StrUtils.escapeTextWithSeparator("a|b", ','));
assertEquals("a,b", StrUtils.escapeTextWithSeparator("a,b", '|'));
assertEquals("a\\,b", StrUtils.escapeTextWithSeparator("a,b", ','));
assertEquals("a\\\\b", StrUtils.escapeTextWithSeparator("a\\b", ','));
assertEquals("a\\\\\\,b", StrUtils.escapeTextWithSeparator("a\\,b", ','));
}
public void testSplitEscaping() {
List<String> arr = StrUtils.splitSmart("\\r\\n:\\t\\f\\b", ":", true);
assertEquals(2,arr.size());

View File

@ -390,10 +390,19 @@ public class QueryResponse extends SolrResponseBase
ArrayList<PivotField> values = new ArrayList<>( list.size() );
for( NamedList nl : list ) {
// NOTE, this is cheating, but we know the order they are written in, so no need to check
assert "field".equals(nl.getName(0));
String f = (String)nl.getVal( 0 );
assert "value".equals(nl.getName(1));
Object v = nl.getVal( 1 );
assert "count".equals(nl.getName(2));
int cnt = ((Integer)nl.getVal( 2 )).intValue();
List<PivotField> p = (nl.size()<4)?null:readPivots((List<NamedList>)nl.getVal(3) );
List<PivotField> p = null;
if (4 <= nl.size()) {
assert "pivot".equals(nl.getName(3));
Object subPiv = nl.getVal(3);
assert null != subPiv : "Server sent back 'null' for sub pivots?";
p = readPivots( (List<NamedList>) subPiv );
}
values.add( new PivotField( f, v, cnt, p ) );
}
return values;

View File

@ -99,6 +99,24 @@ public interface FacetParams {
*/
public static final String FACET_MISSING = FACET + ".missing";
static final String FACET_OVERREQUEST = FACET + ".overrequest";
/**
* The ratio by which to over-request when performing initial distributed requests.
*
* Default value is 1.5
*/
public static final String FACET_OVERREQUEST_RATIO = FACET_OVERREQUEST + ".ratio";
/**
* An additional amount to over-request by when performing initial distributed requests. This
* value will be added after accounting for the over-request ratio.
*
* Default value is 10
*/
public static final String FACET_OVERREQUEST_COUNT = FACET_OVERREQUEST + ".count";
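// For example, assuming the usual distributed over-request formula of
// effectiveShardLimit = (int)(limit * ratio) + count: with facet.limit=10 and the
// defaults above, each shard would be asked for (int)(10 * 1.5) + 10 = 25 terms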
/**
* Comma separated list of fields to pivot

View File

@ -143,7 +143,10 @@ public class StrUtils {
return result;
}
/** Creates a backslash escaped string, joining all the items. */
/**
* Creates a backslash escaped string, joining all the items.
* @see #escapeTextWithSeparator
*/
public static String join(List<?> items, char separator) {
StringBuilder sb = new StringBuilder(items.size() << 3);
boolean first=true;
@ -154,13 +157,7 @@ public class StrUtils {
} else {
sb.append(separator);
}
for (int i=0; i<item.length(); i++) {
char ch = item.charAt(i);
if (ch=='\\' || ch == separator) {
sb.append('\\');
}
sb.append(ch);
}
appendEscapedTextToBuilder(sb, item, separator);
}
return sb.toString();
}
@ -283,4 +280,31 @@ public class StrUtils {
}
}
/**
* Creates a new copy of the string with the separator backslash escaped.
* @see #join
*/
public static String escapeTextWithSeparator(String item, char separator) {
StringBuilder sb = new StringBuilder(item.length() * 2);
appendEscapedTextToBuilder(sb, item, separator);
return sb.toString();
}
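// e.g. escapeTextWithSeparator("a,b", ',') returns "a\,b", so that
// join(Arrays.asList("a,b","c"), ',') produces "a\,b,c" (see TestUtils#testJoin)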
/**
* Writes chars from item to out, backslash escaping as needed based on separator --
* but does not append the separator itself
*/
public static void appendEscapedTextToBuilder(StringBuilder out,
String item,
char separator) {
for (int i = 0; i < item.length(); i++) {
char ch = item.charAt(i);
if (ch == '\\' || ch == separator) {
out.append('\\');
}
out.append(ch);
}
}
}

View File

@ -58,6 +58,7 @@ import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrRequestHandler;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.TrieDateField;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.servlet.DirectSolrConnection;
import org.apache.solr.util.AbstractSolrTestCase;
@ -93,11 +94,13 @@ import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.logging.ConsoleHandler;
@ -2050,5 +2053,44 @@ public abstract class SolrTestCaseJ4 extends LuceneTestCase {
return true;
}
/**
* Returns <code>likely</code> most of the time (9/10ths), otherwise <code>unlikely</code>
*/
public static Object skewed(Object likely, Object unlikely) {
return (0 == TestUtil.nextInt(random(), 0, 9)) ? unlikely : likely;
}
/**
* Returns a randomly generated Date in the appropriate Solr external (input) format
* @see #randomSkewedDate
*/
public static String randomDate() {
return TrieDateField.formatExternal(new Date(random().nextLong()));
}
/**
* Returns a Date such that all results from this method always have the same values for
* year+month+day+hour+minute but the seconds are randomized. This can be helpful for
* indexing documents with random date values that are biased toward a narrow window
* (one day) to test collisions/overlaps
*
* @see #randomDate
*/
public static String randomSkewedDate() {
return String.format(Locale.ROOT, "2010-10-31T10:31:%02d.000Z",
TestUtil.nextInt(random(), 0, 59));
}
/**
* We want "realistic" unicode strings beyond simple ascii, but because our
* updates use XML we need to ensure we don't get characters from the "Specials" code block.
*/
public static String randomXmlUsableUnicodeString() {
String result = TestUtil.randomRealisticUnicodeString(random());
if (result.matches(".*\\p{InSpecials}.*")) {
result = TestUtil.randomSimpleString(random());
}
return result;
}
}