SOLR-5743: introduce BlockJoinFacet*Component, which act on the child.facet.field request parameter

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1721644 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Mikhail Khludnev 2015-12-24 17:04:56 +00:00
parent ef2aa314c5
commit 2897b29d53
14 changed files with 1865 additions and 9 deletions

View File

@ -190,8 +190,40 @@ public class ToParentBlockJoinQuery extends Query {
return Explanation.noMatch("Not a match");
}
}
static class BlockJoinScorer extends Scorer {
/**
 * Common superclass of the scorer used by {@link ToParentBlockJoinQuery}. It lets a
 * caller ask the scorer to record matched child documents and read them back.
 * @lucene.experimental it might be removed at <b>6.0</b>
 */
public static abstract class ChildrenMatchesScorer extends Scorer {

  /**
   * Forwards the weight to the {@link Scorer} constructor.
   * @param weight weight this scorer belongs to
   */
  protected ChildrenMatchesScorer(Weight weight) {
    super(weight);
  }

  /**
   * Switches on recording of the matched children.
   */
  public abstract void trackPendingChildHits();

  /**
   * Tells how many matched children have been recorded.
   * @return number of recorded matched children docs
   */
  public abstract int getChildCount();

  /**
   * Hands out the recorded matched children and installs a buffer for the next parent.
   * @param other array to record the matching children docs of the next parent into;
   *              it might be null (that's slower) or the same array which was returned
   *              from the previous call
   * @return array holding {@link #getChildCount()} matched children docnums
   */
  public abstract int[] swapChildDocs(int[] other);
}
static class BlockJoinScorer extends ChildrenMatchesScorer{
private final Scorer childScorer;
private final BitSet parentBits;
private final ScoreMode scoreMode;
@ -218,7 +250,8 @@ public class ToParentBlockJoinQuery extends Query {
return Collections.singleton(new ChildScorer(childScorer, "BLOCK_JOIN"));
}
int getChildCount() {
@Override
public int getChildCount() {
return childDocUpto;
}
@ -226,7 +259,8 @@ public class ToParentBlockJoinQuery extends Query {
return parentDoc;
}
int[] swapChildDocs(int[] other) {
@Override
public int[] swapChildDocs(int[] other) {
final int[] ret = pendingChildDocs;
if (other == null) {
pendingChildDocs = new int[5];
@ -413,6 +447,7 @@ public class ToParentBlockJoinQuery extends Query {
/**
* Instructs this scorer to keep track of the child docIds and score ids for retrieval purposes.
*/
@Override
public void trackPendingChildHits() {
pendingChildDocs = new int[5];
if (scoreMode != ScoreMode.None) {

View File

@ -242,6 +242,9 @@ New Features
* SOLR-8428: RuleBasedAuthorizationPlugin adds an 'all' permission (noble)
* SOLR-5743: BlockJoinFacetComponent and BlockJoinDocSetFacetComponent for calculating facets by
child.facet.field parameter with {!parent ..}.. query. They count facets on children documents
aggregating (deduplicating) counts by parent documents (Dr. Oleg Savrasov via Mikhail Khludnev)
Bug Fixes
----------------------

View File

@ -1375,7 +1375,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
return qr.getDocList();
}
static final int NO_CHECK_QCACHE = 0x80000000;
public static final int NO_CHECK_QCACHE = 0x80000000;
public static final int GET_DOCSET = 0x40000000;
static final int NO_CHECK_FILTERCACHE = 0x20000000;
static final int NO_SET_QCACHE = 0x10000000;

View File

@ -0,0 +1,220 @@
package org.apache.solr.search.join;
import java.io.IOException;
import java.util.List;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.join.ToParentBlockJoinQuery;
import org.apache.solr.common.SolrException;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.BitDocSet;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.Filter;
import org.apache.solr.search.QueryContext;
import org.apache.solr.search.facet.BlockJoin;
import org.apache.solr.search.join.BlockJoinFieldFacetAccumulator.AggregatableDocIter;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Variant of {@link BlockJoinFacetComponent} which computes the child facets from docsets
 * instead of from a collector hooked into the search. It should be faster for mostly
 * static indexes. This component doesn't impact query result caching, but hits the
 * filter cache to retrieve docsets.
 */
public class BlockJoinDocSetFacetComponent extends BlockJoinFacetComponent {

  /** Request-context key under which {@link #prepare} keeps the block join query. */
  private final String bjqKey = this.getClass().getSimpleName()+".bjq";

  /**
   * Iterates the matched children of a single segment and reports, as aggregation key,
   * the segment-local doc id of the parent that closes the block of the last returned child.
   */
  private static final class SegmentChildren implements AggregatableDocIter {
    final LeafReaderContext segment;
    final DocIdSet childrenMatches;
    private final BitDocSet parents;
    private DocIdSetIterator children;
    private int upcoming = DocIdSetIterator.NO_MORE_DOCS;
    private int parentOfLast = -1;

    private SegmentChildren(LeafReaderContext subCtx, DocIdSet dis, BitDocSet allParentsBitsDocSet) {
      this.segment = subCtx;
      this.childrenMatches = dis;
      this.parents = allParentsBitsDocSet;
      reset();
    }

    @Override
    public boolean hasNext() {
      return DocIdSetIterator.NO_MORE_DOCS != upcoming;
    }

    @Override
    public Integer next() {
      return nextDoc();
    }

    @Override
    public int nextDoc() {
      assert upcoming != DocIdSetIterator.NO_MORE_DOCS;
      final int result = upcoming;
      if (result > parentOfLast) {
        // the previous block is behind us: look up the parent of the new block in the
        // top-level bitset and translate it back into this segment's doc id space
        final int base = segment.docBase;
        parentOfLast = parents.getBits().nextSetBit(result + base) - base;
      }
      try {
        upcoming = children.nextDoc();
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
      return result;
    }

    @Override
    public float score() {
      return 0;
    }

    @Override
    public void reset() {
      parentOfLast = -1;
      try {
        children = childrenMatches.iterator();
        // a null iterator stands for "no matches in this segment"
        upcoming = (children == null) ? DocIdSetIterator.NO_MORE_DOCS : children.nextDoc();
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }

    @Override
    public int getAggKey() {
      return parentOfLast;
    }
  }

  /**
   * Facet collector used purely as a counter: its delegate asks for no scores and
   * provides no leaf collector.
   */
  private static final class NoDelegateFacetCollector extends BlockJoinFacetCollector {

    private NoDelegateFacetCollector(SolrQueryRequest req) throws IOException {
      super(req);
      setDelegate(new Collector() {
        @Override
        public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
          return null;
        }

        @Override
        public boolean needsScores() {
          return false;
        }
      });
    }
  }

  public BlockJoinDocSetFacetComponent() {}

  /**
   * When child facet fields are requested: validates the query, asks for the result
   * docset and remembers the block join query in the request context.
   */
  @Override
  public void prepare(ResponseBuilder rb) throws IOException {
    if (getChildFacetFields(rb.req) == null) {
      return;
    }
    validateQuery(rb.getQuery());
    rb.setNeedDocSet(true);
    rb.req.getContext().put(bjqKey, extractChildQuery(rb.getQuery()));
  }

  /**
   * Finds the single {@link ToParentBlockJoinQuery}: either the query itself or exactly
   * one direct clause of a {@link BooleanQuery}.
   * @throws SolrException (BAD_REQUEST) when there is none, or more than one clause qualifies
   */
  private ToParentBlockJoinQuery extractChildQuery(Query query) {
    if (query instanceof ToParentBlockJoinQuery) {
      return (ToParentBlockJoinQuery) query;
    }
    if (query instanceof BooleanQuery) {
      ToParentBlockJoinQuery found = null;
      for (BooleanClause clause : ((BooleanQuery) query).clauses()) {
        if (!(clause.getQuery() instanceof ToParentBlockJoinQuery)) {
          continue;
        }
        if (found != null) {
          throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "can't choose between " +
              found + " and " + clause.getQuery());
        }
        found = (ToParentBlockJoinQuery) clause.getQuery();
      }
      if (found != null) {
        return found;
      }
    }
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, NO_TO_PARENT_BJQ_MESSAGE);
  }

  /**
   * Counts facets over the children of the matched parents, segment by segment, then
   * lets the superclass write them into the response.
   */
  @Override
  public void process(ResponseBuilder rb) throws IOException {
    final BlockJoinParentQParser.AllParentsAware bjq =
        (BlockJoinParentQParser.AllParentsAware) rb.req.getContext().get(bjqKey);
    if (bjq == null) {
      return;
    }

    final DocSet matchedParents = rb.getResults().docSet;
    final BitDocSet allParents = rb.req.getSearcher().getDocSetBits(bjq.getParentQuery());
    final BitDocSet allDocs = rb.req.getSearcher().getDocSetBits(new MatchAllDocsQuery());
    final DocSet childrenOfMatchedParents = BlockJoin.toChildren(matchedParents, allParents, allDocs,
        QueryContext.newContext(rb.req.getSearcher()));

    // only children hit by the child query are counted; parents themselves are not
    final DocSet childQueryMatches = rb.req.getSearcher().getDocSet(bjq.getChildQuery());
    final DocSet countedChildren = childrenOfMatchedParents.intersection(childQueryMatches);

    final List<LeafReaderContext> leaves = rb.req.getSearcher().getIndexReader().leaves();
    final Filter childrenFilter = countedChildren.getTopFilter();
    final BlockJoinFacetCollector facetCounter = new NoDelegateFacetCollector(rb.req);

    for (LeafReaderContext leaf : leaves) {
      // solr docsets already exclude any deleted docs
      final DocIdSet leafChildren = childrenFilter.getDocIdSet(leaf, null);
      final AggregatableDocIter leafIter = new SegmentChildren(leaf, leafChildren, allParents);
      if (!leafIter.hasNext()) {
        continue;
      }
      facetCounter.doSetNextReader(leaf);
      facetCounter.countFacets(leafIter);
    }
    facetCounter.finish();

    rb.req.getContext().put(COLLECTOR_CONTEXT_PARAM, facetCounter);
    super.process(rb);
  }
}

View File

@ -0,0 +1,132 @@
package org.apache.solr.search.join;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.LinkedList;
import java.util.Queue;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.join.ToParentBlockJoinQuery.ChildrenMatchesScorer;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.DelegatingCollector;
import org.apache.solr.search.join.BlockJoinFieldFacetAccumulator.AggregatableDocIter;
import org.apache.solr.search.join.BlockJoinFieldFacetAccumulator.SortedIntsAggDocIterator;
/**
 * For each collected parent document takes the matched children recorded by the block join
 * scorer and feeds them, keyed by that parent, to every {@link BlockJoinFieldFacetAccumulator}.
 * The parent document itself is not counted.
 */
class BlockJoinFacetCollector extends DelegatingCollector {
  /** one accumulator per requested child facet field */
  private final BlockJoinFieldFacetAccumulator[] blockJoinFieldFacetAccumulators;
  private boolean firstSegment = true;
  private ChildrenMatchesScorer blockJoinScorer;
  /** buffer exchanged with the scorer on every parent, see {@link #incrementFacets(int)} */
  private int[] childDocs = new int[0];

  /**
   * @param req request carrying the child facet field names and the searcher
   * @throws IOException if an accumulator fails to initialize
   */
  BlockJoinFacetCollector(SolrQueryRequest req) throws IOException {
    String[] facetFieldNames = BlockJoinFacetComponent.getChildFacetFields(req);
    assert facetFieldNames != null;
    blockJoinFieldFacetAccumulators = new BlockJoinFieldFacetAccumulator[facetFieldNames.length];
    for (int i = 0; i < facetFieldNames.length; i++) {
      blockJoinFieldFacetAccumulators[i] = new BlockJoinFieldFacetAccumulator(facetFieldNames[i], req.getSearcher());
    }
  }

  @Override
  public void setScorer(Scorer scorer) throws IOException {
    super.setScorer(scorer);
    blockJoinScorer = getToParentScorer(scorer, new LinkedList<>());
    if (blockJoinScorer != null) {
      // instruct scorer to keep track of the child docIds for retrieval purposes.
      blockJoinScorer.trackPendingChildHits();
    }
  }

  /**
   * Breadth-first search through the scorer tree for the first {@link ChildrenMatchesScorer}.
   * @param scorer root of the search, may be null
   * @param queue work list for scorers still to visit
   * @return the first matching scorer, or null when the tree holds none
   */
  private ChildrenMatchesScorer getToParentScorer(Scorer scorer, Queue<Scorer> queue) {
    Scorer current = scorer;
    while (current != null && !(current instanceof ChildrenMatchesScorer)) {
      for (Scorer.ChildScorer child : current.getChildren()) {
        queue.add(child.child);
      }
      current = queue.poll();
    }
    return (ChildrenMatchesScorer) current;
  }

  @Override
  protected void doSetNextReader(LeafReaderContext context) throws IOException {
    for (BlockJoinFieldFacetAccumulator blockJoinFieldFacetAccumulator : blockJoinFieldFacetAccumulators) {
      if (!firstSegment) {
        // fold the counts of the segment we are leaving before switching
        blockJoinFieldFacetAccumulator.migrateGlobal();
      }
      blockJoinFieldFacetAccumulator.setNextReader(context);
    }
    firstSegment = false;
    super.doSetNextReader(context);
  }

  @Override
  public void collect(int doc) throws IOException {
    incrementFacets(doc);
    super.collect(doc);
  }

  @Override
  public void finish() throws IOException {
    // fold the counts of the last visited segment
    for (BlockJoinFieldFacetAccumulator blockJoinFieldFacetAccumulator : blockJoinFieldFacetAccumulators) {
      blockJoinFieldFacetAccumulator.migrateGlobal();
    }
    super.finish();
  }

  /**
   * Counts the children recorded by the block join scorer for the given parent.
   * @param parent doc id of the collected parent, used as the aggregation key
   * @throws IllegalStateException if no block join scorer was found by {@link #setScorer(Scorer)}
   */
  protected void incrementFacets(int parent) throws IOException {
    if (blockJoinScorer == null) {
      throw new IllegalStateException("no block join scorer is available to report matched children of parent " + parent);
    }
    // Exchange buffers with the scorer: we get the children of this parent and give back
    // the array we consumed for the previous one, so it can be reused instead of regrown.
    childDocs = blockJoinScorer.swapChildDocs(childDocs);
    final int childCountNoParent = blockJoinScorer.getChildCount();
    countFacets(new SortedIntsAggDocIterator(childDocs, childCountNoParent, parent));
  }

  /**
   * Passes the block of documents to every field accumulator.
   * @param iter documents to count together with their aggregation key
   */
  protected void countFacets(final AggregatableDocIter iter) throws IOException {
    for (BlockJoinFieldFacetAccumulator blockJoinFieldFacetAccumulator : blockJoinFieldFacetAccumulators) {
      blockJoinFieldFacetAccumulator.updateCountsWithMatchedBlock(iter);
    }
  }

  /**
   * @return facet values of every accumulator, keyed by field name
   */
  NamedList getFacets() {
    NamedList<NamedList<Integer>> facets = new NamedList<>(blockJoinFieldFacetAccumulators.length);
    for (BlockJoinFieldFacetAccumulator state : blockJoinFieldFacetAccumulators) {
      facets.add(state.getFieldName(), state.getFacetValue());
    }
    return facets;
  }
}

View File

@ -0,0 +1,183 @@
package org.apache.solr.search.join;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.join.ToParentBlockJoinQuery;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.ShardParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.handler.component.SearchComponent;
import org.apache.solr.handler.component.ShardRequest;
import org.apache.solr.handler.component.ShardResponse;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.DelegatingCollector;
import org.apache.solr.search.SolrIndexSearcher;
/**
* Calculates facets on children documents and aggregates hits by parent documents.
* Enables when child.facet.field parameter specifies a field name for faceting.
* So far it supports string fields only. It requires to search by {@link ToParentBlockJoinQuery}.
* It disables query result cache but only when it's ebaled for request by child.facet.field parameter
* */
public class BlockJoinFacetComponent extends SearchComponent {
public static final String CHILD_FACET_FIELD_PARAMETER = "child.facet.field";
public static final String NO_TO_PARENT_BJQ_MESSAGE = "Block join faceting is allowed with ToParentBlockJoinQuery only";
public static final String COLLECTOR_CONTEXT_PARAM = "blockJoinFacetCollector";
@Override
public void prepare(ResponseBuilder rb) throws IOException {
if (getChildFacetFields(rb.req) != null) {
validateQuery(rb.getQuery());
// we count facets only when searching
rb.setFieldFlags(rb.getFieldFlags() | SolrIndexSearcher.NO_CHECK_QCACHE);
if (rb.getFilters() == null) {
rb.setFilters(new LinkedList<Query>());
}
DelegatingCollector blockJoinFacetCollector = new BlockJoinFacetCollector(rb.req);
rb.req.getContext().put(COLLECTOR_CONTEXT_PARAM, blockJoinFacetCollector);
rb.getFilters().add(new BlockJoinFacetFilter(blockJoinFacetCollector));
}
}
protected void validateQuery(Query query) {
if (!(query instanceof ToParentBlockJoinQuery)) {
if (query instanceof BooleanQuery) {
List<BooleanClause> clauses = ((BooleanQuery) query).clauses();
for (BooleanClause clause : clauses) {
if (clause.getQuery() instanceof ToParentBlockJoinQuery) {
return;
}
}
}
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, NO_TO_PARENT_BJQ_MESSAGE);
}
}
static String[] getChildFacetFields(SolrQueryRequest req) {
return req.getParams().getParams(CHILD_FACET_FIELD_PARAMETER);
}
@Override
public void process(ResponseBuilder rb) throws IOException {
if (getChildFacetFields(rb.req) != null) {
BlockJoinFacetCollector blockJoinFacetCollector = (BlockJoinFacetCollector) rb.req.getContext().get(COLLECTOR_CONTEXT_PARAM);
assert blockJoinFacetCollector != null;
NamedList output;
if (isShard(rb)) {
// distributed search, put results into own cell in order not to clash with facet component
output = getChildFacetFields(rb.rsp.getValues(), true);
} else {
// normal process, put results into standard response
output = getFacetFieldsList(rb);
}
mergeFacets(output, blockJoinFacetCollector.getFacets());
}
}
private boolean isShard(ResponseBuilder rb) {
return "true".equals(rb.req.getParams().get(ShardParams.IS_SHARD));
}
private NamedList getChildFacetFields(NamedList responseValues, boolean createIfAbsent) {
return getNamedListFromList(responseValues, "child_facet_fields", createIfAbsent);
}
private void mergeFacets(NamedList childFacetFields, NamedList shardFacets) {
if (shardFacets != null) {
for (Map.Entry<String, NamedList<Integer>> nextShardFacet : (Iterable<Map.Entry<String, NamedList<Integer>>>) shardFacets) {
String fieldName = nextShardFacet.getKey();
NamedList<Integer> collectedFacet = (NamedList<Integer>) childFacetFields.get(fieldName);
NamedList<Integer> shardFacet = nextShardFacet.getValue();
if (collectedFacet == null) {
childFacetFields.add(fieldName, shardFacet);
} else {
mergeFacetValues(collectedFacet, shardFacet);
}
}
}
}
private void mergeFacetValues(NamedList<Integer> collectedFacetValue, NamedList<Integer> shardFacetValue) {
for (Map.Entry<String, Integer> nextShardValue : shardFacetValue) {
String facetValue = nextShardValue.getKey();
Integer shardCount = nextShardValue.getValue();
int indexOfCollectedValue = collectedFacetValue.indexOf(facetValue, 0);
if (indexOfCollectedValue == -1) {
collectedFacetValue.add(facetValue, shardCount);
} else {
int newCount = collectedFacetValue.getVal(indexOfCollectedValue) + shardCount;
collectedFacetValue.setVal(indexOfCollectedValue, newCount);
}
}
}
private NamedList getNamedListFromList(NamedList parentList, String name, boolean createIfAbsent) {
NamedList result = null;
if (parentList != null) {
result = (NamedList) parentList.get(name);
if (result == null && createIfAbsent) {
result = new NamedList();
parentList.add(name, result);
}
}
return result;
}
@Override
public void handleResponses(ResponseBuilder rb, ShardRequest sreq) {
NamedList collectedChildFacetFields = getChildFacetFields(rb.rsp.getValues(), true);
List<ShardResponse> responses = sreq.responses;
for (ShardResponse shardResponse : responses) {
NamedList shardChildFacetFields = getChildFacetFields(shardResponse.getSolrResponse().getResponse(), false);
mergeFacets(collectedChildFacetFields, shardChildFacetFields);
}
}
@Override
public void finishStage(ResponseBuilder rb) {
if (rb.stage != ResponseBuilder.STAGE_GET_FIELDS) return;
NamedList childFacetFields = getChildFacetFields(rb.rsp.getValues(), true);
NamedList facetFields = getFacetFieldsList(rb);
for (Map.Entry<String, NamedList> childFacetField : (Iterable<Map.Entry<String, NamedList>>) childFacetFields) {
facetFields.add(childFacetField.getKey(), childFacetField.getValue());
}
rb.rsp.getValues().remove("child_facet_fields");
}
private NamedList getFacetFieldsList(ResponseBuilder rb) {
NamedList facetCounts = getNamedListFromList(rb.rsp.getValues(), "facet_counts", true);
return getNamedListFromList(facetCounts, "facet_fields", true);
}
@Override
public String getDescription() {
return "BlockJoin facet component";
}
}

View File

@ -0,0 +1,74 @@
package org.apache.solr.search.join;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.solr.search.DelegatingCollector;
import org.apache.solr.search.PostFilter;
/**
 * Filter query whose only purpose is to hand the given facet collector to the search
 * as a {@link PostFilter}. It is never cached and its cost is fixed to {@link #COST};
 * the setters are deliberately no-ops.
 */
class BlockJoinFacetFilter extends Query implements PostFilter {

  public static final int COST = 120;

  private final DelegatingCollector blockJoinFacetCollector;

  /**
   * @param blockJoinFacetCollector collector returned from {@link #getFilterCollector(IndexSearcher)}
   */
  public BlockJoinFacetFilter(DelegatingCollector blockJoinFacetCollector) {
    super();
    this.blockJoinFacetCollector = blockJoinFacetCollector;
  }

  /**
   * @return a fixed, non-null description, so that debug output and string concatenation
   *         never have to deal with null
   */
  @Override
  public String toString(String field) {
    return getClass().getSimpleName();
  }

  @Override
  public DelegatingCollector getFilterCollector(IndexSearcher searcher) {
    return blockJoinFacetCollector;
  }

  @Override
  public boolean getCache() {
    return false;
  }

  /** Ignored: this filter is never cached. */
  @Override
  public void setCache(boolean cache) {
  }

  @Override
  public int getCost() {
    return COST;
  }

  /** Ignored: the cost is always {@link #COST}. */
  @Override
  public void setCost(int cost) {
  }

  @Override
  public boolean getCacheSep() {
    return false;
  }

  /** Ignored: this filter is never cached. */
  @Override
  public void setCacheSep(boolean cacheSep) {
  }
}

View File

@ -0,0 +1,221 @@
package org.apache.solr.search.join;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.LongValues;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.request.DocValuesFacets;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.SolrIndexSearcher;
/**
 * This class is responsible for collecting block join facet counts for a particular field.
 * Counts are gathered per segment in {@code segmentAccums} and, when an ordinal map is
 * available, folded into {@code globalCounts} by {@link #migrateGlobal()}.
 */
class BlockJoinFieldFacetAccumulator {
  private final String fieldName;
  private final FieldType fieldType;
  private int currentSegment = -1;
  // for term lookups only
  private final SortedSetDocValues topSSDV;
  private int[] globalCounts;
  private SortedSetDocValues segmentSSDV;
  // elems are : facet value counter<<32 | last parent doc num
  // slot [0] is for docs without a value, slot [1+ord] for segment ordinal ord
  private long[] segmentAccums = new long[0];
  // for mapping per-segment ords to global ones
  private final MultiDocValues.OrdinalMap ordinalMap;
  private final SchemaField schemaField;
  // non-null when the current segment stores the field as single-valued
  private SortedDocValues segmentSDV;

  /**
   * @param fieldName name of the field to facet on
   * @param searcher searcher providing the schema and the top-level doc values
   * @throws IOException if the doc values can't be read
   */
  BlockJoinFieldFacetAccumulator(String fieldName, SolrIndexSearcher searcher) throws IOException {
    this.fieldName = fieldName;
    schemaField = searcher.getSchema().getField(fieldName);
    fieldType = schemaField.getType();
    MultiDocValues.OrdinalMap mapping = null;
    final SortedSetDocValues top;
    if (schemaField.multiValued()) {
      top = searcher.getLeafReader().getSortedSetDocValues(fieldName);
      if (top instanceof MultiDocValues.MultiSortedSetDocValues) {
        mapping = ((MultiDocValues.MultiSortedSetDocValues) top).mapping;
      }
    } else {
      SortedDocValues single = searcher.getLeafReader().getSortedDocValues(fieldName);
      top = single == null ? null : DocValues.singleton(single);// npe friendly code
      if (single instanceof MultiDocValues.MultiSortedDocValues) {
        mapping = ((MultiDocValues.MultiSortedDocValues) single).mapping;
      }
    }
    topSSDV = top;
    ordinalMap = mapping;
  }

  /**
   * Loads the doc values of the segment and resets the per-segment accumulators.
   * @return whether the segment has any value for the field
   */
  private boolean initSegmentData(String fieldName, LeafReaderContext leaf) throws IOException {
    segmentSSDV = DocValues.getSortedSet(leaf.reader(), fieldName);
    final int slots = (int) segmentSSDV.getValueCount() + 1; // +1 for the "missing" slot
    segmentAccums = ArrayUtil.grow(segmentAccums, slots);
    // zero counts, -1 parent
    Arrays.fill(segmentAccums, 0, slots, 0x00000000ffffffffL);
    segmentSDV = DocValues.unwrapSingleton(segmentSSDV);
    return segmentSSDV.getValueCount() != 0;// perhaps we need to count "missings"??
  }

  /** Iterator over documents which also tells the key the current document is aggregated by. */
  interface AggregatableDocIter extends DocIterator {
    /** rewinds the iterator to its first document */
    void reset();
    /** a key to aggregate the current document */
    int getAggKey();
  }

  /** Iterates the first {@code childCount} entries of an array, all keyed by one parent doc. */
  static class SortedIntsAggDocIterator implements AggregatableDocIter {
    private final int[] childDocs;
    private final int childCount;
    private final int parentDoc;
    // starts at the first element, so the iterator is usable even without reset()
    private int pos = 0;

    public SortedIntsAggDocIterator(int[] childDocs, int childCount, int parentDoc) {
      this.childDocs = childDocs;
      this.childCount = childCount;
      this.parentDoc = parentDoc;
    }

    @Override
    public boolean hasNext() {
      return pos < childCount;
    }

    @Override
    public Integer next() {
      return nextDoc();
    }

    @Override
    public int nextDoc() {
      return childDocs[pos++];
    }

    @Override
    public float score() {
      return 0;
    }

    @Override
    public void reset() {
      pos = 0;
    }

    @Override
    public int getAggKey() {
      return parentDoc;
    }
  }

  /**
   * Counts every document of the iterator in the current segment. Each term is counted
   * once per aggregation key.
   * @param iter documents to count; it is reset before use
   */
  void updateCountsWithMatchedBlock(AggregatableDocIter iter) throws IOException {
    if (segmentSDV != null) {
      // some codecs may optimize SORTED_SET storage for single-valued fields
      for (iter.reset(); iter.hasNext(); ) {
        final int docNum = iter.nextDoc();
        int term = segmentSDV.getOrd(docNum);
        accumulateTermOrd(term, iter.getAggKey());
      }
    } else {
      for (iter.reset(); iter.hasNext(); ) {
        final int docNum = iter.nextDoc();
        segmentSSDV.setDocument(docNum);
        int term = (int) segmentSSDV.nextOrd();
        do { // absent values are designated by term=-1, first iteration counts [0] as "missing", and exit, otherwise it spins
          accumulateTermOrd(term, iter.getAggKey());
        } while (term >= 0 && (term = (int) segmentSSDV.nextOrd()) >= 0);
      }
    }
  }

  String getFieldName() {
    return fieldName;
  }

  /**
   * copy paste from {@link DocValuesFacets}
   * @return readable facet values with a positive count; the "missing" slot is not reported
   */
  NamedList<Integer> getFacetValue() {
    NamedList<Integer> facetValue = new NamedList<>();
    final CharsRefBuilder charsRef = new CharsRefBuilder(); // if there is no globs, take segment's ones
    final int slots = globalCounts != null ? globalCounts.length : segmentAccums.length;
    for (int i = 1; i < slots; i++) {
      int count = globalCounts != null ? globalCounts[i] : (int) (segmentAccums[i] >> 32);
      if (count > 0) {
        BytesRef term = topSSDV.lookupOrd(-1 + i);
        fieldType.indexedToReadable(term, charsRef);
        facetValue.add(charsRef.toString(), count);
      }
    }
    return facetValue;
  }

  // @todo we can track in max term nums to loop only changed range while migrating and labeling
  /**
   * Increments the counter of the term unless its last counted parent is {@code parentDoc}.
   * @param term segment ordinal, -1 for a missing value
   * @param parentDoc aggregation key, remembered in the lower 32 bits of the slot
   */
  private void accumulateTermOrd(int term, int parentDoc) {
    final long accum = segmentAccums[1 + term];
    if (((int) (accum & 0xffffffffL)) != parentDoc) {
      // incrementing older 32, reset smaller 32, set them to the new parent
      segmentAccums[1 + term] = ((accum + (0x1L << 32)) & (0xffffffffL << 32)) | parentDoc;
    }
  }

  /** Switches to the given segment, resetting the per-segment accumulators. */
  void setNextReader(LeafReaderContext context) throws IOException {
    initSegmentData(fieldName, context);
    currentSegment = context.ord;
  }

  /**
   * Folds the counts of the current segment into the global ones. Does nothing when no
   * segment was visited yet or when there is no ordinal map.
   */
  void migrateGlobal() {
    if (currentSegment < 0 // no hits
        || segmentAccums.length == 0
        || ordinalMap == null) { // single segment
      return;
    }

    if (globalCounts == null) {
      // it might be just a single segment
      globalCounts = new int[(int) ordinalMap.getValueCount() + /*[0] for missing*/1];
    } else {
      assert currentSegment >= 0;
    }

    migrateGlobal(globalCounts, segmentAccums, currentSegment, ordinalMap);
  }

  /** folds counts in segment ordinal space (segCounts) into global ordinal space (counts)
   * copy paste-from {@link DocValuesFacets#migrateGlobal(int[], int[], int, OrdinalMap)}*/
  void migrateGlobal(int counts[], long segCounts[], int subIndex, OrdinalMap map) {
    final LongValues ordMap = map.getGlobalOrds(subIndex);
    // missing count
    counts[0] += (int) (segCounts[0] >> 32);

    // migrate actual ordinals
    final long valueCount = segmentSSDV.getValueCount();
    for (int ord = 1; ord <= valueCount; ord++) {
      int count = (int) (segCounts[ord] >> 32);
      if (count != 0) {
        counts[1 + (int) ordMap.get(ord - 1)] += count;
      }
    }
  }
}

View File

@ -39,7 +39,7 @@ import org.apache.solr.search.SolrCache;
import org.apache.solr.search.SolrConstantScoreQuery;
import org.apache.solr.search.SyntaxError;
class BlockJoinParentQParser extends QParser {
public class BlockJoinParentQParser extends QParser {
/** implementation detail subject to change */
public String CACHE_NAME="perSegFilter";
@ -70,9 +70,8 @@ class BlockJoinParentQParser extends QParser {
return createQuery(parentQ, childrenQuery, scoreMode);
}
protected Query createQuery(Query parentList, Query query, String scoreMode) throws SyntaxError {
return new ToParentBlockJoinQuery(query, getFilter(parentList).filter,
ScoreModeParser.parse(scoreMode));
protected Query createQuery(final Query parentList, Query query, String scoreMode) throws SyntaxError {
return new AllParentsAware(query, getFilter(parentList).filter, ScoreModeParser.parse(scoreMode), parentList);
}
BitDocIdSetFilterWrapper getFilter(Query parentList) {
@ -98,6 +97,20 @@ class BlockJoinParentQParser extends QParser {
return new QueryBitSetProducer(parentQ);
}
/**
 * A {@link ToParentBlockJoinQuery} which additionally remembers the query
 * it was given for the parent documents.
 */
static final class AllParentsAware extends ToParentBlockJoinQuery {

  private final Query parentQuery;

  /**
   * @param childQuery passed through to {@link ToParentBlockJoinQuery}
   * @param parentsFilter passed through to {@link ToParentBlockJoinQuery}
   * @param scoreMode passed through to {@link ToParentBlockJoinQuery}
   * @param parentList query to be returned from {@link #getParentQuery()}
   */
  private AllParentsAware(Query childQuery, BitSetProducer parentsFilter, ScoreMode scoreMode,
                          Query parentList) {
    super(childQuery, parentsFilter, scoreMode);
    this.parentQuery = parentList;
  }

  /** @return the query handed to the constructor as {@code parentList} */
  public Query getParentQuery() {
    return this.parentQuery;
  }
}
// We need this wrapper since BitDocIdSetFilter does not extend Filter
static class BitDocIdSetFilterWrapper extends Filter {

View File

@ -0,0 +1,46 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<schema name="test" version="1.0">
<types>
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldtype name="string" class="solr.StrField" sortMissingLast="true"/>
</types>
<fields>
<!-- document key; declared as the uniqueKey of this schema -->
<field name="id" type="int" indexed="true" stored="true" multiValued="false" required="false"/>
<!-- NOTE(review): presumably needed because the test config enables an updateLog; confirm -->
<field name="_version_" type="long" indexed="true" stored="true" multiValued="false"/>
<!-- NOTE(review): presumably the block (root document) marker used for nested documents; confirm -->
<field name="_root_" type="int" indexed="true" stored="true" multiValued="false" required="false"/>
<!-- declared as defaultSearchField of this schema -->
<field name="name" type="string" indexed="true" stored="true"/>
<!-- single-valued strings without docValues, e.g. type_s, COLOR_s, SIZE_s, parent_s, child_s in the tests -->
<dynamicField name="*_s" type="string" indexed="true" stored="true" multiValued="false"/>
<!-- docValues-enabled fields; the random test facets on *_s_single / *_s_multi names -->
<dynamicField name="*_s_single" type="string" indexed="true" stored="true" docValues="true" multiValued="false"/>
<dynamicField name="*_s_multi" type="string" indexed="true" stored="true" docValues="true" multiValued="true"/>
<dynamicField name="*_i_multi" type="int" indexed="true" stored="true" docValues="true" multiValued="true"/>
<dynamicField name="*_f_multi" type="float" indexed="true" stored="true" docValues="true" multiValued="true"/>
</fields>
<defaultSearchField>name</defaultSearchField>
<uniqueKey>id</uniqueKey>
</schema>

View File

@ -0,0 +1,59 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<config>
<luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion>
<directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.RAMDirectoryFactory}"/>
<schemaFactory class="ClassicIndexSchemaFactory"/>
<dataDir>${solr.data.dir:}</dataDir>
<requestHandler name="standard" class="solr.StandardRequestHandler">
</requestHandler>
<requestHandler name="/replication" class="solr.ReplicationHandler" startup="lazy" />
<updateHandler class="solr.DirectUpdateHandler2">
<updateLog>
<str name="dir">${solr.ulog.dir:}</str>
</updateLog>
</updateHandler>
<!-- the two block join facet component implementations under test -->
<searchComponent name="blockJoinFacet" class="org.apache.solr.search.join.BlockJoinFacetComponent"/>
<searchComponent name="blockJoinDocSetFacet" class="org.apache.solr.search.join.BlockJoinDocSetFacetComponent"/>
<!-- handler running BlockJoinFacetComponent as last component; shards.qt points shard requests at the same handler -->
<requestHandler name="blockJoinFacetRH" class="org.apache.solr.handler.component.SearchHandler">
<lst name="defaults">
<str name="shards.qt">blockJoinFacetRH</str>
</lst>
<arr name="last-components">
<str>blockJoinFacet</str>
</arr>
</requestHandler>
<!-- same setup for BlockJoinDocSetFacetComponent; the tests pick one of the two handlers at random via qt -->
<requestHandler name="blockJoinDocSetFacetRH" class="org.apache.solr.handler.component.SearchHandler">
<lst name="defaults">
<str name="shards.qt">blockJoinDocSetFacetRH</str>
</lst>
<arr name="last-components">
<str>blockJoinDocSetFacet</str>
</arr>
</requestHandler>
</config>

View File

@ -0,0 +1,137 @@
package org.apache.solr.search.join;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.solr.BaseDistributedSearchTestCase;
import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.client.solrj.response.FacetField.Count;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.util.NamedList;
import org.junit.BeforeClass;
/**
 * Distributed check of the block join facet components: random parent/child blocks are indexed,
 * a {@code {!parent}} query with {@code child.facet.field} parameters is sent to one of the two
 * facet handlers, and every returned facet count is compared with the number of distinct parents
 * recorded for that value while indexing.
 */
@Slow
public class BlockJoinFacetDistribTest extends BaseDistributedSearchTestCase {

  @BeforeClass
  public static void beforeSuperClass() throws Exception {
    schemaString = "schema-blockjoinfacetcomponent.xml";
    configString = "solrconfig-blockjoinfacetcomponent.xml";
  }

  @ShardsFixed(num = 3)
  public void test() throws Exception {
    testBJQFacetComponent();
  }

  final static List<String> colors = Arrays.asList("red","blue","brown","white","black","yellow","cyan","magenta","blur",
      "fuchsia", "light","dark","green","grey","don't","know","any","more" );
  final static List<String> sizes = Arrays.asList("s","m","l","xl","xxl","xml","xxxl","3","4","5","6","petite","maxi");

  private void testBJQFacetComponent() throws Exception {
    // Colors and sizes are keys of one expectation map below, so the vocabularies must not overlap.
    // Collections.disjoint() is a pure check; removeAll() on the fixed-size Arrays.asList() view
    // would throw UnsupportedOperationException as soon as a shared value was found.
    assert Collections.disjoint(colors, sizes) : "colors and sizes must not share any value";

    Collections.shuffle(colors, random());
    List<String> matchingColors = colors.subList(0, Math.min(atLeast(random(), 2), colors.size()));

    // attribute value -> ids of parents owning a matching child with that value;
    // get() registers an empty set for a key that is not mapped yet
    Map<String, Set<Integer>> parentIdsByAttrValue = new HashMap<String, Set<Integer>>() {
      @Override
      public Set<Integer> get(Object key) {
        if (super.get(key) == null) {
          put((String) key, new HashSet<>());
        }
        return super.get(key);
      }
    };

    final int parents = atLeast(10);
    boolean aggregationOccurs = false;
    // keep indexing until some value was recorded twice for the same parent,
    // i.e. until counting parents differs from counting children
    for (int parent = 0; parent < parents || !aggregationOccurs; parent++) {
      assert parent < 2000000 : "parent num "+parent+
          " aggregationOccurs:"+aggregationOccurs+". Sorry! too tricky loop condition.";
      SolrInputDocument pdoc = new SolrInputDocument();
      pdoc.addField("id", parent);
      pdoc.addField("type_s", "parent");
      final String parentBrand = "brand" + (random().nextInt(5));
      pdoc.addField("BRAND_s", parentBrand);

      // note: the upper bound is drawn anew on every iteration
      for (int child = 0; child < atLeast(colors.size() / 2); child++) {
        SolrInputDocument childDoc = new SolrInputDocument();
        final String color = colors.get(random().nextInt(colors.size()));
        childDoc.addField("COLOR_s", color);
        final String size = sizes.get(random().nextInt(sizes.size()));
        childDoc.addField("SIZE_s", size);

        if (matchingColors.contains(color)) {
          final boolean colorDupe = !parentIdsByAttrValue.get(color).add(parent);
          final boolean sizeDupe = !parentIdsByAttrValue.get(size).add(parent);
          aggregationOccurs |= colorDupe || sizeDupe;
        }
        pdoc.addChildDocument(childDoc);
      }
      indexDoc(pdoc);
    }
    commit();

    handle.put("timestamp", SKIPVAL);
    handle.put("_version_", SKIPVAL); // not a cloud test, but may use updateLog
    handle.put("maxScore", SKIP);// see org.apache.solr.TestDistributedSearch.test()
    handle.put("shards", SKIP);
    handle.put("distrib", SKIP);
    handle.put("rid", SKIP);
    handle.put("track", SKIP);
    handle.put("facet_fields", UNORDERED);
    handle.put("SIZE_s", UNORDERED);
    handle.put("COLOR_s", UNORDERED);

    // to parent query
    final String childQueryClause = "COLOR_s:("+(matchingColors.toString().replaceAll("[,\\[\\]]", " "))+")";
    QueryResponse results = query("q", "{!parent which=\"type_s:parent\"}"+childQueryClause,
        "facet", random().nextBoolean() ? "true":"false",
        "qt", random().nextBoolean() ? "blockJoinDocSetFacetRH" : "blockJoinFacetRH",
        "child.facet.field", "COLOR_s",
        "child.facet.field", "SIZE_s",
        "rows","0" // we care only abt results
        );
    NamedList<Object> resultsResponse = results.getResponse();
    assertNotNull(resultsResponse);
    FacetField color_s = results.getFacetField("COLOR_s");
    FacetField size_s = results.getFacetField("SIZE_s");

    String msg = ""+parentIdsByAttrValue+" "+color_s+" "+size_s;
    for (FacetField facet : new FacetField[]{color_s, size_s}) {
      for (Count c : facet.getValues()) {
        assertEquals(c.getName()+"("+msg+")", parentIdsByAttrValue.get(c.getName()).size(), c.getCount());
      }
    }

    assertEquals(msg , parentIdsByAttrValue.size(),color_s.getValueCount() + size_s.getValueCount());
  }

  protected String getCloudSolrConfig() {
    return configString;
  }
}

View File

@ -0,0 +1,635 @@
package org.apache.solr.search.join;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrException;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
public class BlockJoinFacetRandomTest extends SolrTestCaseJ4 {
// request handler name ("qt") picked at random in beforeClass()
private static String handler;
// number of parent documents (one block each) generated by createBlocks()
private static final int NUMBER_OF_PARENTS = 10;
// number of tracked values generated per facet
private static final int NUMBER_OF_VALUES = 5;
// number of child documents generated per parent
private static final int NUMBER_OF_CHILDREN = 5;
// base names of the facets; createFacets() picks a random subset of them
private static final String[] facetFields = {"brand", "category", "color", "size", "type"};
// prefixes of the untracked string values produced by someOtherValue()
private static final String[] otherValues = {"x_", "y_", "z_"};
public static final String PARENT_VALUE_PREFIX = "prn_";
public static final String CHILD_VALUE_PREFIX = "chd_";
// facets generated once per class in beforeClass()
private static Facet[] facets;
/** Starts the core, picks one of the two facet handlers at random and builds the random index. */
@BeforeClass
public static void beforeClass() throws Exception {
  initCore("solrconfig-blockjoinfacetcomponent.xml", "schema-blockjoinfacetcomponent.xml");
  final boolean useDocSetComponent = random().nextBoolean();
  if (useDocSetComponent) {
    handler = "blockJoinDocSetFacetRH";
  } else {
    handler = "blockJoinFacetRH";
  }
  facets = createFacets();
  createIndex();
}
/**
 * Indexes every generated block as one nested document, committing at random points in between,
 * and finally verifies that all documents are searchable.
 */
public static void createIndex() throws Exception {
  int nextId = 0;
  for (List<List<String>> block : createBlocks()) {
    final List<XmlDoc> blockDocs = new ArrayList<>();
    for (List<String> fieldsAndValues : block) {
      fieldsAndValues.add("id");
      fieldsAndValues.add(Integer.toString(nextId));
      blockDocs.add(doc(fieldsAndValues.toArray(new String[fieldsAndValues.size()])));
      nextId++;
    }
    // the parent is the last doc of the block: splice all children in front of its closing tag
    final int parentPos = blockDocs.size() - 1;
    final XmlDoc parentDoc = blockDocs.get(parentPos);
    final String childrenXml = blockDocs.subList(0, parentPos).toString().replaceAll("[\\[\\]]", "");
    parentDoc.xml = parentDoc.xml.replace("</doc>", childrenXml + "</doc>");
    assertU(add(parentDoc));

    if (random().nextBoolean()) {
      assertU(commit());
      // force empty segment (actually, this will no longer create an empty segment, only a new segments_n)
      if (random().nextBoolean()) {
        assertU(commit());
      }
    }
  }
  assertU(commit());
  assertQ(req("q", "*:*"), "//*[@numFound='" + nextId + "']");
}
/**
 * Generates one block per parent: the shuffled children followed by the parent as last element.
 * Each document is a flat list of alternating field names and values. Blocks are returned in
 * random order.
 */
private static List<List<List<String>>> createBlocks() {
  final List<List<List<String>>> allBlocks = new ArrayList<>();
  for (int parentIndex = 0; parentIndex < NUMBER_OF_PARENTS; parentIndex++) {
    final List<List<String>> block = createChildrenBlock(parentIndex, facets);

    final List<String> parentFields = new LinkedList<>();
    parentFields.add("parent_s");
    parentFields.add(parent(parentIndex));
    for (Facet facet : facets) {
      for (RandomFacetValue value : facet.facetValues) {
        final boolean ownValue = value.postings[parentIndex].parentHasOwnValue;
        if (ownValue) {
          parentFields.add(facet.getFieldNameForIndex());
          parentFields.add(value.facetValue);
        } else if (facet.multiValued && random().nextBoolean()) {
          // noise: an untracked value for a multi-valued field
          parentFields.add(facet.getFieldNameForIndex());
          parentFields.add(someOtherValue(facet.fieldType));
        }
      }
      if (facet.additionalValueIsAllowedForParent(parentIndex) && random().nextBoolean()) {
        parentFields.add(facet.getFieldNameForIndex());
        parentFields.add(someOtherValue(facet.fieldType));
      }
    }

    block.add(parentFields);
    allBlocks.add(block);
  }
  Collections.shuffle(allBlocks, random());
  return allBlocks;
}
/**
 * Generates the children of one parent, in random order. Each child is a flat list of alternating
 * field names and values: its child id, its parent/child relation key and the facet values
 * prescribed by the postings, plus random untracked values where the field allows them.
 */
private static List<List<String>> createChildrenBlock(int parentIndex, Facet[] facets) {
  final List<List<String>> children = new ArrayList<>();
  for (int childIndex = 0; childIndex < NUMBER_OF_CHILDREN; childIndex++) {
    final List<String> childFields = new LinkedList<>();
    childFields.add("child_s");
    childFields.add(child(childIndex));
    childFields.add("parentchild_s");
    childFields.add(parentChild(parentIndex, childIndex));

    for (Facet facet : facets) {
      for (RandomFacetValue value : facet.facetValues) {
        final boolean childHasValue = value.postings[parentIndex].childrenHaveValue[childIndex];
        if (childHasValue) {
          childFields.add(facet.getFieldNameForIndex());
          childFields.add(value.facetValue);
        } else if (facet.multiValued && random().nextBoolean()) {
          childFields.add(facet.getFieldNameForIndex());
          childFields.add(someOtherValue(facet.fieldType));
        }
      }
      if (facet.additionalValueIsAllowedForChild(parentIndex, childIndex) && random().nextBoolean()) {
        childFields.add(facet.getFieldNameForIndex());
        childFields.add(someOtherValue(facet.fieldType));
      }
    }
    children.add(childFields);
  }
  Collections.shuffle(children, random());
  return children;
}
/** Value of {@code parent_s} for the parent with the given number. */
private static String parent(int docNumber) {
  return PARENT_VALUE_PREFIX + docNumber;
}
/** Value of {@code child_s} for the child with the given number. */
private static String child(int docNumber) {
  return CHILD_VALUE_PREFIX + docNumber;
}
/**
 * Produces a random value built from a number in
 * {@code [NUMBER_OF_VALUES, 2 * NUMBER_OF_VALUES)}, rendered according to the field type.
 */
private static String someOtherValue(FieldType fieldType) {
  final int shifted = random().nextInt(NUMBER_OF_VALUES) + NUMBER_OF_VALUES;
  final String rendered;
  switch (fieldType) {
    case String:
      final String prefix = otherValues[random().nextInt(otherValues.length)];
      rendered = prefix + shifted;
      break;
    case Float:
      rendered = createFloatValue(shifted);
      break;
    default:
      rendered = String.valueOf(shifted);
  }
  return rendered;
}
/** Renders the number as a decimal literal with the fixed fraction {@code .01}. */
private static String createFloatValue(int intValue) {
  return new StringBuilder().append(intValue).append(".01").toString();
}
/** Concatenates the prefix and the document number. */
private static String fieldValue(String valuePrefix, int docNumber) {
  return new StringBuilder().append(valuePrefix).append(docNumber).toString();
}
/** Value of {@code parentchild_s}: the parent value and the child value joined by an underscore. */
private static String parentChild(int parentIndex, int childIndex) {
  final String parentPart = parent(parentIndex);
  final String childPart = child(childIndex);
  return parentPart + "_" + childPart;
}
/** Removes every document indexed by this class. */
@AfterClass
public static void cleanUp() throws Exception {
  assertU(delQ("*:*"));
  // optimize() only builds the update message; it must be passed to assertU() to be executed
  assertU(optimize());
  assertU(commit());
}
/**
 * Without child facet parameters the request is answered as a plain query; with them, a main
 * query that is no to-parent join and an unknown facet field must both end in BAD_REQUEST.
 */
@Test
public void testValidation() throws Exception {
  assertQ("Component is ignored",
      req("q", "+parent_s:(prn_1 prn_2)", "qt", handler),
      "//*[@numFound='2']",
      "//doc/str[@name=\"parent_s\"]='prn_1'",
      "//doc/str[@name=\"parent_s\"]='prn_2'");

  final String childFacetParam = BlockJoinFacetComponent.CHILD_FACET_FIELD_PARAMETER;
  final SolrException.ErrorCode badRequest = SolrException.ErrorCode.BAD_REQUEST;

  assertQEx("Validation exception is expected because query is not ToParentBlockJoinQuery",
      BlockJoinFacetComponent.NO_TO_PARENT_BJQ_MESSAGE,
      req("q", "t",
          "qt", handler,
          childFacetParam, facetFields[0]),
      badRequest);

  assertQEx("Validation exception is expected because facet field is not defined in schema",
      req("q", "{!parent which=\"parent_s:[* TO *]\"}child_s:chd_1",
          "qt", handler,
          childFacetParam, "undefinedField"),
      badRequest);
}
/** Facets over a random facet selection with no restriction on parents or children. */
@Test
public void testAllDocs() throws Exception {
  final int[] facetNumbers = getRandomArray(facets.length);
  final String[] request = randomFacetsRequest(null, null, null, null, null, facetNumbers);
  final String[] expected = expectedResponse(null, null, facetNumbers);
  assertQ("Random facets for all docs should be calculated", req(request), expected);
}
/** Facets for a random subset of parents, any child. */
@Test
public void testRandomParentsAllChildren() throws Exception {
  final int[] parentNumbers = getRandomArray(NUMBER_OF_PARENTS);
  final int[] facetNumbers = getRandomArray(facets.length);
  final String[] request = randomFacetsRequest(parentNumbers, null, null, null, null, facetNumbers);
  final String[] expected = expectedResponse(parentNumbers, null, facetNumbers);
  assertQ("Random facets for random parents should be calculated", req(request), expected);
}
/** Facets for a random subset of children, any parent. */
@Test
public void testRandomChildrenAllParents() throws Exception {
  final int[] childNumbers = getRandomArray(NUMBER_OF_CHILDREN);
  final int[] facetNumbers = getRandomArray(facets.length);
  final String[] request = randomFacetsRequest(null, childNumbers, null, null, null, facetNumbers);
  final String[] expected = expectedResponse(null, childNumbers, facetNumbers);
  assertQ("Random facets for all parent docs should be calculated", req(request), expected);
}
/** Facets for random subsets of both parents and children. */
@Test
public void testRandomChildrenRandomParents() throws Exception {
  final int[] parentNumbers = getRandomArray(NUMBER_OF_PARENTS);
  final int[] childNumbers = getRandomArray(NUMBER_OF_CHILDREN);
  final int[] facetNumbers = getRandomArray(facets.length);
  final String[] request =
      randomFacetsRequest(parentNumbers, childNumbers, null, null, null, facetNumbers);
  final String[] expected = expectedResponse(parentNumbers, childNumbers, facetNumbers);
  assertQ("Random facets for all parent docs should be calculated", req(request), expected);
}
/**
 * Like the random parents/children test, but the children query is additionally restricted by
 * random parent/child relation keys, so only the intersections are expected to match.
 */
@Test
public void testRandomChildrenRandomParentsRandomRelations() throws Exception {
  final int[] parentNumbers = getRandomArray(NUMBER_OF_PARENTS);
  final int[] childNumbers = getRandomArray(NUMBER_OF_CHILDREN);
  final int[] relatedParents = getRandomArray(NUMBER_OF_PARENTS);
  final int[] relatedChildren = getRandomArray(NUMBER_OF_CHILDREN);
  final int[] facetNumbers = getRandomArray(facets.length);

  final String[] request = randomFacetsRequest(
      parentNumbers, childNumbers, relatedParents, relatedChildren, null, facetNumbers);
  final int[] expectedParents = intersection(parentNumbers, relatedParents);
  final int[] expectedChildren = intersection(childNumbers, relatedChildren);

  assertQ("Random facets for all parent docs should be calculated",
      req(request),
      expectedResponse(expectedParents, expectedChildren, facetNumbers));
}
/** Like the random relations test, with an additional random parent filter query ("fq"). */
@Test
public void testRandomFilters() throws Exception {
  final int[] parentNumbers = getRandomArray(NUMBER_OF_PARENTS);
  final int[] childNumbers = getRandomArray(NUMBER_OF_CHILDREN);
  final int[] relatedParents = getRandomArray(NUMBER_OF_PARENTS);
  final int[] relatedChildren = getRandomArray(NUMBER_OF_CHILDREN);
  final int[] filteredParents = getRandomArray(NUMBER_OF_PARENTS);
  final int[] facetNumbers = getRandomArray(facets.length);

  final String[] request = randomFacetsRequest(
      parentNumbers, childNumbers, relatedParents, relatedChildren, filteredParents, facetNumbers);
  final int[] expectedParents =
      intersection(intersection(parentNumbers, relatedParents), filteredParents);
  final int[] expectedChildren = intersection(childNumbers, relatedChildren);

  assertQ("Random facets for all parent docs should be calculated",
      req(request),
      expectedResponse(expectedParents, expectedChildren, facetNumbers));
}
/** Returns the distinct numbers contained in both arrays (in hash-set iteration order). */
private int[] intersection(int[] firstArray, int[] secondArray) {
  final Set<Integer> common = new HashSet<>();
  for (int idx = 0; idx < firstArray.length; idx++) {
    common.add(firstArray[idx]);
  }
  final Set<Integer> other = new HashSet<>();
  for (int idx = 0; idx < secondArray.length; idx++) {
    other.add(secondArray[idx]);
  }
  common.retainAll(other);

  final int[] result = new int[common.size()];
  int pos = 0;
  for (Integer number : common) {
    result[pos] = number;
    pos++;
  }
  return result;
}
/**
 * Assembles the request parameters: the main query, the handler, the parent ("pq") and children
 * ("chq") queries it dereferences, the parent filter, and one child facet parameter per facet.
 */
private String[] randomFacetsRequest(int[] parents, int[] children,
    int[] parentRelations, int[] childRelations,
    int[] parentFilters, int[] facetNumbers) {
  final List<String> params = new ArrayList<>();
  params.add("q");
  params.add(parentsQuery(parents));
  params.add("qt");
  params.add(handler);
  params.add("pq");
  params.add("parent_s:[* TO *]");
  params.add("chq");
  params.add(childrenQuery(children, parentRelations, childRelations));
  params.add("fq");
  params.add(flatQuery(parentFilters, "parent_s", PARENT_VALUE_PREFIX));

  for (int idx = 0; idx < facetNumbers.length; idx++) {
    params.add(BlockJoinFacetComponent.CHILD_FACET_FIELD_PARAMETER);
    params.add(facets[facetNumbers[idx]].getFieldNameForIndex());
  }
  return params.toArray(new String[params.size()]);
}
/**
 * Main query: the bare to-parent join when {@code parents} is null, otherwise the join as a
 * mandatory nested clause next to a mandatory restriction to the given parents.
 */
private String parentsQuery(int[] parents) {
  if (parents == null) {
    return "{!parent which=$pq v=$chq}";
  }
  final String parentsClause = flatQuery(parents, "parent_s", PARENT_VALUE_PREFIX);
  return parentsClause + " +_query_:\"{!parent which=$pq v=$chq}\"";
}
/**
 * Mandatory clause on {@code fieldName}: any value when {@code docNumbers} is null, a value that
 * matches nothing when it is empty, otherwise the space separated values of the given numbers.
 */
private String flatQuery(int[] docNumbers, final String fieldName, String fieldValuePrefix) {
  if (docNumbers == null) {
    return "+" + fieldName + ":[* TO *]";
  }
  final StringBuilder clause = new StringBuilder("+" + fieldName + ":(");
  if (docNumbers.length == 0) {
    clause.append("match_nothing_value");
  }
  for (int idx = 0; idx < docNumbers.length; idx++) {
    if (idx > 0) {
      clause.append(" ");
    }
    clause.append(fieldValue(fieldValuePrefix, docNumbers[idx]));
  }
  return clause.append(")").toString();
}
/**
 * Children query: a mandatory clause on {@code child_s} followed by a mandatory clause on the
 * {@code parentchild_s} relation key. A null relation array means "any"; an empty one makes the
 * relation clause match nothing.
 */
private String childrenQuery(int[] children, int[] parentRelations, int[] childRelations) {
  final StringBuilder query = new StringBuilder(flatQuery(children, "child_s", CHILD_VALUE_PREFIX));

  if (parentRelations == null && childRelations == null) {
    return query.append(" +parentchild_s:[* TO *]").toString();
  }

  query.append(" +parentchild_s:(");
  final boolean noParentRelation = parentRelations != null && parentRelations.length == 0;
  final boolean noChildRelation = childRelations != null && childRelations.length == 0;
  if (noParentRelation || noChildRelation) {
    query.append("match_nothing_value");
  } else if (parentRelations == null) {
    // every parent combined with each of the given children
    for (int childRelation : childRelations) {
      for (int parentIndex = 0; parentIndex < NUMBER_OF_PARENTS; parentIndex++) {
        query.append(parentChild(parentIndex, childRelation)).append(" ");
      }
    }
    query.deleteCharAt(query.length() - 1);
  } else if (childRelations == null) {
    // each of the given parents combined with every child; the trailing blank is kept here
    for (int parentRelation : parentRelations) {
      for (int childIndex = 0; childIndex < NUMBER_OF_CHILDREN; childIndex++) {
        query.append(parentChild(parentRelation, childIndex)).append(" ");
      }
    }
  } else {
    for (int parentRelation : parentRelations) {
      for (int childRelation : childRelations) {
        query.append(parentChild(parentRelation, childRelation)).append(" ");
      }
    }
    query.deleteCharAt(query.length() - 1);
  }
  return query.append(")").toString();
}
/**
 * Builds the XPath checks for a response: the hit count with the expected parents, and for every
 * requested facet its list element plus one check per value with a positive expected count.
 */
private String[] expectedResponse(int[] parents, int[] children, int[] facetNumbers) {
  final List<String> xpaths = new LinkedList<>();

  final boolean noChildSelected = children != null && children.length == 0;
  if (noChildSelected) {
    xpaths.add("//*[@numFound='0']");
  } else if (parents == null) {
    xpaths.add("//*[@numFound='" + NUMBER_OF_PARENTS + "']");
    for (int parentIndex = 0; parentIndex < NUMBER_OF_PARENTS; parentIndex++) {
      xpaths.add("//doc/str[@name=\"parent_s\"]='" + parent(parentIndex) + "'");
    }
  } else {
    xpaths.add("//*[@numFound='" + parents.length + "']");
    for (int parentIndex : parents) {
      xpaths.add("//doc/str[@name=\"parent_s\"]='" + parent(parentIndex) + "'");
    }
  }

  if (facetNumbers != null) {
    for (int facetNumber : facetNumbers) {
      final Facet facet = facets[facetNumber];
      final String facetPath = "//lst[@name='facet_counts']/lst[@name='facet_fields']/lst[@name='"
          + facet.getFieldNameForIndex() + "']";
      xpaths.add(facetPath);
      for (RandomFacetValue value : facet.facetValues) {
        final int expectedCount = value.getFacetCount(parents, children);
        if (expectedCount > 0) {
          xpaths.add(facetPath + "/int[@name='" + value.facetValue
              + "' and text()='" + expectedCount + "']");
        }
      }
    }
  }
  return xpaths.toArray(new String[xpaths.size()]);
}
/** Creates a facet for each member of a random subset of {@code facetFields}. */
private static Facet[] createFacets() {
  final int[] chosen = getRandomArray(facetFields.length);
  final Facet[] created = new Facet[chosen.length];
  for (int pos = 0; pos < chosen.length; pos++) {
    created[pos] = new Facet(facetFields[chosen[pos]]);
  }
  return created;
}
/**
 * Returns an ascending random subset of {@code 0 .. maxNumber - 1}: every number is kept or
 * dropped by its own coin flip, so the result may be empty.
 */
private static int[] getRandomArray(int maxNumber) {
  final int[] kept = new int[maxNumber];
  int keptCount = 0;
  for (int candidate = 0; candidate < maxNumber; candidate++) {
    if (!random().nextBoolean()) {
      continue;
    }
    kept[keptCount] = candidate;
    keptCount++;
  }
  return Arrays.copyOf(kept, keptCount);
}
/**
 * A randomly generated facet field: its tracked values and, through their postings, which parent
 * and which child carries which value. For a single-valued facet the postings are reduced so
 * that no document carries more than one tracked value.
 */
private static class Facet {
  private String fieldName;
  private boolean multiValued = true;
  FieldType fieldType;
  RandomFacetValue[] facetValues;

  Facet(String fieldName) {
    this.fieldName = fieldName;
    final FieldType[] allTypes = FieldType.values();
    final FieldType drawnType = allTypes[random().nextInt(allTypes.length)];
    if (drawnType == FieldType.String) {
      // sortedDocValues are supported for string fields only
      multiValued = random().nextBoolean();
    }
    // the drawn type only decides about multiValued above; the facet itself is always a string
    fieldType = FieldType.String;

    facetValues = new RandomFacetValue[NUMBER_OF_VALUES];
    for (int ord = 0; ord < NUMBER_OF_VALUES; ord++) {
      final String value = createRandomValue(ord);
      facetValues[ord] = new RandomFacetValue(value);
    }
    if (!multiValued) {
      makeValuesSingle();
    }
  }

  /** Renders the tracked value number {@code i} according to the field type. */
  private String createRandomValue(int i) {
    if (fieldType == FieldType.String) {
      return fieldName.substring(0, 2) + "_" + i;
    }
    if (fieldType == FieldType.Float) {
      return createFloatValue(i);
    }
    return String.valueOf(i);
  }

  /** Name of the indexed field: base name, type postfix and multi/single postfix. */
  String getFieldNameForIndex() {
    final StringBuilder name = new StringBuilder(fieldName).append(fieldType.fieldPostfix);
    name.append(multiValued ? "_multi" : "_single");
    return name.toString();
  }

  /** Keeps a single, randomly chosen tracked value on every parent and child that has several. */
  private void makeValuesSingle() {
    for (int parentNumber = 0; parentNumber < NUMBER_OF_PARENTS; parentNumber++) {
      final List<Integer> parentOrds = getValuesForParent(parentNumber);
      if (!parentOrds.isEmpty()) {
        final int keptOrd = parentOrds.get(random().nextInt(parentOrds.size()));
        setSingleValueForParent(parentNumber, keptOrd);
      }
      for (int childNumber = 0; childNumber < NUMBER_OF_CHILDREN; childNumber++) {
        final List<Integer> childOrds = getValuesForChild(parentNumber, childNumber);
        if (!childOrds.isEmpty()) {
          final int keptOrd = childOrds.get(random().nextInt(childOrds.size()));
          setSingleValueForChild(parentNumber, childNumber, keptOrd);
        }
      }
    }
  }

  /** Ordinals of the tracked values the parent document itself carries. */
  private List<Integer> getValuesForParent(int parentNumber) {
    final List<Integer> ords = new ArrayList<>();
    for (int ord = 0; ord < NUMBER_OF_VALUES; ord++) {
      final RandomParentPosting posting = facetValues[ord].postings[parentNumber];
      if (posting.parentHasOwnValue) {
        ords.add(ord);
      }
    }
    return ords;
  }

  private void setSingleValueForParent(int parentNumber, int valueOrd) {
    for (int ord = 0; ord < NUMBER_OF_VALUES; ord++) {
      final RandomParentPosting posting = facetValues[ord].postings[parentNumber];
      posting.parentHasOwnValue = (ord == valueOrd);
    }
  }

  /** An extra untracked value may be added unless a single-valued field is already taken. */
  boolean additionalValueIsAllowedForParent(int parentNumber) {
    if (multiValued) {
      return true;
    }
    return getValuesForParent(parentNumber).size() == 0;
  }

  /** Ordinals of the tracked values carried by the given child of the given parent. */
  private List<Integer> getValuesForChild(int parentNumber, int childNumber) {
    final List<Integer> ords = new ArrayList<>();
    for (int ord = 0; ord < NUMBER_OF_VALUES; ord++) {
      final RandomParentPosting posting = facetValues[ord].postings[parentNumber];
      if (posting.childrenHaveValue[childNumber]) {
        ords.add(ord);
      }
    }
    return ords;
  }

  private void setSingleValueForChild(int parentNumber, int childNumber, int valueOrd) {
    for (int ord = 0; ord < NUMBER_OF_VALUES; ord++) {
      final RandomParentPosting posting = facetValues[ord].postings[parentNumber];
      posting.childrenHaveValue[childNumber] = (ord == valueOrd);
    }
  }

  /** An extra untracked value may be added unless a single-valued field is already taken. */
  boolean additionalValueIsAllowedForChild(int parentNumber, int childNumber) {
    if (multiValued) {
      return true;
    }
    return getValuesForChild(parentNumber, childNumber).size() == 0;
  }
}
/** One tracked facet value together with its random postings, one per parent. */
private static class RandomFacetValue {
  final String facetValue;
  // indexed by parent number
  RandomParentPosting[] postings;

  public RandomFacetValue(String facetValue) {
    this.facetValue = facetValue;
    postings = new RandomParentPosting[NUMBER_OF_PARENTS];
    for (int parentNumber = 0; parentNumber < postings.length; parentNumber++) {
      final boolean expected = random().nextBoolean();
      postings[parentNumber] = new RandomParentPosting(expected);
    }
  }

  /**
   * Counts the parents whose posting matches for the given children.
   * @param parentNumbers parents to consider, or null for all parents
   * @param childNumbers children to consider, or null for all children
   */
  int getFacetCount(int[] parentNumbers, int[] childNumbers) {
    int matched = 0;
    if (parentNumbers == null) {
      for (RandomParentPosting posting : postings) {
        if (posting.isMatched(childNumbers)) {
          matched++;
        }
      }
      return matched;
    }
    for (int idx = 0; idx < parentNumbers.length; idx++) {
      if (postings[parentNumbers[idx]].isMatched(childNumbers)) {
        matched++;
      }
    }
    return matched;
  }
}
/** Field types of the test schema, each with the postfix used in the dynamic field names. */
private enum FieldType {
  Integer("_i"), Float("_f"), String("_s");

  private final String fieldPostfix;

  FieldType(String postfix) {
    fieldPostfix = postfix;
  }
}
/** Records, for one facet value and one parent, which documents of the block carry the value. */
private static class RandomParentPosting {
  boolean parentHasOwnValue;
  boolean[] childrenHaveValue;

  /**
   * @param expected when false no document of the block gets the value; when true a coin flip
   *                 decides whether the children receive random assignments
   */
  RandomParentPosting(boolean expected) {
    childrenHaveValue = new boolean[NUMBER_OF_CHILDREN];
    // don't count parents
    parentHasOwnValue = false;
    if (expected && random().nextBoolean()) {
      for (int child = 0; child < NUMBER_OF_CHILDREN; child++) {
        childrenHaveValue[child] = random().nextBoolean();
      }
    }
  }

  /**
   * @param childNumbers children to consider, or null for all children
   * @return true if the parent has the value itself (and the child selection is not empty) or
   *         any considered child has it
   */
  boolean isMatched(int[] childNumbers) {
    final boolean anyChildSelected = childNumbers == null || childNumbers.length > 0;
    if (parentHasOwnValue && anyChildSelected) {
      return true;
    }
    if (childNumbers == null) {
      for (boolean childHasValue : childrenHaveValue) {
        if (childHasValue) {
          return true;
        }
      }
      return false;
    }
    for (int child : childNumbers) {
      if (childrenHaveValue[child]) {
        return true;
      }
    }
    return false;
  }
}
}

View File

@ -0,0 +1,98 @@
package org.apache.solr.search.join;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.apache.solr.SolrTestCaseJ4;
import org.junit.BeforeClass;
import org.junit.Test;
/**
 * Fixed-data check of the block join facet components: of two indexed parent blocks only one
 * matches the queries below, and the child facets must report each value once for it.
 */
public class BlockJoinFacetSimpleTest extends SolrTestCaseJ4 {
  private static String handler;

  @BeforeClass
  public static void beforeClass() throws Exception {
    initCore("solrconfig-blockjoinfacetcomponent.xml", "schema-blockjoinfacetcomponent.xml");
    final boolean useDocSetComponent = random().nextBoolean();
    handler = useDocSetComponent ? "blockJoinDocSetFacetRH" : "blockJoinFacetRH";
    createIndex();
  }

  /** Indexes the two blocks in random order; optionally re-adds the matching one afterwards. */
  public static void createIndex() throws Exception {
    // match
    final String nikeParent = adoc("id", "10", "type_s", "parent", "BRAND_s", "Nike");
    final String nikeChildren = ""
        + doc("id", "11", "type_s", "child", "COLOR_s", "Red", "SIZE_s", "XL") // matches child filter
        + doc("id", "12", "type_s", "child", "COLOR_s", "Red", "SIZE_s", "XL") // matches child filter
        + doc("id", "13", "type_s", "child", "COLOR_s", "Blue", "SIZE_s", "XL");
    final String match = nikeParent.replace("</doc>", nikeChildren + "</doc>");

    // mismatch
    final String reebokParent = adoc("id", "100", "type_s", "parent", "BRAND_s", "Reebok");
    final String reebokChildren = ""
        + doc("id", "101", "type_s", "child", "COLOR_s", "Red", "SIZE_s", "M")
        + doc("id", "102", "type_s", "child", "COLOR_s", "Blue", "SIZE_s", "XL")
        + doc("id", "104", "type_s", "child", "COLOR_s", "While", "SIZE_s", "XL")
        + doc("id", "105", "type_s", "child", "COLOR_s", "Green", "SIZE_s", "XXXL");
    final String mismatch = reebokParent.replace("</doc>", reebokChildren + "</doc>");

    final List<String> docs = Arrays.asList(match, mismatch);
    Collections.shuffle(docs, random());
    for (int pos = 0; pos < docs.size(); pos++) {
      assertU(docs.get(pos));
    }

    final boolean withDeletedDoc = random().nextBoolean();
    if (withDeletedDoc) { // let's have a deleted doc
      if (random().nextBoolean()) {
        assertU("let's have two segs", commit());
      }
      assertU("overriding matching doc", match);
    }
    assertU(commit());
    assertQ(req("q", "*:*"), "//*[@numFound='9']");
  }

  /** Three equivalent ways of selecting the one matching parent must yield the same facets. */
  @Test
  public void testSimple() throws Exception {
    // parents
    assertQ(req("q", "type_s:parent"), "//*[@numFound='2']");

    final String[][] queryVariants = {
        {"q", "{!parent which=\"type_s:parent\"}+COLOR_s:Red +SIZE_s:XL"},
        {"q", "+{!parent which=\"type_s:parent\"}+COLOR_s:Red +BRAND_s:Nike"},
        {"q", "{!parent which=\"type_s:parent\"}+COLOR_s:Red", "fq", "BRAND_s:Nike"}};

    for (int variant = 0; variant < queryVariants.length; variant++) {
      final List<String> reqParams = new ArrayList<>(Arrays.asList(queryVariants[variant]));
      reqParams.add("qt");
      reqParams.add(handler);
      reqParams.add("facet");
      reqParams.add(random().nextBoolean() ? "true" : "false"); // it's indifferent to
      reqParams.add("child.facet.field");
      reqParams.add("COLOR_s");
      reqParams.add("child.facet.field");
      reqParams.add("SIZE_s");

      // exactly one COLOR_s and one SIZE_s value is expected, so there is no 'Blue' entry
      assertQ(req(reqParams.toArray(new String[reqParams.size()])),
          "//*[@numFound='1']",
          "//lst[@name='COLOR_s']/int[@name='Red'][.='1']",
          "count(//lst[@name='COLOR_s']/int)=1",
          "//lst[@name='SIZE_s']/int[@name='XL'][.='1']",
          "count(//lst[@name='SIZE_s']/int)=1");
    }
  }
}