diff --git a/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java index 22ff78aa716..b6f41b25751 100644 --- a/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java +++ b/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java @@ -190,8 +190,40 @@ public class ToParentBlockJoinQuery extends Query { return Explanation.noMatch("Not a match"); } } - - static class BlockJoinScorer extends Scorer { + + /** + * Ascendant for {@link ToParentBlockJoinQuery}'s scorer. + * @lucene.experimental it might be removed at 6.0 + * */ + public static abstract class ChildrenMatchesScorer extends Scorer{ + + /** inherited constructor */ + protected ChildrenMatchesScorer(Weight weight) { + super(weight); + } + + /** + * enables children matches recording + * */ + public abstract void trackPendingChildHits() ; + + /** + * reports matched children + * @return number of recorded matched children docs + * */ + public abstract int getChildCount() ; + + /** + * reports matched children + * @param other array for recording matching children docs of next parent, + * it might be null (that's slower) or the same array which was returned + * from the previous call + * @return array with {@link #getChildCount()} matched children docnums + * */ + public abstract int[] swapChildDocs(int[] other); + } + + static class BlockJoinScorer extends ChildrenMatchesScorer{ private final Scorer childScorer; private final BitSet parentBits; private final ScoreMode scoreMode; @@ -218,7 +250,8 @@ public class ToParentBlockJoinQuery extends Query { return Collections.singleton(new ChildScorer(childScorer, "BLOCK_JOIN")); } - int getChildCount() { + @Override + public int getChildCount() { return childDocUpto; } @@ -226,7 +259,8 @@ public class ToParentBlockJoinQuery extends Query { return parentDoc; } - int[] swapChildDocs(int[] other) { + @Override + public 
int[] swapChildDocs(int[] other) { final int[] ret = pendingChildDocs; if (other == null) { pendingChildDocs = new int[5]; @@ -413,6 +447,7 @@ public class ToParentBlockJoinQuery extends Query { /** * Instructs this scorer to keep track of the child docIds and score ids for retrieval purposes. */ + @Override public void trackPendingChildHits() { pendingChildDocs = new int[5]; if (scoreMode != ScoreMode.None) { diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 1b74c036c91..f540292d9f8 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -242,6 +242,9 @@ New Features * SOLR-8428: RuleBasedAuthorizationPlugin adds an 'all' permission (noble) +* SOLR-5743: BlockJoinFacetComponent and BlockJoinDocSetFacetComponent for calculating facets by + child.facet.field parameter with {!parent ..}.. query. They count facets on children documents + aggregating (deduplicating) counts by parent documents (Dr. Oleg Savrasov via Mikhail Khludnev) Bug Fixes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java index ca5e6953745..2ff4572d23d 100644 --- a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java +++ b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java @@ -1375,7 +1375,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn return qr.getDocList(); } - static final int NO_CHECK_QCACHE = 0x80000000; + public static final int NO_CHECK_QCACHE = 0x80000000; public static final int GET_DOCSET = 0x40000000; static final int NO_CHECK_FILTERCACHE = 0x20000000; static final int NO_SET_QCACHE = 0x10000000; diff --git a/solr/core/src/java/org/apache/solr/search/join/BlockJoinDocSetFacetComponent.java b/solr/core/src/java/org/apache/solr/search/join/BlockJoinDocSetFacetComponent.java new file mode 100644 index 00000000000..dfb62b8c33b --- /dev/null +++ 
b/solr/core/src/java/org/apache/solr/search/join/BlockJoinDocSetFacetComponent.java @@ -0,0 +1,220 @@ +package org.apache.solr.search.join; + +import java.io.IOException; +import java.util.List; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.LeafCollector; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.join.ToParentBlockJoinQuery; +import org.apache.solr.common.SolrException; +import org.apache.solr.handler.component.ResponseBuilder; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.search.BitDocSet; +import org.apache.solr.search.DocSet; +import org.apache.solr.search.Filter; +import org.apache.solr.search.QueryContext; +import org.apache.solr.search.facet.BlockJoin; +import org.apache.solr.search.join.BlockJoinFieldFacetAccumulator.AggregatableDocIter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * It does the same as BlockJoinFacetComponent, but operates on docsets; + * it should be faster for mostly static indexes. This component doesn't impact + * query result caching, but hits the filter cache to retrieve docsets. + * */ +public class BlockJoinDocSetFacetComponent extends BlockJoinFacetComponent { + + private final String bjqKey = this.getClass().getSimpleName()+".bjq"; + + private static final class SegmentChildren implements AggregatableDocIter { + + private final BitDocSet allParentsBitsDocSet; + private int nextDoc = DocIdSetIterator.NO_MORE_DOCS; + private DocIdSetIterator disi; + private int currentParent=-1; + final LeafReaderContext segment; + final DocIdSet childrenMatches; + + private SegmentChildren(LeafReaderContext subCtx, DocIdSet dis, BitDocSet allParentsBitsDocSet) { + this.allParentsBitsDocSet = allParentsBitsDocSet; + this.childrenMatches = dis; + this.segment = subCtx; + reset(); + } + + @Override + public Integer next() { + return nextDoc(); + } + + @Override + public boolean hasNext() { + return nextDoc != DocIdSetIterator.NO_MORE_DOCS; + } + + @Override + public float score() { + return 0; + } + + @Override + public int nextDoc() { + int lastDoc = nextDoc; + assert nextDoc != DocIdSetIterator.NO_MORE_DOCS; + if (lastDoc>currentParent) { // we passed the previous block, and need to reevaluate a parent + currentParent = allParentsBitsDocSet.getBits().nextSetBit(lastDoc+segment.docBase)-segment.docBase; + } + try { + nextDoc = disi.nextDoc(); + } catch (IOException e) { + throw new RuntimeException(e); + } + return lastDoc; + } + + @Override + public void reset() { + currentParent=-1; + try { + disi = childrenMatches.iterator(); + if (disi != null) { + nextDoc = disi.nextDoc(); + }else{ + nextDoc = DocIdSetIterator.NO_MORE_DOCS; + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public int getAggKey() { + return currentParent; + } + } + + private static final class 
NoDelegateFacetCollector extends BlockJoinFacetCollector { + { + setDelegate(new Collector() { + + @Override + public boolean needsScores() { + return false; + } + + @Override + public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { + return null; + } + }); + } + + private NoDelegateFacetCollector(SolrQueryRequest req) throws IOException { + super(req); + } + } + + public BlockJoinDocSetFacetComponent() {} + + @Override + public void prepare(ResponseBuilder rb) throws IOException { + + if (getChildFacetFields(rb.req) != null) { + validateQuery(rb.getQuery()); + rb.setNeedDocSet(true); + rb.req.getContext().put(bjqKey, extractChildQuery(rb.getQuery())); + } + } + + private ToParentBlockJoinQuery extractChildQuery(Query query) { + if (!(query instanceof ToParentBlockJoinQuery)) { + if (query instanceof BooleanQuery) { + List clauses = ((BooleanQuery) query).clauses(); + ToParentBlockJoinQuery once = null; + for (BooleanClause clause : clauses) { + if (clause.getQuery() instanceof ToParentBlockJoinQuery) { + if (once==null) { + once = (ToParentBlockJoinQuery) clause.getQuery(); + } else { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "can't choose between " + + once + " and " + clause.getQuery()); + } + } + } + if (once!=null) { + return once; + } + } + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, NO_TO_PARENT_BJQ_MESSAGE); + } + else{ + return (ToParentBlockJoinQuery) query; + } + } + + @Override + public void process(ResponseBuilder rb) throws IOException { + final BlockJoinParentQParser.AllParentsAware bjq = + (BlockJoinParentQParser.AllParentsAware) rb.req.getContext().get(bjqKey); + if(bjq!=null){ + final DocSet parentResult = rb.getResults().docSet; + final BitDocSet allParentsBitsDocSet = rb.req.getSearcher().getDocSetBits(bjq.getParentQuery()); + final DocSet allChildren = BlockJoin.toChildren(parentResult, + allParentsBitsDocSet, + rb.req.getSearcher().getDocSetBits( new MatchAllDocsQuery() ), + 
QueryContext.newContext(rb.req.getSearcher())); + + final DocSet childQueryDocSet = rb.req.getSearcher().getDocSet(bjq.getChildQuery()); + final DocSet selectedChildren = allChildren.intersection(childQueryDocSet); + + // don't include parent into facet counts + //childResult = childResult.union(parentResult);// just to mimic the current logic + + final List leaves = rb.req.getSearcher().getIndexReader().leaves(); + + Filter filter = selectedChildren.getTopFilter(); + + final BlockJoinFacetCollector facetCounter = new NoDelegateFacetCollector(rb.req); + + for (int subIdx = 0; subIdx < leaves.size(); subIdx++) { + LeafReaderContext subCtx = leaves.get(subIdx); + DocIdSet dis = filter.getDocIdSet(subCtx, null); // solr docsets already exclude any deleted docs + + AggregatableDocIter iter = new SegmentChildren(subCtx, dis, allParentsBitsDocSet); + + if (iter.hasNext()){ + facetCounter.doSetNextReader(subCtx); + facetCounter.countFacets(iter); + } + } + facetCounter.finish(); + + rb.req.getContext().put(COLLECTOR_CONTEXT_PARAM,facetCounter); + super.process(rb); + } + + } +} diff --git a/solr/core/src/java/org/apache/solr/search/join/BlockJoinFacetCollector.java b/solr/core/src/java/org/apache/solr/search/join/BlockJoinFacetCollector.java new file mode 100644 index 00000000000..4ed2b83d46a --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/join/BlockJoinFacetCollector.java @@ -0,0 +1,132 @@ +package org.apache.solr.search.join; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.LinkedList; +import java.util.Queue; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.join.ToParentBlockJoinQuery.ChildrenMatchesScorer; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.search.DelegatingCollector; +import org.apache.solr.search.join.BlockJoinFieldFacetAccumulator.AggregatableDocIter; +import org.apache.solr.search.join.BlockJoinFieldFacetAccumulator.SortedIntsAggDocIterator; + +/** + * For each collected parent document creates matched block, which is a docSet with matched children and parent doc + * itself. Then updates each BlockJoinFieldFacetAccumulator with the created matched block. 
+ */ +class BlockJoinFacetCollector extends DelegatingCollector { + private BlockJoinFieldFacetAccumulator[] blockJoinFieldFacetAccumulators; + private boolean firstSegment = true; + private ChildrenMatchesScorer blockJoinScorer; + private int[] childDocs = new int[0]; + + BlockJoinFacetCollector(SolrQueryRequest req) throws IOException { + String[] facetFieldNames = BlockJoinFacetComponent.getChildFacetFields(req); + assert facetFieldNames != null; + blockJoinFieldFacetAccumulators = new BlockJoinFieldFacetAccumulator[facetFieldNames.length]; + for (int i = 0; i < facetFieldNames.length; i++) { + blockJoinFieldFacetAccumulators[i] = new BlockJoinFieldFacetAccumulator(facetFieldNames[i], req.getSearcher()); + } + } + + @Override + public void setScorer(Scorer scorer) throws IOException { + super.setScorer(scorer); + blockJoinScorer = getToParentScorer(scorer, new LinkedList()); + if (blockJoinScorer != null) { + // instruct scorer to keep track of the child docIds for retrieval purposes. 
+ blockJoinScorer.trackPendingChildHits(); + } + } + + private ChildrenMatchesScorer getToParentScorer(Scorer scorer, Queue queue) { + if (scorer == null || scorer instanceof ChildrenMatchesScorer) { + return (ChildrenMatchesScorer) scorer; + } else { + for (Scorer.ChildScorer child : scorer.getChildren()) { + queue.add(child.child); + } + return getToParentScorer(queue.poll(), queue); + } + } + + @Override + protected void doSetNextReader(LeafReaderContext context) throws IOException { + for (BlockJoinFieldFacetAccumulator blockJoinFieldFacetAccumulator : blockJoinFieldFacetAccumulators) { + if(!firstSegment){ + blockJoinFieldFacetAccumulator.migrateGlobal(); + } + blockJoinFieldFacetAccumulator.setNextReader(context); + } + firstSegment = false; + super.doSetNextReader(context); + } + + @Override + public void collect(int doc) throws IOException { + incrementFacets(doc); + super.collect(doc); + } + + @Override + public void finish() throws IOException { + for (BlockJoinFieldFacetAccumulator blockJoinFieldFacetAccumulator : blockJoinFieldFacetAccumulators) { + blockJoinFieldFacetAccumulator.migrateGlobal(); + } + super.finish(); + } + + protected void incrementFacets(int parent) throws IOException { + final int[] docNums = blockJoinScorer.swapChildDocs(childDocs); + // now we don't + //includeParentDoc(parent); + //final int childCountPlusParent = childTracking.getChildCount()+1; + final int childCountNoParent = blockJoinScorer.getChildCount(); + final SortedIntsAggDocIterator iter = new SortedIntsAggDocIterator(docNums, childCountNoParent, parent); + countFacets(iter); + } + + /** is not used + protected int[] includeParentDoc(int parent) { + final int[] docNums = ArrayUtil.grow(childTracking.getChildDocs(), childTracking.getChildCount()+1); + childTracking.setChildDocs(docNums); // we include parent into block, I'm not sure whether it makes sense + docNums[childTracking.getChildCount()]=parent; + return docNums; + }*/ + + protected void countFacets(final 
AggregatableDocIter iter) throws IOException { + for (BlockJoinFieldFacetAccumulator blockJoinFieldFacetAccumulator : blockJoinFieldFacetAccumulators) { + blockJoinFieldFacetAccumulator.updateCountsWithMatchedBlock( iter); + } + } + + NamedList getFacets() { + NamedList> facets = new NamedList<>(blockJoinFieldFacetAccumulators.length); + for (BlockJoinFieldFacetAccumulator state : blockJoinFieldFacetAccumulators) { + facets.add(state.getFieldName(), state.getFacetValue()); + } + return facets; + } + + +} diff --git a/solr/core/src/java/org/apache/solr/search/join/BlockJoinFacetComponent.java b/solr/core/src/java/org/apache/solr/search/join/BlockJoinFacetComponent.java new file mode 100644 index 00000000000..70d08e75951 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/join/BlockJoinFacetComponent.java @@ -0,0 +1,183 @@ +package org.apache.solr.search.join; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; + +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.join.ToParentBlockJoinQuery; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.params.ShardParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.handler.component.ResponseBuilder; +import org.apache.solr.handler.component.SearchComponent; +import org.apache.solr.handler.component.ShardRequest; +import org.apache.solr.handler.component.ShardResponse; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.search.DelegatingCollector; +import org.apache.solr.search.SolrIndexSearcher; + + +/** + * Calculates facets on children documents and aggregates hits by parent documents. + * Enables when child.facet.field parameter specifies a field name for faceting. + * So far it supports string fields only. It requires to search by {@link ToParentBlockJoinQuery}. 
+ * It disables the query result cache, but only when it's enabled for the request by the child.facet.field parameter + * */ +public class BlockJoinFacetComponent extends SearchComponent { + public static final String CHILD_FACET_FIELD_PARAMETER = "child.facet.field"; + public static final String NO_TO_PARENT_BJQ_MESSAGE = "Block join faceting is allowed with ToParentBlockJoinQuery only"; + public static final String COLLECTOR_CONTEXT_PARAM = "blockJoinFacetCollector"; + + @Override + public void prepare(ResponseBuilder rb) throws IOException { + + if (getChildFacetFields(rb.req) != null) { + validateQuery(rb.getQuery()); + // we count facets only when searching + rb.setFieldFlags(rb.getFieldFlags() | SolrIndexSearcher.NO_CHECK_QCACHE); + if (rb.getFilters() == null) { + rb.setFilters(new LinkedList()); + } + DelegatingCollector blockJoinFacetCollector = new BlockJoinFacetCollector(rb.req); + rb.req.getContext().put(COLLECTOR_CONTEXT_PARAM, blockJoinFacetCollector); + rb.getFilters().add(new BlockJoinFacetFilter(blockJoinFacetCollector)); + } + } + + protected void validateQuery(Query query) { + if (!(query instanceof ToParentBlockJoinQuery)) { + if (query instanceof BooleanQuery) { + List clauses = ((BooleanQuery) query).clauses(); + for (BooleanClause clause : clauses) { + if (clause.getQuery() instanceof ToParentBlockJoinQuery) { + return; + } + } + } + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, NO_TO_PARENT_BJQ_MESSAGE); + } + } + + static String[] getChildFacetFields(SolrQueryRequest req) { + return req.getParams().getParams(CHILD_FACET_FIELD_PARAMETER); + } + + @Override + public void process(ResponseBuilder rb) throws IOException { + if (getChildFacetFields(rb.req) != null) { + BlockJoinFacetCollector blockJoinFacetCollector = (BlockJoinFacetCollector) rb.req.getContext().get(COLLECTOR_CONTEXT_PARAM); + assert blockJoinFacetCollector != null; + NamedList output; + if (isShard(rb)) { + // distributed search, put results into own cell in order not to clash 
with facet component + output = getChildFacetFields(rb.rsp.getValues(), true); + } else { + // normal process, put results into standard response + output = getFacetFieldsList(rb); + } + mergeFacets(output, blockJoinFacetCollector.getFacets()); + } + } + + private boolean isShard(ResponseBuilder rb) { + return "true".equals(rb.req.getParams().get(ShardParams.IS_SHARD)); + } + + private NamedList getChildFacetFields(NamedList responseValues, boolean createIfAbsent) { + return getNamedListFromList(responseValues, "child_facet_fields", createIfAbsent); + } + + private void mergeFacets(NamedList childFacetFields, NamedList shardFacets) { + if (shardFacets != null) { + for (Map.Entry> nextShardFacet : (Iterable>>) shardFacets) { + String fieldName = nextShardFacet.getKey(); + NamedList collectedFacet = (NamedList) childFacetFields.get(fieldName); + NamedList shardFacet = nextShardFacet.getValue(); + if (collectedFacet == null) { + childFacetFields.add(fieldName, shardFacet); + } else { + mergeFacetValues(collectedFacet, shardFacet); + } + } + } + } + + private void mergeFacetValues(NamedList collectedFacetValue, NamedList shardFacetValue) { + for (Map.Entry nextShardValue : shardFacetValue) { + String facetValue = nextShardValue.getKey(); + Integer shardCount = nextShardValue.getValue(); + int indexOfCollectedValue = collectedFacetValue.indexOf(facetValue, 0); + if (indexOfCollectedValue == -1) { + collectedFacetValue.add(facetValue, shardCount); + } else { + int newCount = collectedFacetValue.getVal(indexOfCollectedValue) + shardCount; + collectedFacetValue.setVal(indexOfCollectedValue, newCount); + } + } + } + + private NamedList getNamedListFromList(NamedList parentList, String name, boolean createIfAbsent) { + NamedList result = null; + if (parentList != null) { + result = (NamedList) parentList.get(name); + if (result == null && createIfAbsent) { + result = new NamedList(); + parentList.add(name, result); + } + } + return result; + } + + @Override + public void 
handleResponses(ResponseBuilder rb, ShardRequest sreq) { + NamedList collectedChildFacetFields = getChildFacetFields(rb.rsp.getValues(), true); + List responses = sreq.responses; + for (ShardResponse shardResponse : responses) { + NamedList shardChildFacetFields = getChildFacetFields(shardResponse.getSolrResponse().getResponse(), false); + mergeFacets(collectedChildFacetFields, shardChildFacetFields); + } + } + + @Override + public void finishStage(ResponseBuilder rb) { + if (rb.stage != ResponseBuilder.STAGE_GET_FIELDS) return; + NamedList childFacetFields = getChildFacetFields(rb.rsp.getValues(), true); + NamedList facetFields = getFacetFieldsList(rb); + for (Map.Entry childFacetField : (Iterable>) childFacetFields) { + facetFields.add(childFacetField.getKey(), childFacetField.getValue()); + } + rb.rsp.getValues().remove("child_facet_fields"); + } + + private NamedList getFacetFieldsList(ResponseBuilder rb) { + NamedList facetCounts = getNamedListFromList(rb.rsp.getValues(), "facet_counts", true); + return getNamedListFromList(facetCounts, "facet_fields", true); + } + + + @Override + public String getDescription() { + return "BlockJoin facet component"; + } +} diff --git a/solr/core/src/java/org/apache/solr/search/join/BlockJoinFacetFilter.java b/solr/core/src/java/org/apache/solr/search/join/BlockJoinFacetFilter.java new file mode 100644 index 00000000000..a286c96385c --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/join/BlockJoinFacetFilter.java @@ -0,0 +1,74 @@ +package org.apache.solr.search.join; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.solr.search.DelegatingCollector; +import org.apache.solr.search.PostFilter; + +class BlockJoinFacetFilter extends Query implements PostFilter { + + public static final int COST = 120; + private DelegatingCollector blockJoinFacetCollector; + + public BlockJoinFacetFilter(DelegatingCollector blockJoinFacetCollector) { + super(); + this.blockJoinFacetCollector = blockJoinFacetCollector; + } + + @Override + public String toString(String field) { + return null; + } + + @Override + public DelegatingCollector getFilterCollector(IndexSearcher searcher) { + return blockJoinFacetCollector; + } + + @Override + public boolean getCache() { + return false; + } + + @Override + public void setCache(boolean cache) { + + } + + @Override + public int getCost() { + return COST; + } + + @Override + public void setCost(int cost) { + + } + + @Override + public boolean getCacheSep() { + return false; + } + + @Override + public void setCacheSep(boolean cacheSep) { + + } +} diff --git a/solr/core/src/java/org/apache/solr/search/join/BlockJoinFieldFacetAccumulator.java b/solr/core/src/java/org/apache/solr/search/join/BlockJoinFieldFacetAccumulator.java new file mode 100644 index 00000000000..02712dbfee7 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/join/BlockJoinFieldFacetAccumulator.java @@ -0,0 +1,221 @@ +package org.apache.solr.search.join; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license 
agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Arrays; + +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.MultiDocValues; +import org.apache.lucene.index.MultiDocValues.OrdinalMap; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.CharsRefBuilder; +import org.apache.lucene.util.LongValues; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.request.DocValuesFacets; +import org.apache.solr.schema.FieldType; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.search.DocIterator; +import org.apache.solr.search.SolrIndexSearcher; + +/** + * This class is responsible for collecting block join facet counts for particular field + */ +class BlockJoinFieldFacetAccumulator { + private String fieldName; + private FieldType fieldType; + private int currentSegment = -1; + // for term lookups only + private SortedSetDocValues topSSDV; + private int[] globalCounts; + private SortedSetDocValues segmentSSDV; + // elems are : facet value counter<<32 | last parent doc num + private long[] segmentAccums = new 
long[0]; + // for mapping per-segment ords to global ones + private MultiDocValues.OrdinalMap ordinalMap; + private SchemaField schemaField; + private SortedDocValues segmentSDV; + + BlockJoinFieldFacetAccumulator(String fieldName, SolrIndexSearcher searcher) throws IOException { + this.fieldName = fieldName; + schemaField = searcher.getSchema().getField(fieldName); + fieldType = schemaField.getType(); + ordinalMap = null; + if (schemaField.multiValued()) { + topSSDV = searcher.getLeafReader().getSortedSetDocValues(fieldName); + if (topSSDV instanceof MultiDocValues.MultiSortedSetDocValues) { + ordinalMap = ((MultiDocValues.MultiSortedSetDocValues) topSSDV).mapping; + } + } else { + SortedDocValues single = searcher.getLeafReader().getSortedDocValues(fieldName); + topSSDV = single == null ? null : DocValues.singleton(single);// npe friendly code + if (single instanceof MultiDocValues.MultiSortedDocValues) { + ordinalMap = ((MultiDocValues.MultiSortedDocValues) single).mapping; + } + } + } + + private boolean initSegmentData(String fieldName, LeafReaderContext leaf) throws IOException { + segmentSSDV = DocValues.getSortedSet(leaf.reader(), fieldName); + segmentAccums = ArrayUtil.grow(segmentAccums, (int)segmentSSDV.getValueCount()+1);//+1 + // zero counts, -1 parent + Arrays.fill(segmentAccums,0,(int)segmentSSDV.getValueCount()+1, 0x00000000ffffffffL); + segmentSDV = DocValues.unwrapSingleton(segmentSSDV); + return segmentSSDV.getValueCount()!=0;// perhaps we need to count "missings"?? 
+ } + + interface AggregatableDocIter extends DocIterator { + void reset(); + /** a key to aggregate the current document */ + int getAggKey(); + + } + static class SortedIntsAggDocIterator implements AggregatableDocIter { + private int[] childDocs; + private int childCount; + private int parentDoc; + private int pos=-1; + + public SortedIntsAggDocIterator(int[] childDocs, int childCount, int parentDoc) { + this.childDocs = childDocs; + this.childCount = childCount; + this.parentDoc = parentDoc; + } + + + @Override + public boolean hasNext() { + return pos=0 && (term = (int) segmentSSDV.nextOrd()) >= 0); + } + } + } + + String getFieldName() { + return fieldName; + } + + /** copy paste from {@link DocValuesFacets} */ + NamedList getFacetValue() { + NamedList facetValue = new NamedList<>(); + final CharsRefBuilder charsRef = new CharsRefBuilder(); // if there is no globs, take segment's ones + for (int i = 1; i< (globalCounts!=null ? globalCounts.length: segmentAccums.length); i++) { + int count = globalCounts!=null ? 
globalCounts[i] : (int)(segmentAccums [i]>>32); + if (count > 0) { + BytesRef term = topSSDV.lookupOrd(-1 + i); + fieldType.indexedToReadable(term, charsRef); + facetValue.add(charsRef.toString(), count); + } + } + return facetValue; + } + + // @todo we can track in max term nums to loop only changed range while migrating and labeling + private void accumulateTermOrd(int term, int parentDoc) { + long accum = segmentAccums[1+term]; + if(((int)(accum & 0xffffffffL))!=parentDoc) + {// incrementing older 32, reset smaller 32, set them to the new parent + segmentAccums[1+term] = ((accum +(0x1L<<32))&0xffffffffL<<32)|parentDoc; + } + } + + void setNextReader(LeafReaderContext context) throws IOException { + initSegmentData(fieldName, context); + currentSegment = context.ord; + } + + void migrateGlobal(){ + if (currentSegment<0 // no hits + || segmentAccums.length==0 + || ordinalMap==null) { // single segment + return; + } + + if(globalCounts==null){ + // it might be just a single segment + globalCounts = new int[(int) ordinalMap.getValueCount()+ /*[0] for missing*/1]; + }else{ + assert currentSegment>=0; + } + + migrateGlobal(globalCounts, segmentAccums, currentSegment, ordinalMap); + } + + /** folds counts in segment ordinal space (segCounts) into global ordinal space (counts) + * copy paste-from {@link DocValuesFacets#migrateGlobal(int[], int[], int, OrdinalMap)}*/ + void migrateGlobal(int counts[], long segCounts[], int subIndex, OrdinalMap map) { + + final LongValues ordMap = map.getGlobalOrds(subIndex); + // missing count + counts[0] += (int) (segCounts[0]>>32); + + // migrate actual ordinals + for (int ord = 1; ord <= segmentSSDV.getValueCount(); ord++) { + int count = (int) (segCounts[ord]>>32); + if (count != 0) { + counts[1+(int) ordMap.get(ord-1)] += count; + } + } + } +} diff --git a/solr/core/src/java/org/apache/solr/search/join/BlockJoinParentQParser.java b/solr/core/src/java/org/apache/solr/search/join/BlockJoinParentQParser.java index 
c93e48270a1..c78463749dd 100644 --- a/solr/core/src/java/org/apache/solr/search/join/BlockJoinParentQParser.java +++ b/solr/core/src/java/org/apache/solr/search/join/BlockJoinParentQParser.java @@ -39,7 +39,7 @@ import org.apache.solr.search.SolrCache; import org.apache.solr.search.SolrConstantScoreQuery; import org.apache.solr.search.SyntaxError; -class BlockJoinParentQParser extends QParser { +public class BlockJoinParentQParser extends QParser { /** implementation detail subject to change */ public String CACHE_NAME="perSegFilter"; @@ -70,9 +70,8 @@ class BlockJoinParentQParser extends QParser { return createQuery(parentQ, childrenQuery, scoreMode); } - protected Query createQuery(Query parentList, Query query, String scoreMode) throws SyntaxError { - return new ToParentBlockJoinQuery(query, getFilter(parentList).filter, - ScoreModeParser.parse(scoreMode)); + protected Query createQuery(final Query parentList, Query query, String scoreMode) throws SyntaxError { + return new AllParentsAware(query, getFilter(parentList).filter, ScoreModeParser.parse(scoreMode), parentList); } BitDocIdSetFilterWrapper getFilter(Query parentList) { @@ -98,6 +97,20 @@ class BlockJoinParentQParser extends QParser { return new QueryBitSetProducer(parentQ); } + static final class AllParentsAware extends ToParentBlockJoinQuery { + private final Query parentQuery; + + private AllParentsAware(Query childQuery, BitSetProducer parentsFilter, ScoreMode scoreMode, + Query parentList) { + super(childQuery, parentsFilter, scoreMode); + parentQuery = parentList; + } + + public Query getParentQuery(){ + return parentQuery; + } + } + // We need this wrapper since BitDocIdSetFilter does not extend Filter static class BitDocIdSetFilterWrapper extends Filter { diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-blockjoinfacetcomponent.xml b/solr/core/src/test-files/solr/collection1/conf/schema-blockjoinfacetcomponent.xml new file mode 100644 index 00000000000..5c300125803 --- /dev/null 
+++ b/solr/core/src/test-files/solr/collection1/conf/schema-blockjoinfacetcomponent.xml @@ -0,0 +1,46 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + name + id + diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig-blockjoinfacetcomponent.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig-blockjoinfacetcomponent.xml new file mode 100644 index 00000000000..b22269397ad --- /dev/null +++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig-blockjoinfacetcomponent.xml @@ -0,0 +1,59 @@ + + + + + + ${tests.luceneMatchVersion:LATEST} + + + + ${solr.data.dir:} + + + + + + + + + ${solr.ulog.dir:} + + + + + + + + + blockJoinFacetRH + + + blockJoinFacet + + + + + + blockJoinDocSetFacetRH + + + blockJoinDocSetFacet + + + + diff --git a/solr/core/src/test/org/apache/solr/search/join/BlockJoinFacetDistribTest.java b/solr/core/src/test/org/apache/solr/search/join/BlockJoinFacetDistribTest.java new file mode 100644 index 00000000000..c74e7c04376 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/search/join/BlockJoinFacetDistribTest.java @@ -0,0 +1,137 @@ +package org.apache.solr.search.join; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.lucene.util.LuceneTestCase.Slow; +import org.apache.solr.BaseDistributedSearchTestCase; +import org.apache.solr.client.solrj.response.FacetField; +import org.apache.solr.client.solrj.response.FacetField.Count; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.util.NamedList; +import org.junit.BeforeClass; + +@Slow +public class BlockJoinFacetDistribTest extends BaseDistributedSearchTestCase { + + @BeforeClass + public static void beforeSuperClass() throws Exception { + schemaString = "schema-blockjoinfacetcomponent.xml"; + configString = "solrconfig-blockjoinfacetcomponent.xml"; + } + + @ShardsFixed(num = 3) + public void test() throws Exception { + testBJQFacetComponent(); + } + + final static List colors = Arrays.asList("red","blue","brown","white","black","yellow","cyan","magenta","blur", + "fuchsia", "light","dark","green","grey","don't","know","any","more" ); + final static List sizes = Arrays.asList("s","m","l","xl","xxl","xml","xxxl","3","4","5","6","petite","maxi"); + + private void testBJQFacetComponent() throws Exception { + + assert ! 
colors.removeAll(sizes): "there is no colors in sizes"; + Collections.shuffle(colors,random()); + List matchingColors = colors.subList(0, Math.min(atLeast(random(), 2), colors.size())); + + Map> parentIdsByAttrValue = new HashMap>(){ + @Override + public Set get(Object key) { + return super.get(key)==null && put((String)key, new HashSet<>())==null?super.get(key):super.get(key); + } + }; + + final int parents = atLeast(10); + boolean aggregationOccurs = false; + for(int parent=0; parent resultsResponse = results.getResponse(); + assertNotNull(resultsResponse); + FacetField color_s = results.getFacetField("COLOR_s"); + FacetField size_s = results.getFacetField("SIZE_s"); + + String msg = ""+parentIdsByAttrValue+" "+color_s+" "+size_s; + for (FacetField facet: new FacetField[]{color_s, size_s}) { + for (Count c : facet.getValues()) { + assertEquals(c.getName()+"("+msg+")", parentIdsByAttrValue.get(c.getName()).size(), c.getCount()); + } + } + + assertEquals(msg , parentIdsByAttrValue.size(),color_s.getValueCount() + size_s.getValueCount()); + // } + } + + protected String getCloudSolrConfig() { + return configString; + } +} diff --git a/solr/core/src/test/org/apache/solr/search/join/BlockJoinFacetRandomTest.java b/solr/core/src/test/org/apache/solr/search/join/BlockJoinFacetRandomTest.java new file mode 100644 index 00000000000..0ebbb9d9ea3 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/search/join/BlockJoinFacetRandomTest.java @@ -0,0 +1,635 @@ +package org.apache.solr.search.join; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Set; + +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.common.SolrException; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +public class BlockJoinFacetRandomTest extends SolrTestCaseJ4 { + private static String handler; + private static final int NUMBER_OF_PARENTS = 10; + private static final int NUMBER_OF_VALUES = 5; + private static final int NUMBER_OF_CHILDREN = 5; + private static final String[] facetFields = {"brand", "category", "color", "size", "type"}; + private static final String[] otherValues = {"x_", "y_", "z_"}; + public static final String PARENT_VALUE_PREFIX = "prn_"; + public static final String CHILD_VALUE_PREFIX = "chd_"; + + + private static Facet[] facets; + + @BeforeClass + public static void beforeClass() throws Exception { + initCore("solrconfig-blockjoinfacetcomponent.xml", "schema-blockjoinfacetcomponent.xml"); + handler = random().nextBoolean() ? 
"blockJoinDocSetFacetRH":"blockJoinFacetRH"; + facets = createFacets(); + createIndex(); + } + + public static void createIndex() throws Exception { + int i = 0; + List>> blocks = createBlocks(); + for (List> block : blocks) { + List updBlock = new ArrayList<>(); + for (List blockFields : block) { + blockFields.add("id"); + blockFields.add(Integer.toString(i)); + updBlock.add(doc(blockFields.toArray(new String[blockFields.size()]))); + i++; + } + //got xmls for every doc. now nest all into the last one + XmlDoc parentDoc = updBlock.get(updBlock.size() - 1); + parentDoc.xml = parentDoc.xml.replace("", + updBlock.subList(0, updBlock.size() - 1).toString().replaceAll("[\\[\\]]", "") + ""); + assertU(add(parentDoc)); + + if (random().nextBoolean()) { + assertU(commit()); + // force empty segment (actually, this will no longer create an empty segment, only a new segments_n) + if (random().nextBoolean()) { + assertU(commit()); + } + } + } + assertU(commit()); + assertQ(req("q", "*:*"), "//*[@numFound='" + i + "']"); + } + + private static List>> createBlocks() { + List>> blocks = new ArrayList<>(); + for (int i = 0; i < NUMBER_OF_PARENTS; i++) { + List> block = createChildrenBlock(i, facets); + List fieldsList = new LinkedList<>(); + fieldsList.add("parent_s"); + fieldsList.add(parent(i)); + for (Facet facet : facets) { + for (RandomFacetValue facetValue : facet.facetValues) { + RandomParentPosting posting = facetValue.postings[i]; + if (posting.parentHasOwnValue) { + fieldsList.add(facet.getFieldNameForIndex()); + fieldsList.add(facetValue.facetValue); + } else if (facet.multiValued && random().nextBoolean()) { + fieldsList.add(facet.getFieldNameForIndex()); + fieldsList.add(someOtherValue(facet.fieldType)); + } + } + if (facet.additionalValueIsAllowedForParent(i)&&random().nextBoolean()) { + fieldsList.add(facet.getFieldNameForIndex()); + fieldsList.add(someOtherValue(facet.fieldType)); + } + } + block.add(fieldsList); + blocks.add(block); + } + 
Collections.shuffle(blocks, random()); + return blocks; + } + + private static List> createChildrenBlock(int parentIndex, Facet[] facets) { + List> block = new ArrayList<>(); + for (int i = 0; i < NUMBER_OF_CHILDREN; i++) { + List fieldsList = new LinkedList<>(); + + fieldsList.add("child_s"); + fieldsList.add(child(i)); + fieldsList.add("parentchild_s"); + fieldsList.add(parentChild(parentIndex, i)); + for (Facet facet : facets) { + for (RandomFacetValue facetValue : facet.facetValues) { + RandomParentPosting posting = facetValue.postings[parentIndex]; + if (posting.childrenHaveValue[i]) { + fieldsList.add(facet.getFieldNameForIndex()); + fieldsList.add(facetValue.facetValue); + } else if (facet.multiValued && random().nextBoolean()) { + fieldsList.add(facet.getFieldNameForIndex()); + fieldsList.add(someOtherValue(facet.fieldType)); + } + } + if (facet.additionalValueIsAllowedForChild(parentIndex,i)&&random().nextBoolean()) { + fieldsList.add(facet.getFieldNameForIndex()); + fieldsList.add(someOtherValue(facet.fieldType)); + } + } + block.add(fieldsList); + } + Collections.shuffle(block, random()); + return block; + } + + private static String parent(int docNumber) { + return fieldValue(PARENT_VALUE_PREFIX, docNumber); + } + + private static String child(int docNumber) { + return fieldValue(CHILD_VALUE_PREFIX, docNumber); + } + + private static String someOtherValue(FieldType fieldType) { + int randomValue = random().nextInt(NUMBER_OF_VALUES) + NUMBER_OF_VALUES; + switch (fieldType) { + case String : + int index = random().nextInt(otherValues.length); + return otherValues[index]+randomValue; + case Float: + return createFloatValue(randomValue); + default: + return String.valueOf(randomValue); + + } + + } + + private static String createFloatValue(int intValue) { + return intValue + ".01"; + } + + private static String fieldValue(String valuePrefix, int docNumber) { + return valuePrefix + docNumber; + } + + private static String parentChild(int parentIndex, int 
childIndex) { + return parent(parentIndex) + "_" + child(childIndex); + } + + @AfterClass + public static void cleanUp() throws Exception { + assertU(delQ("*:*")); + optimize(); + assertU((commit())); + } + + @Test + public void testValidation() throws Exception { + assertQ("Component is ignored", + req("q", "+parent_s:(prn_1 prn_2)", "qt", handler) + , "//*[@numFound='2']" + , "//doc/str[@name=\"parent_s\"]='prn_1'" + , "//doc/str[@name=\"parent_s\"]='prn_2'" + ); + + assertQEx("Validation exception is expected because query is not ToParentBlockJoinQuery", + BlockJoinFacetComponent.NO_TO_PARENT_BJQ_MESSAGE, + req( + "q", "t", + "qt", handler, + BlockJoinFacetComponent.CHILD_FACET_FIELD_PARAMETER, facetFields[0] + ), + SolrException.ErrorCode.BAD_REQUEST + ); + + assertQEx("Validation exception is expected because facet field is not defined in schema", + req( + "q", "{!parent which=\"parent_s:[* TO *]\"}child_s:chd_1", + "qt", handler, + BlockJoinFacetComponent.CHILD_FACET_FIELD_PARAMETER, "undefinedField" + ), + SolrException.ErrorCode.BAD_REQUEST + ); + } + + @Test + public void testAllDocs() throws Exception { + int[] randomFacets = getRandomArray(facets.length); + assertQ("Random facets for all docs should be calculated", + req(randomFacetsRequest(null, null, null, null, null, randomFacets)), + expectedResponse(null, null, randomFacets)); + } + + @Test + public void testRandomParentsAllChildren() throws Exception { + int[] randomParents = getRandomArray(NUMBER_OF_PARENTS); + int[] randomFacets = getRandomArray(facets.length); + assertQ("Random facets for random parents should be calculated", + req(randomFacetsRequest(randomParents, null, null, null, null, randomFacets)), + expectedResponse(randomParents, null, randomFacets)); + } + + @Test + public void testRandomChildrenAllParents() throws Exception { + int[] randomChildren = getRandomArray(NUMBER_OF_CHILDREN); + int[] randomFacets = getRandomArray(facets.length); + assertQ("Random facets for all parent docs 
should be calculated", + req(randomFacetsRequest(null, randomChildren, null, null, null, randomFacets)), + expectedResponse(null, randomChildren, randomFacets)); + } + + @Test + public void testRandomChildrenRandomParents() throws Exception { + int[] randomParents = getRandomArray(NUMBER_OF_PARENTS); + int[] randomChildren = getRandomArray(NUMBER_OF_CHILDREN); + int[] randomFacets = getRandomArray(facets.length); + assertQ("Random facets for all parent docs should be calculated", + req(randomFacetsRequest(randomParents, randomChildren, null, null, null, randomFacets)), + expectedResponse(randomParents, randomChildren, randomFacets)); + } + + @Test + public void testRandomChildrenRandomParentsRandomRelations() throws Exception { + int[] randomParents = getRandomArray(NUMBER_OF_PARENTS); + int[] randomChildren = getRandomArray(NUMBER_OF_CHILDREN); + int[] parentRelations = getRandomArray(NUMBER_OF_PARENTS); + int[] childRelations = getRandomArray(NUMBER_OF_CHILDREN); + int[] randomFacets = getRandomArray(facets.length); + assertQ("Random facets for all parent docs should be calculated", + req(randomFacetsRequest(randomParents, randomChildren, parentRelations, childRelations, null, randomFacets)), + expectedResponse(intersection(randomParents, parentRelations), + intersection(randomChildren, childRelations), randomFacets)); + } + + @Test + public void testRandomFilters() throws Exception { + int[] randomParents = getRandomArray(NUMBER_OF_PARENTS); + int[] randomChildren = getRandomArray(NUMBER_OF_CHILDREN); + int[] parentRelations = getRandomArray(NUMBER_OF_PARENTS); + int[] childRelations = getRandomArray(NUMBER_OF_CHILDREN); + int[] randomParentFilters = getRandomArray(NUMBER_OF_PARENTS); + int[] randomFacets = getRandomArray(facets.length); + assertQ("Random facets for all parent docs should be calculated", + req(randomFacetsRequest(randomParents, randomChildren, parentRelations, childRelations, randomParentFilters, randomFacets)), + 
expectedResponse(intersection(intersection(randomParents, parentRelations), randomParentFilters), + intersection(randomChildren, childRelations), randomFacets)); + } + + private int[] intersection(int[] firstArray, int[] secondArray) { + Set firstSet = new HashSet<>(); + for (int i : firstArray) { + firstSet.add(i); + } + Set secondSet = new HashSet<>(); + for (int i : secondArray) { + secondSet.add(i); + } + firstSet.retainAll(secondSet); + int[] result = new int[firstSet.size()]; + int i = 0; + for (Integer integer : firstSet) { + result[i++] = integer; + } + return result; + } + + private String[] randomFacetsRequest(int[] parents, int[] children, + int[] parentRelations, int[] childRelations, + int[] parentFilters, int[] facetNumbers) { + List params = new ArrayList<>(Arrays.asList( + "q", parentsQuery(parents), + "qt",handler, + "pq","parent_s:[* TO *]", + "chq", childrenQuery(children, parentRelations, childRelations), + "fq", flatQuery(parentFilters, "parent_s", PARENT_VALUE_PREFIX) + )); + for (int facetNumber : facetNumbers) { + params .add(BlockJoinFacetComponent.CHILD_FACET_FIELD_PARAMETER); + params .add(facets[facetNumber].getFieldNameForIndex()); + } + return params.toArray(new String[params.size()]); + } + + private String parentsQuery(int[] parents) { + String result; + if (parents == null) { + result = "{!parent which=$pq v=$chq}"; + } else { + result = flatQuery(parents, "parent_s", PARENT_VALUE_PREFIX) + " +_query_:\"{!parent which=$pq v=$chq}\""; + } + return result; + } + + private String flatQuery(int[] docNumbers, final String fieldName, String fieldValuePrefix) { + String result; + if (docNumbers == null) { + result = "+" + fieldName + ":[* TO *]"; + } else { + StringBuilder builder = new StringBuilder("+" + fieldName +":("); + if (docNumbers.length == 0) { + builder.append("match_nothing_value"); + } else { + for (int docNumber : docNumbers) { + builder.append(fieldValue(fieldValuePrefix, docNumber)); + builder.append(" "); + } + 
builder.deleteCharAt(builder.length() - 1); + } + builder.append(")"); + result = builder.toString(); + } + return result; + } + + private String childrenQuery(int[] children, int[] parentRelations, int[] childRelations) { + StringBuilder builder = new StringBuilder(); + builder.append(flatQuery(children, "child_s", CHILD_VALUE_PREFIX)); + if (parentRelations == null) { + if (childRelations == null) { + builder.append(" +parentchild_s:[* TO *]"); + } else { + builder.append(" +parentchild_s:("); + if (childRelations.length == 0) { + builder.append("match_nothing_value"); + } else { + for (int childRelation : childRelations) { + for (int i = 0; i < NUMBER_OF_PARENTS; i++) { + builder.append(parentChild(i, childRelation)); + builder.append(" "); + } + } + builder.deleteCharAt(builder.length() - 1); + } + builder.append(")"); + } + } else { + builder.append(" +parentchild_s:("); + if (parentRelations.length == 0) { + builder.append("match_nothing_value"); + } else { + if (childRelations == null) { + for (int parentRelation : parentRelations) { + for (int i = 0; i < NUMBER_OF_CHILDREN; i++) { + builder.append(parentChild(parentRelation, i)); + builder.append(" "); + } + } + } else if (childRelations.length == 0) { + builder.append("match_nothing_value"); + } else { + for (int parentRelation : parentRelations) { + + for (int childRelation : childRelations) { + builder.append(parentChild(parentRelation, childRelation)); + builder.append(" "); + } + } + builder.deleteCharAt(builder.length() - 1); + } + } + builder.append(")"); + } + return builder.toString(); + } + + private String[] expectedResponse(int[] parents, int[] children, int[] facetNumbers) { + List result = new LinkedList<>(); + if (children != null && children.length == 0) { + result.add("//*[@numFound='" + 0 + "']"); + } else { + if (parents == null) { + result.add("//*[@numFound='" + NUMBER_OF_PARENTS + "']"); + for (int i = 0; i < NUMBER_OF_PARENTS; i++) { + result.add("//doc/str[@name=\"parent_s\"]='" + 
parent(i) + "'"); + } + } else { + result.add("//*[@numFound='" + parents.length + "']"); + for (int parent : parents) { + result.add("//doc/str[@name=\"parent_s\"]='" + parent(parent) + "'"); + } + } + } + if (facetNumbers != null) { + for (int facetNumber : facetNumbers) { + result.add("//lst[@name='facet_counts']/lst[@name='facet_fields']/lst[@name='" + facets[facetNumber].getFieldNameForIndex() + "']"); + RandomFacetValue[] facetValues = facets[facetNumber].facetValues; + for (RandomFacetValue facetValue : facetValues) { + int expectedFacetCount = facetValue.getFacetCount(parents, children); + if (expectedFacetCount > 0) { + result.add("//lst[@name='facet_counts']/lst[@name='facet_fields']/lst[@name='" + + facets[facetNumber].getFieldNameForIndex() + "']/int[@name='" + + facetValue.facetValue + "' and text()='" + expectedFacetCount + "']"); + } + } + } + } + return result.toArray(new String[result.size()]); + } + + private static Facet[] createFacets() { + int[] facetsToCreate = getRandomArray(facetFields.length); + Facet[] facets = new Facet[facetsToCreate.length]; + int i = 0; + for (int facetNumber : facetsToCreate) { + facets[i++] = new Facet(facetFields[facetNumber]); + } + return facets; + } + + private static int[] getRandomArray(int maxNumber) { + int[] buffer = new int[maxNumber]; + int count = 0; + for (int i = 0; i < maxNumber; i++) { + if (random().nextBoolean()) { + buffer[count++] = i; + } + } + int[] result = new int[count]; + System.arraycopy(buffer, 0, result, 0, count); + return result; + } + + private static class Facet { + private String fieldName; + private boolean multiValued = true; + FieldType fieldType; + RandomFacetValue[] facetValues; + + Facet(String fieldName) { + this.fieldName = fieldName; + fieldType = FieldType.values()[random().nextInt(FieldType.values().length)]; + if ( FieldType.String.equals(fieldType)) { + // sortedDocValues are supported for string fields only + multiValued = random().nextBoolean(); + } + + fieldType = 
FieldType.String; + facetValues = new RandomFacetValue[NUMBER_OF_VALUES]; + for (int i = 0; i < NUMBER_OF_VALUES; i++) { + String value = createRandomValue(i); + facetValues[i] = new RandomFacetValue(value); + } + if (!multiValued) { + makeValuesSingle(); + } + } + + private String createRandomValue(int i) { + switch( fieldType ) { + case String: + return fieldName.substring(0, 2) + "_" + i; + case Float: + return createFloatValue(i); + default: + return String.valueOf(i); + } + } + + String getFieldNameForIndex() { + String multiValuedPostfix = multiValued ? "_multi" : "_single"; + return fieldName + fieldType.fieldPostfix + multiValuedPostfix; + } + + private void makeValuesSingle() { + for ( int i = 0; i < NUMBER_OF_PARENTS; i++) { + List values = getValuesForParent(i); + if ( values.size() > 0) { + int singleValueOrd = values.get(random().nextInt(values.size())); + setSingleValueForParent(i,singleValueOrd); + } + for ( int j=0; j < NUMBER_OF_CHILDREN; j++) { + values = getValuesForChild(i,j); + if ( values.size() > 0 ) { + int singleValueOrd = values.get(random().nextInt(values.size())); + setSingleValueForChild(i, j, singleValueOrd); + } + } + } + } + + private List getValuesForParent(int parentNumber) { + List result = new ArrayList<>(); + for (int i = 0; i getValuesForChild(int parentNumber, int childNumber) { + List result = new ArrayList<>(); + for (int i = 0; i 0); + if (!result) { + if (childNumbers == null) { + for (boolean childHasValue : childrenHaveValue) { + result = childHasValue; + if (result) { + break; + } + } + } else { + for (int child : childNumbers) { + result = childrenHaveValue[child]; + if (result) { + break; + } + } + } + } + return result; + } + } +} diff --git a/solr/core/src/test/org/apache/solr/search/join/BlockJoinFacetSimpleTest.java b/solr/core/src/test/org/apache/solr/search/join/BlockJoinFacetSimpleTest.java new file mode 100644 index 00000000000..13a522820f7 --- /dev/null +++ 
b/solr/core/src/test/org/apache/solr/search/join/BlockJoinFacetSimpleTest.java @@ -0,0 +1,98 @@ +package org.apache.solr.search.join; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +import org.apache.solr.SolrTestCaseJ4; +import org.junit.BeforeClass; +import org.junit.Test; + +public class BlockJoinFacetSimpleTest extends SolrTestCaseJ4 { + private static String handler; + + @BeforeClass + public static void beforeClass() throws Exception { + initCore("solrconfig-blockjoinfacetcomponent.xml", "schema-blockjoinfacetcomponent.xml"); + handler = random().nextBoolean() ? 
"blockJoinDocSetFacetRH":"blockJoinFacetRH"; + createIndex(); + } + + public static void createIndex() throws Exception { + + final String match; + List docs = Arrays.asList( +// match + match = adoc("id", "10","type_s", "parent","BRAND_s", "Nike").replace("", + ""+ + doc("id", "11","type_s", "child","COLOR_s", "Red","SIZE_s", "XL")+// matches child filter + doc("id", "12","type_s", "child","COLOR_s", "Red","SIZE_s", "XL")+// matches child filter + doc("id", "13","type_s", "child","COLOR_s", "Blue","SIZE_s", "XL")+""), +// mismatch + adoc("id", "100","type_s", "parent","BRAND_s", "Reebok").replace("", + ""+doc("id", "101","type_s", "child","COLOR_s", "Red","SIZE_s", "M")+ + doc("id", "102","type_s", "child","COLOR_s", "Blue","SIZE_s", "XL")+ + doc("id", "104","type_s", "child","COLOR_s", "While","SIZE_s", "XL")+ + doc("id", "105","type_s", "child","COLOR_s", "Green","SIZE_s", "XXXL")+ + "")); + + Collections.shuffle(docs, random()); + for(String d : docs){ + assertU(d); + } + if(random().nextBoolean()){// let's have a deleted doc + if(random().nextBoolean()){ + assertU("let's have two segs",commit()); + } + assertU("overriding matching doc",match); + } + assertU(commit()); + assertQ(req("q", "*:*"), "//*[@numFound='" + 9 + "']"); + } + + @Test + public void testSimple() throws Exception { + //query + // parents + assertQ(req("q", "type_s:parent"), "//*[@numFound='" + 2 + "']"); + + String alt[][] ={ {"q", "{!parent which=\"type_s:parent\"}+COLOR_s:Red +SIZE_s:XL"}, + {"q", "+{!parent which=\"type_s:parent\"}+COLOR_s:Red +BRAND_s:Nike"}, + {"q", "{!parent which=\"type_s:parent\"}+COLOR_s:Red", "fq", "BRAND_s:Nike"}}; + + for(String param[] : alt){ + final List reqParams = new ArrayList<>(Arrays.asList(param)); + reqParams.addAll(Arrays.asList("qt",handler, + "facet", (random().nextBoolean() ? 
"true":"false"),// it's indifferent to + "child.facet.field", "COLOR_s", + "child.facet.field", "SIZE_s")); + assertQ(req(reqParams.toArray(new String[0])), + "//*[@numFound='" + 1 + "']", + "//lst[@name='COLOR_s']/int[@name='Red'][.='1']", + // "//lst[@name='COLOR_s']/int[@name='Blue'][.='1']", + "count(//lst[@name='COLOR_s']/int)=1", + "//lst[@name='SIZE_s']/int[@name='XL'][.='1']", + "count(//lst[@name='SIZE_s']/int)=1"); + + } + } + +}