SOLR-8643: made BlockJoinFacetComponent just a shortcut for BlockJoinDocSetFacetComponent, which supports pure disjunctions

This commit is contained in:
Mikhail Khludnev 2016-08-19 14:36:04 +03:00
parent 9a5dcce5de
commit 15bfeb6b0c
5 changed files with 170 additions and 229 deletions

View File

@ -252,6 +252,8 @@ Other Changes
* SOLR-9421: Refactored out OverseerCollectionMessageHandler to smaller classes (noble)
* SOLR-8643: BlockJoinFacetComponent is substituted by BlockJoinFacetDocSetComponent. It doesn't need to change solrconfig.xml (Mikhail Khludnev)
================== 6.1.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.

View File

@ -22,16 +22,13 @@ import java.util.List;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.join.ToParentBlockJoinQuery;
import org.apache.solr.common.SolrException;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.BitDocSet;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.Filter;
@ -40,11 +37,11 @@ import org.apache.solr.search.facet.BlockJoin;
import org.apache.solr.search.join.BlockJoinFieldFacetAccumulator.AggregatableDocIter;
/**
* It does the same as BlockJoinFacetComponent, but operates on docsets,
* it should be faster for static mostly indexes. This component doesn't impact
* query result caching, but hits filter cache to retrieve docsets.
* Calculates facets on children documents and aggregates hits by parent documents.
* Enables when child.facet.field parameter specifies a field name for faceting.
* So far it supports string fields only. It requires to search by {@link ToParentBlockJoinQuery}.
* */
public class BlockJoinDocSetFacetComponent extends BlockJoinFacetComponent {
public class BlockJoinDocSetFacetComponent extends BlockJoinFacetComponentSupport {
private final String bjqKey = this.getClass().getSimpleName()+".bjq";
@ -115,27 +112,6 @@ public class BlockJoinDocSetFacetComponent extends BlockJoinFacetComponent {
}
}
private static final class NoDelegateFacetCollector extends BlockJoinFacetCollector {
{
setDelegate(new Collector() {
@Override
public boolean needsScores() {
return false;
}
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
return null;
}
});
}
private NoDelegateFacetCollector(SolrQueryRequest req) throws IOException {
super(req);
}
}
public BlockJoinDocSetFacetComponent() {}
@Override
@ -196,7 +172,7 @@ public class BlockJoinDocSetFacetComponent extends BlockJoinFacetComponent {
Filter filter = selectedChildren.getTopFilter();
final BlockJoinFacetCollector facetCounter = new NoDelegateFacetCollector(rb.req);
final BlockJoinFacetAccsHolder facetCounter = new BlockJoinFacetAccsHolder(rb.req);
for (int subIdx = 0; subIdx < leaves.size(); subIdx++) {
LeafReaderContext subCtx = leaves.get(subIdx);

View File

@ -17,15 +17,11 @@
package org.apache.solr.search.join;
import java.io.IOException;
import java.util.LinkedList;
import java.util.Queue;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.join.ToParentBlockJoinQuery.ChildrenMatchesScorer;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.DelegatingCollector;
import org.apache.solr.search.join.BlockJoinFieldFacetAccumulator.AggregatableDocIter;
import org.apache.solr.search.join.BlockJoinFieldFacetAccumulator.SortedIntsAggDocIterator;
@ -33,14 +29,14 @@ import org.apache.solr.search.join.BlockJoinFieldFacetAccumulator.SortedIntsAggD
* For each collected parent document creates matched block, which is a docSet with matched children and parent doc
* itself. Then updates each BlockJoinFieldFacetAccumulator with the created matched block.
*/
class BlockJoinFacetCollector extends DelegatingCollector {
class BlockJoinFacetAccsHolder {
private BlockJoinFieldFacetAccumulator[] blockJoinFieldFacetAccumulators;
private boolean firstSegment = true;
private ChildrenMatchesScorer blockJoinScorer;
private int[] childDocs = new int[0];
BlockJoinFacetCollector(SolrQueryRequest req) throws IOException {
String[] facetFieldNames = BlockJoinFacetComponent.getChildFacetFields(req);
BlockJoinFacetAccsHolder(SolrQueryRequest req) throws IOException {
String[] facetFieldNames = BlockJoinFacetComponentSupport.getChildFacetFields(req);
assert facetFieldNames != null;
blockJoinFieldFacetAccumulators = new BlockJoinFieldFacetAccumulator[facetFieldNames.length];
for (int i = 0; i < facetFieldNames.length; i++) {
@ -48,28 +44,7 @@ class BlockJoinFacetCollector extends DelegatingCollector {
}
}
@Override
public void setScorer(Scorer scorer) throws IOException {
super.setScorer(scorer);
blockJoinScorer = getToParentScorer(scorer, new LinkedList<Scorer>());
if (blockJoinScorer != null) {
// instruct scorer to keep track of the child docIds for retrieval purposes.
blockJoinScorer.trackPendingChildHits();
}
}
private ChildrenMatchesScorer getToParentScorer(Scorer scorer, Queue<Scorer> queue) {
if (scorer == null || scorer instanceof ChildrenMatchesScorer) {
return (ChildrenMatchesScorer) scorer;
} else {
for (Scorer.ChildScorer child : scorer.getChildren()) {
queue.add(child.child);
}
return getToParentScorer(queue.poll(), queue);
}
}
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
for (BlockJoinFieldFacetAccumulator blockJoinFieldFacetAccumulator : blockJoinFieldFacetAccumulators) {
if(!firstSegment){
@ -78,21 +53,12 @@ class BlockJoinFacetCollector extends DelegatingCollector {
blockJoinFieldFacetAccumulator.setNextReader(context);
}
firstSegment = false;
super.doSetNextReader(context);
}
@Override
public void collect(int doc) throws IOException {
incrementFacets(doc);
super.collect(doc);
}
@Override
public void finish() throws IOException {
for (BlockJoinFieldFacetAccumulator blockJoinFieldFacetAccumulator : blockJoinFieldFacetAccumulators) {
blockJoinFieldFacetAccumulator.migrateGlobal();
}
super.finish();
}
protected void incrementFacets(int parent) throws IOException {

View File

@ -16,167 +16,8 @@
*/
package org.apache.solr.search.join;
import java.io.IOException;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
/** this is just a stub refers to {@link BlockJoinDocSetFacetComponent} to avoid
* changes in configs */
public class BlockJoinFacetComponent extends BlockJoinDocSetFacetComponent {
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.join.ToParentBlockJoinQuery;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.ShardParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.handler.component.SearchComponent;
import org.apache.solr.handler.component.ShardRequest;
import org.apache.solr.handler.component.ShardResponse;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.DelegatingCollector;
import org.apache.solr.search.SolrIndexSearcher;
/**
* Calculates facets on children documents and aggregates hits by parent documents.
* Enables when child.facet.field parameter specifies a field name for faceting.
* So far it supports string fields only. It requires to search by {@link ToParentBlockJoinQuery}.
* It disables query result cache but only when it's ebaled for request by child.facet.field parameter
* */
public class BlockJoinFacetComponent extends SearchComponent {
public static final String CHILD_FACET_FIELD_PARAMETER = "child.facet.field";
public static final String NO_TO_PARENT_BJQ_MESSAGE = "Block join faceting is allowed with ToParentBlockJoinQuery only";
public static final String COLLECTOR_CONTEXT_PARAM = "blockJoinFacetCollector";
@Override
public void prepare(ResponseBuilder rb) throws IOException {
if (getChildFacetFields(rb.req) != null) {
validateQuery(rb.getQuery());
// we count facets only when searching
rb.setFieldFlags(rb.getFieldFlags() | SolrIndexSearcher.NO_CHECK_QCACHE);
if (rb.getFilters() == null) {
rb.setFilters(new LinkedList<Query>());
}
DelegatingCollector blockJoinFacetCollector = new BlockJoinFacetCollector(rb.req);
rb.req.getContext().put(COLLECTOR_CONTEXT_PARAM, blockJoinFacetCollector);
rb.getFilters().add(new BlockJoinFacetFilter(blockJoinFacetCollector));
}
}
protected void validateQuery(Query query) {
if (!(query instanceof ToParentBlockJoinQuery)) {
if (query instanceof BooleanQuery) {
List<BooleanClause> clauses = ((BooleanQuery) query).clauses();
for (BooleanClause clause : clauses) {
if (clause.getQuery() instanceof ToParentBlockJoinQuery) {
return;
}
}
}
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, NO_TO_PARENT_BJQ_MESSAGE);
}
}
static String[] getChildFacetFields(SolrQueryRequest req) {
return req.getParams().getParams(CHILD_FACET_FIELD_PARAMETER);
}
@Override
public void process(ResponseBuilder rb) throws IOException {
if (getChildFacetFields(rb.req) != null) {
BlockJoinFacetCollector blockJoinFacetCollector = (BlockJoinFacetCollector) rb.req.getContext().get(COLLECTOR_CONTEXT_PARAM);
assert blockJoinFacetCollector != null;
NamedList output;
if (isShard(rb)) {
// distributed search, put results into own cell in order not to clash with facet component
output = getChildFacetFields(rb.rsp.getValues(), true);
} else {
// normal process, put results into standard response
output = getFacetFieldsList(rb);
}
mergeFacets(output, blockJoinFacetCollector.getFacets());
}
}
private boolean isShard(ResponseBuilder rb) {
return "true".equals(rb.req.getParams().get(ShardParams.IS_SHARD));
}
private NamedList getChildFacetFields(NamedList responseValues, boolean createIfAbsent) {
return getNamedListFromList(responseValues, "child_facet_fields", createIfAbsent);
}
private void mergeFacets(NamedList childFacetFields, NamedList shardFacets) {
if (shardFacets != null) {
for (Map.Entry<String, NamedList<Integer>> nextShardFacet : (Iterable<Map.Entry<String, NamedList<Integer>>>) shardFacets) {
String fieldName = nextShardFacet.getKey();
NamedList<Integer> collectedFacet = (NamedList<Integer>) childFacetFields.get(fieldName);
NamedList<Integer> shardFacet = nextShardFacet.getValue();
if (collectedFacet == null) {
childFacetFields.add(fieldName, shardFacet);
} else {
mergeFacetValues(collectedFacet, shardFacet);
}
}
}
}
private void mergeFacetValues(NamedList<Integer> collectedFacetValue, NamedList<Integer> shardFacetValue) {
for (Map.Entry<String, Integer> nextShardValue : shardFacetValue) {
String facetValue = nextShardValue.getKey();
Integer shardCount = nextShardValue.getValue();
int indexOfCollectedValue = collectedFacetValue.indexOf(facetValue, 0);
if (indexOfCollectedValue == -1) {
collectedFacetValue.add(facetValue, shardCount);
} else {
int newCount = collectedFacetValue.getVal(indexOfCollectedValue) + shardCount;
collectedFacetValue.setVal(indexOfCollectedValue, newCount);
}
}
}
private NamedList getNamedListFromList(NamedList parentList, String name, boolean createIfAbsent) {
NamedList result = null;
if (parentList != null) {
result = (NamedList) parentList.get(name);
if (result == null && createIfAbsent) {
result = new NamedList();
parentList.add(name, result);
}
}
return result;
}
@Override
public void handleResponses(ResponseBuilder rb, ShardRequest sreq) {
NamedList collectedChildFacetFields = getChildFacetFields(rb.rsp.getValues(), true);
List<ShardResponse> responses = sreq.responses;
for (ShardResponse shardResponse : responses) {
NamedList shardChildFacetFields = getChildFacetFields(shardResponse.getSolrResponse().getResponse(), false);
mergeFacets(collectedChildFacetFields, shardChildFacetFields);
}
}
@Override
public void finishStage(ResponseBuilder rb) {
if (rb.stage != ResponseBuilder.STAGE_GET_FIELDS) return;
NamedList childFacetFields = getChildFacetFields(rb.rsp.getValues(), true);
NamedList facetFields = getFacetFieldsList(rb);
for (Map.Entry<String, NamedList> childFacetField : (Iterable<Map.Entry<String, NamedList>>) childFacetFields) {
facetFields.add(childFacetField.getKey(), childFacetField.getValue());
}
rb.rsp.getValues().remove("child_facet_fields");
}
private NamedList getFacetFieldsList(ResponseBuilder rb) {
NamedList facetCounts = getNamedListFromList(rb.rsp.getValues(), "facet_counts", true);
return getNamedListFromList(facetCounts, "facet_fields", true);
}
@Override
public String getDescription() {
return "BlockJoin facet component";
}
}

View File

@ -0,0 +1,156 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search.join;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.join.ToParentBlockJoinQuery;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.ShardParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.handler.component.SearchComponent;
import org.apache.solr.handler.component.ShardRequest;
import org.apache.solr.handler.component.ShardResponse;
import org.apache.solr.request.SolrQueryRequest;
abstract class BlockJoinFacetComponentSupport extends SearchComponent {
public static final String CHILD_FACET_FIELD_PARAMETER = "child.facet.field";
public static final String NO_TO_PARENT_BJQ_MESSAGE = "Block join faceting is allowed with ToParentBlockJoinQuery only";
public static final String COLLECTOR_CONTEXT_PARAM = "blockJoinFacetCollector";
protected void validateQuery(Query query) {
if (!(query instanceof ToParentBlockJoinQuery)) {
if (query instanceof BooleanQuery) {
List<BooleanClause> clauses = ((BooleanQuery) query).clauses();
for (BooleanClause clause : clauses) {
if (clause.getQuery() instanceof ToParentBlockJoinQuery) {
return;
}
}
}
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, NO_TO_PARENT_BJQ_MESSAGE);
}
}
static String[] getChildFacetFields(SolrQueryRequest req) {
return req.getParams().getParams(CHILD_FACET_FIELD_PARAMETER);
}
@Override
public void process(ResponseBuilder rb) throws IOException {
if (getChildFacetFields(rb.req) != null) {
BlockJoinFacetAccsHolder blockJoinFacetCollector = (BlockJoinFacetAccsHolder) rb.req.getContext().get(COLLECTOR_CONTEXT_PARAM);
assert blockJoinFacetCollector != null;
NamedList output;
if (isShard(rb)) {
// distributed search, put results into own cell in order not to clash with facet component
output = getChildFacetFields(rb.rsp.getValues(), true);
} else {
// normal process, put results into standard response
output = getFacetFieldsList(rb);
}
mergeFacets(output, blockJoinFacetCollector.getFacets());
}
}
private boolean isShard(ResponseBuilder rb) {
return "true".equals(rb.req.getParams().get(ShardParams.IS_SHARD));
}
private NamedList getChildFacetFields(NamedList responseValues, boolean createIfAbsent) {
return getNamedListFromList(responseValues, "child_facet_fields", createIfAbsent);
}
private void mergeFacets(NamedList childFacetFields, NamedList shardFacets) {
if (shardFacets != null) {
for (Map.Entry<String, NamedList<Integer>> nextShardFacet : (Iterable<Map.Entry<String, NamedList<Integer>>>) shardFacets) {
String fieldName = nextShardFacet.getKey();
NamedList<Integer> collectedFacet = (NamedList<Integer>) childFacetFields.get(fieldName);
NamedList<Integer> shardFacet = nextShardFacet.getValue();
if (collectedFacet == null) {
childFacetFields.add(fieldName, shardFacet);
} else {
mergeFacetValues(collectedFacet, shardFacet);
}
}
}
}
private void mergeFacetValues(NamedList<Integer> collectedFacetValue, NamedList<Integer> shardFacetValue) {
for (Map.Entry<String, Integer> nextShardValue : shardFacetValue) {
String facetValue = nextShardValue.getKey();
Integer shardCount = nextShardValue.getValue();
int indexOfCollectedValue = collectedFacetValue.indexOf(facetValue, 0);
if (indexOfCollectedValue == -1) {
collectedFacetValue.add(facetValue, shardCount);
} else {
int newCount = collectedFacetValue.getVal(indexOfCollectedValue) + shardCount;
collectedFacetValue.setVal(indexOfCollectedValue, newCount);
}
}
}
private NamedList getNamedListFromList(NamedList parentList, String name, boolean createIfAbsent) {
NamedList result = null;
if (parentList != null) {
result = (NamedList) parentList.get(name);
if (result == null && createIfAbsent) {
result = new NamedList();
parentList.add(name, result);
}
}
return result;
}
@Override
public void handleResponses(ResponseBuilder rb, ShardRequest sreq) {
NamedList collectedChildFacetFields = getChildFacetFields(rb.rsp.getValues(), true);
List<ShardResponse> responses = sreq.responses;
for (ShardResponse shardResponse : responses) {
NamedList shardChildFacetFields = getChildFacetFields(shardResponse.getSolrResponse().getResponse(), false);
mergeFacets(collectedChildFacetFields, shardChildFacetFields);
}
}
@Override
public void finishStage(ResponseBuilder rb) {
if (rb.stage != ResponseBuilder.STAGE_GET_FIELDS) return;
NamedList childFacetFields = getChildFacetFields(rb.rsp.getValues(), true);
NamedList facetFields = getFacetFieldsList(rb);
for (Map.Entry<String, NamedList> childFacetField : (Iterable<Map.Entry<String, NamedList>>) childFacetFields) {
facetFields.add(childFacetField.getKey(), childFacetField.getValue());
}
rb.rsp.getValues().remove("child_facet_fields");
}
private NamedList getFacetFieldsList(ResponseBuilder rb) {
NamedList facetCounts = getNamedListFromList(rb.rsp.getValues(), "facet_counts", true);
return getNamedListFromList(facetCounts, "facet_fields", true);
}
@Override
public String getDescription() {
return "BlockJoin facet component";
}
}