SOLR-8532: GraphQuery should not collect edges at the maxDepth level

This commit is contained in:
yonik 2016-01-29 10:59:49 -05:00
parent c403083872
commit e6db8ba214
4 changed files with 79 additions and 40 deletions

View File

@ -172,6 +172,9 @@ Optimizations
count. Also includes change to move to the next non-zero term value when selecting a segment
position. (Keith Laban, Steve Bower, Dennis Gove)
* SOLR-8532: Optimize GraphQuery when maxDepth is set by not collecting edges at the maxDepth level.
(Kevin Watters via yonik)
Other Changes
----------------------

View File

@ -135,8 +135,8 @@ public class GraphQuery extends Query {
// searcher that the leaf-node lookup and the per-level traversal queries are run against
SolrIndexSearcher fromSearcher;
private float queryNorm = 1.0F;
private float queryWeight = 1.0F;
// NOTE(review): frontierSize and currentDepth each appear twice below; this looks like the old and
// the new form of the same two declarations shown together - confirm against the real file.
// frontierSize: number of edge terms collected for the next level; the traversal loop ends at 0.
// currentDepth: how deep in the graph the traversal has gone.
int frontierSize = 0;
public int currentDepth = 0;
private int frontierSize = 0;
private int currentDepth = -1;
private Filter filter;
private DocSet resultSet;
@ -177,69 +177,82 @@ public class GraphQuery extends Query {
* @throws IOException - if a sub search fails... maybe other cases too! :)
*/
private DocSet getDocSet() throws IOException {
// NOTE(review): this listing appears to interleave the removed and the added lines of one change
// (fromSet and leafNodes are each declared twice, and two max-depth checks are nested), so it is
// not compilable exactly as shown - confirm against the real file before relying on statement order.
DocSet fromSet = null;
FixedBitSet seedResultBits = null;
// Size that the bit set needs to be.
int capacity = fromSearcher.getRawReader().maxDoc();
// The bit set to contain the results that match the query.
FixedBitSet resultBits = new FixedBitSet(capacity);
// The measure of how deep in the graph we have gone.
currentDepth = 0;
// this holds the result at each level
BitDocSet fromSet = null;
// the root level's docs; only captured when returnRoot is false, so they can be removed from the result
FixedBitSet rootBits = null;
// the query for the first frontier is the start query q
Query frontierQuery = q;
// Find all documents in this graph that are leaf nodes to speed traversal
// TODO: speed this up in the future with HAS_FIELD type queries
BooleanQuery.Builder leafNodeQuery = new BooleanQuery.Builder();
WildcardQuery edgeQuery = new WildcardQuery(new Term(toField, "*"));
leafNodeQuery.add(edgeQuery, Occur.MUST_NOT);
DocSet leafNodes = fromSearcher.getDocSet(leafNodeQuery.build());
DocSet leafNodes = resolveLeafNodes(toField);
// Start the breadth first graph traversal.
do {
// Create the graph result collector for this level
GraphTermsCollector graphResultCollector = new GraphTermsCollector(toField,capacity, resultBits, leafNodes);
// traverse the level!
fromSearcher.search(frontierQuery, graphResultCollector);
// All edge ids on the frontier.
BytesRefHash collectorTerms = graphResultCollector.getCollectorTerms();
frontierSize = collectorTerms.size();
// The resulting doc set from the frontier.
fromSet = graphResultCollector.getDocSet();
if (seedResultBits == null) {
// grab a copy of the seed bits (these are the "rootNodes")
seedResultBits = ((BitDocSet)fromSet).getBits().clone();
}
Integer fs = new Integer(frontierSize);
FrontierQuery fq = buildFrontierQuery(collectorTerms, fs);
if (fq == null) {
// in case we get null back, make sure we know we're done at this level.
fq = new FrontierQuery(null, 0);
}
frontierQuery = fq.getQuery();
frontierSize = fq.getFrontierSize();
// Add the bits from this level to the result set.
resultBits.or(((BitDocSet)fromSet).getBits());
// Increment how far we have gone in the frontier.
currentDepth++;
// Break out if we have reached our max depth
if (currentDepth >= maxDepth && maxDepth != -1) {
// if we are at the max level we don't need the graph terms collector.
// TODO validate that the join case works properly.
if (maxDepth != -1 && currentDepth >= maxDepth) {
// if we've reached the max depth, don't worry about collecting edges.
fromSet = fromSearcher.getDocSetBits(frontierQuery);
// explicitly the frontier size is zero now so we can break
frontierSize = 0;
} else {
// when we're not at the max depth level, we need to collect edges
// Create the graph result collector for this level
GraphTermsCollector graphResultCollector = new GraphTermsCollector(toField,capacity, resultBits, leafNodes);
fromSearcher.search(frontierQuery, graphResultCollector);
fromSet = graphResultCollector.getDocSet();
// All edge ids on the frontier.
BytesRefHash collectorTerms = graphResultCollector.getCollectorTerms();
frontierSize = collectorTerms.size();
// Build the query for the next level from the edge ids collected at this level.
FrontierQuery fq = buildFrontierQuery(collectorTerms, frontierSize);
if (fq == null) {
// in case we get null back, make sure we know we're done at this level.
frontierSize = 0;
} else {
frontierQuery = fq.getQuery();
frontierSize = fq.getFrontierSize();
}
}
if (currentDepth == 0 && !returnRoot) {
// keep the root level's bits, but only if we need them; note this stores the
// reference returned by getBits() - no clone is made here.
rootBits = fromSet.getBits();
}
// Add the bits from this level to the result set.
resultBits.or(fromSet.getBits());
// stop once the max depth level has been processed (maxDepth == -1 disables this check).
if ((maxDepth != -1 && currentDepth >= maxDepth)) {
break;
}
// keep traversing only while the last level discovered new edges; if not, we're done.
} while (frontierSize > 0);
// helper bit set operations on the final result set
if (!returnRoot) {
resultBits.andNot(seedResultBits);
resultBits.andNot(rootBits);
}
// this is the final resulting filter.
BitDocSet resultSet = new BitDocSet(resultBits);
// If we only want to return leaf nodes do that here.
if (onlyLeafNodes) {
return resultSet.intersection(leafNodes);
} else {
// create a doc set off the bits that we found.
return resultSet;
}
}
/**
 * Looks up the leaf nodes for the given edge field.
 * <p>
 * The lookup is a boolean query with a single MUST_NOT clause: a wildcard ("*") over
 * {@code field}. It is executed against {@code fromSearcher}.
 *
 * @param field name of the field that the wildcard clause is built on
 * @return the doc set that {@code fromSearcher} returns for that query
 * @throws IOException if the underlying search fails
 */
private DocSet resolveLeafNodes(String field) throws IOException {
  // Any document carrying at least one term in the field is excluded.
  WildcardQuery anyValueInField = new WildcardQuery(new Term(field, "*"));
  BooleanQuery withoutEdges = new BooleanQuery.Builder()
      .add(anyValueInField, Occur.MUST_NOT)
      .build();
  return fromSearcher.getDocSet(withoutEdges);
}
/** Build an automaton to represent the frontier query */
private Automaton buildAutomaton(BytesRefHash termBytesHash) {
// need to pass a sorted set of terms to the automaton builder (maybe a better way to avoid this?)

View File

@ -108,7 +108,7 @@ class GraphTermsCollector extends SimpleCollector implements Collector {
numHits++;
}
public DocSet getDocSet() {
public BitDocSet getDocSet() {
if (bits == null) {
// TODO: this shouldn't happen
bits = new FixedBitSet(maxDoc);

View File

@ -77,6 +77,29 @@ public class GraphQueryTest extends SolrTestCaseJ4 {
qr = createRequest(g4Query);
assertQ(qr,"//*[@numFound='2']");
String g5Query = "{!graph from=\"node_id\" to=\"edge_id\" returnRoot=\"true\" returnOnlyLeaf=\"false\" maxDepth=0}id:doc_8";
qr = createRequest(g5Query);
assertQ(qr,"//*[@numFound='1']");
String g6Query = "{!graph from=\"node_id\" to=\"edge_id\" returnRoot=\"true\" returnOnlyLeaf=\"false\" maxDepth=1}id:doc_8";
qr = createRequest(g6Query);
assertQ(qr,"//*[@numFound='3']");
String g7Query = "{!graph from=\"node_id\" to=\"edge_id\" returnRoot=\"false\" returnOnlyLeaf=\"false\" maxDepth=1}id:doc_8";
qr = createRequest(g7Query);
assertQ(qr,"//*[@numFound='2']");
String g8Query = "{!graph from=\"node_id\" to=\"edge_id\" returnRoot=\"false\" returnOnlyLeaf=\"true\" maxDepth=2}id:doc_8";
qr = createRequest(g8Query);
assertQ(qr,"//*[@numFound='1']");
String g9Query = "{!graph from=\"node_id\" to=\"edge_id\" maxDepth=1}id:doc_1";
qr = createRequest(g9Query);
assertQ(qr,"//*[@numFound='2']");
String g10Query = "{!graph from=\"node_id\" to=\"edge_id\" returnRoot=false maxDepth=1}id:doc_1";
qr = createRequest(g10Query);
assertQ(qr,"//*[@numFound='1']");
}
private SolrQueryRequest createRequest(String query) {