mirror of https://github.com/apache/lucene.git
SOLR-8532: GraphQuery don't collect edges at maxDepth level
This commit is contained in:
parent
c403083872
commit
e6db8ba214
|
@ -172,6 +172,9 @@ Optimizations
|
|||
count. Also includes change to move to the next non-zero term value when selecting a segment
|
||||
position. (Keith Laban, Steve Bower, Dennis Gove)
|
||||
|
||||
* SOLR-8532: Optimize GraphQuery when maxDepth is set by not collecting edges at the maxDepth level.
|
||||
(Kevin Watters via yonik)
|
||||
|
||||
Other Changes
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -135,8 +135,8 @@ public class GraphQuery extends Query {
|
|||
SolrIndexSearcher fromSearcher;
|
||||
private float queryNorm = 1.0F;
|
||||
private float queryWeight = 1.0F;
|
||||
int frontierSize = 0;
|
||||
public int currentDepth = 0;
|
||||
private int frontierSize = 0;
|
||||
private int currentDepth = -1;
|
||||
private Filter filter;
|
||||
private DocSet resultSet;
|
||||
|
||||
|
@ -177,69 +177,82 @@ public class GraphQuery extends Query {
|
|||
* @throws IOException - if a sub search fails... maybe other cases too! :)
|
||||
*/
|
||||
private DocSet getDocSet() throws IOException {
|
||||
DocSet fromSet = null;
|
||||
FixedBitSet seedResultBits = null;
|
||||
// Size that the bit set needs to be.
|
||||
int capacity = fromSearcher.getRawReader().maxDoc();
|
||||
// The bit set to contain the results that match the query.
|
||||
FixedBitSet resultBits = new FixedBitSet(capacity);
|
||||
// The measure of how deep in the graph we have gone.
|
||||
currentDepth = 0;
|
||||
// this holds the result at each level
|
||||
BitDocSet fromSet = null;
|
||||
// the root docs if we return root is false
|
||||
FixedBitSet rootBits = null;
|
||||
// the initial query for the frontier for the first query
|
||||
Query frontierQuery = q;
|
||||
// Find all documents in this graph that are leaf nodes to speed traversal
|
||||
// TODO: speed this up in the future with HAS_FIELD type queries
|
||||
BooleanQuery.Builder leafNodeQuery = new BooleanQuery.Builder();
|
||||
WildcardQuery edgeQuery = new WildcardQuery(new Term(toField, "*"));
|
||||
leafNodeQuery.add(edgeQuery, Occur.MUST_NOT);
|
||||
DocSet leafNodes = fromSearcher.getDocSet(leafNodeQuery.build());
|
||||
DocSet leafNodes = resolveLeafNodes(toField);
|
||||
// Start the breadth first graph traversal.
|
||||
|
||||
do {
|
||||
// Create the graph result collector for this level
|
||||
GraphTermsCollector graphResultCollector = new GraphTermsCollector(toField,capacity, resultBits, leafNodes);
|
||||
// traverse the level!
|
||||
fromSearcher.search(frontierQuery, graphResultCollector);
|
||||
// All edge ids on the frontier.
|
||||
BytesRefHash collectorTerms = graphResultCollector.getCollectorTerms();
|
||||
frontierSize = collectorTerms.size();
|
||||
// The resulting doc set from the frontier.
|
||||
fromSet = graphResultCollector.getDocSet();
|
||||
if (seedResultBits == null) {
|
||||
// grab a copy of the seed bits (these are the "rootNodes")
|
||||
seedResultBits = ((BitDocSet)fromSet).getBits().clone();
|
||||
}
|
||||
Integer fs = new Integer(frontierSize);
|
||||
FrontierQuery fq = buildFrontierQuery(collectorTerms, fs);
|
||||
if (fq == null) {
|
||||
// in case we get null back, make sure we know we're done at this level.
|
||||
fq = new FrontierQuery(null, 0);
|
||||
}
|
||||
frontierQuery = fq.getQuery();
|
||||
frontierSize = fq.getFrontierSize();
|
||||
// Add the bits from this level to the result set.
|
||||
resultBits.or(((BitDocSet)fromSet).getBits());
|
||||
// Increment how far we have gone in the frontier.
|
||||
currentDepth++;
|
||||
// Break out if we have reached our max depth
|
||||
if (currentDepth >= maxDepth && maxDepth != -1) {
|
||||
// if we are at the max level we don't need the graph terms collector.
|
||||
// TODO validate that the join case works properly.
|
||||
if (maxDepth != -1 && currentDepth >= maxDepth) {
|
||||
// if we've reached the max depth, don't worry about collecting edges.
|
||||
fromSet = fromSearcher.getDocSetBits(frontierQuery);
|
||||
// explicitly the frontier size is zero now so we can break
|
||||
frontierSize = 0;
|
||||
} else {
|
||||
// when we're not at the max depth level, we need to collect edges
|
||||
// Create the graph result collector for this level
|
||||
GraphTermsCollector graphResultCollector = new GraphTermsCollector(toField,capacity, resultBits, leafNodes);
|
||||
fromSearcher.search(frontierQuery, graphResultCollector);
|
||||
fromSet = graphResultCollector.getDocSet();
|
||||
// All edge ids on the frontier.
|
||||
BytesRefHash collectorTerms = graphResultCollector.getCollectorTerms();
|
||||
frontierSize = collectorTerms.size();
|
||||
// The resulting doc set from the frontier.
|
||||
FrontierQuery fq = buildFrontierQuery(collectorTerms, frontierSize);
|
||||
if (fq == null) {
|
||||
// in case we get null back, make sure we know we're done at this level.
|
||||
frontierSize = 0;
|
||||
} else {
|
||||
frontierQuery = fq.getQuery();
|
||||
frontierSize = fq.getFrontierSize();
|
||||
}
|
||||
}
|
||||
if (currentDepth == 0 && !returnRoot) {
|
||||
// grab a copy of the root bits but only if we need it.
|
||||
rootBits = fromSet.getBits();
|
||||
}
|
||||
// Add the bits from this level to the result set.
|
||||
resultBits.or(fromSet.getBits());
|
||||
// test if we discovered any new edges, if not , we're done.
|
||||
if ((maxDepth != -1 && currentDepth >= maxDepth)) {
|
||||
break;
|
||||
}
|
||||
// test if we discovered any new edges, if not , we're done.
|
||||
} while (frontierSize > 0);
|
||||
// helper bit set operations on the final result set
|
||||
if (!returnRoot) {
|
||||
resultBits.andNot(seedResultBits);
|
||||
resultBits.andNot(rootBits);
|
||||
}
|
||||
// this is the final resulting filter.
|
||||
BitDocSet resultSet = new BitDocSet(resultBits);
|
||||
// If we only want to return leaf nodes do that here.
|
||||
if (onlyLeafNodes) {
|
||||
return resultSet.intersection(leafNodes);
|
||||
} else {
|
||||
// create a doc set off the bits that we found.
|
||||
return resultSet;
|
||||
}
|
||||
}
|
||||
|
||||
private DocSet resolveLeafNodes(String field) throws IOException {
|
||||
BooleanQuery.Builder leafNodeQuery = new BooleanQuery.Builder();
|
||||
WildcardQuery edgeQuery = new WildcardQuery(new Term(field, "*"));
|
||||
leafNodeQuery.add(edgeQuery, Occur.MUST_NOT);
|
||||
DocSet leafNodes = fromSearcher.getDocSet(leafNodeQuery.build());
|
||||
return leafNodes;
|
||||
}
|
||||
|
||||
/** Build an automaton to represent the frontier query */
|
||||
private Automaton buildAutomaton(BytesRefHash termBytesHash) {
|
||||
// need top pass a sorted set of terms to the autn builder (maybe a better way to avoid this?)
|
||||
|
|
|
@ -108,7 +108,7 @@ class GraphTermsCollector extends SimpleCollector implements Collector {
|
|||
numHits++;
|
||||
}
|
||||
|
||||
public DocSet getDocSet() {
|
||||
public BitDocSet getDocSet() {
|
||||
if (bits == null) {
|
||||
// TODO: this shouldn't happen
|
||||
bits = new FixedBitSet(maxDoc);
|
||||
|
|
|
@ -77,6 +77,29 @@ public class GraphQueryTest extends SolrTestCaseJ4 {
|
|||
qr = createRequest(g4Query);
|
||||
assertQ(qr,"//*[@numFound='2']");
|
||||
|
||||
String g5Query = "{!graph from=\"node_id\" to=\"edge_id\" returnRoot=\"true\" returnOnlyLeaf=\"false\" maxDepth=0}id:doc_8";
|
||||
qr = createRequest(g5Query);
|
||||
assertQ(qr,"//*[@numFound='1']");
|
||||
|
||||
String g6Query = "{!graph from=\"node_id\" to=\"edge_id\" returnRoot=\"true\" returnOnlyLeaf=\"false\" maxDepth=1}id:doc_8";
|
||||
qr = createRequest(g6Query);
|
||||
assertQ(qr,"//*[@numFound='3']");
|
||||
|
||||
String g7Query = "{!graph from=\"node_id\" to=\"edge_id\" returnRoot=\"false\" returnOnlyLeaf=\"false\" maxDepth=1}id:doc_8";
|
||||
qr = createRequest(g7Query);
|
||||
assertQ(qr,"//*[@numFound='2']");
|
||||
|
||||
String g8Query = "{!graph from=\"node_id\" to=\"edge_id\" returnRoot=\"false\" returnOnlyLeaf=\"true\" maxDepth=2}id:doc_8";
|
||||
qr = createRequest(g8Query);
|
||||
assertQ(qr,"//*[@numFound='1']");
|
||||
|
||||
String g9Query = "{!graph from=\"node_id\" to=\"edge_id\" maxDepth=1}id:doc_1";
|
||||
qr = createRequest(g9Query);
|
||||
assertQ(qr,"//*[@numFound='2']");
|
||||
|
||||
String g10Query = "{!graph from=\"node_id\" to=\"edge_id\" returnRoot=false maxDepth=1}id:doc_1";
|
||||
qr = createRequest(g10Query);
|
||||
assertQ(qr,"//*[@numFound='1']");
|
||||
}
|
||||
|
||||
private SolrQueryRequest createRequest(String query) {
|
||||
|
|
Loading…
Reference in New Issue