mirror of https://github.com/apache/lucene.git
SOLR-8532: GraphQuery: don't collect edges at the maxDepth level
This commit is contained in:
parent
c403083872
commit
e6db8ba214
|
@ -172,6 +172,9 @@ Optimizations
|
||||||
count. Also includes change to move to the next non-zero term value when selecting a segment
|
count. Also includes change to move to the next non-zero term value when selecting a segment
|
||||||
position. (Keith Laban, Steve Bower, Dennis Gove)
|
position. (Keith Laban, Steve Bower, Dennis Gove)
|
||||||
|
|
||||||
|
* SOLR-8532: Optimize GraphQuery when maxDepth is set by not collecting edges at the maxDepth level.
|
||||||
|
(Kevin Watters via yonik)
|
||||||
|
|
||||||
Other Changes
|
Other Changes
|
||||||
----------------------
|
----------------------
|
||||||
|
|
||||||
|
|
|
@ -135,8 +135,8 @@ public class GraphQuery extends Query {
|
||||||
SolrIndexSearcher fromSearcher;
|
SolrIndexSearcher fromSearcher;
|
||||||
private float queryNorm = 1.0F;
|
private float queryNorm = 1.0F;
|
||||||
private float queryWeight = 1.0F;
|
private float queryWeight = 1.0F;
|
||||||
int frontierSize = 0;
|
private int frontierSize = 0;
|
||||||
public int currentDepth = 0;
|
private int currentDepth = -1;
|
||||||
private Filter filter;
|
private Filter filter;
|
||||||
private DocSet resultSet;
|
private DocSet resultSet;
|
||||||
|
|
||||||
|
@ -177,69 +177,82 @@ public class GraphQuery extends Query {
|
||||||
* @throws IOException - if a sub search fails... maybe other cases too! :)
|
* @throws IOException - if a sub search fails... maybe other cases too! :)
|
||||||
*/
|
*/
|
||||||
private DocSet getDocSet() throws IOException {
|
private DocSet getDocSet() throws IOException {
|
||||||
DocSet fromSet = null;
|
|
||||||
FixedBitSet seedResultBits = null;
|
|
||||||
// Size that the bit set needs to be.
|
// Size that the bit set needs to be.
|
||||||
int capacity = fromSearcher.getRawReader().maxDoc();
|
int capacity = fromSearcher.getRawReader().maxDoc();
|
||||||
// The bit set to contain the results that match the query.
|
// The bit set to contain the results that match the query.
|
||||||
FixedBitSet resultBits = new FixedBitSet(capacity);
|
FixedBitSet resultBits = new FixedBitSet(capacity);
|
||||||
// The measure of how deep in the graph we have gone.
|
// this holds the result at each level
|
||||||
currentDepth = 0;
|
BitDocSet fromSet = null;
|
||||||
|
// the root docs, needed only when returnRoot is false
|
||||||
|
FixedBitSet rootBits = null;
|
||||||
// the initial query for the frontier for the first query
|
// the initial query for the frontier for the first query
|
||||||
Query frontierQuery = q;
|
Query frontierQuery = q;
|
||||||
// Find all documents in this graph that are leaf nodes to speed traversal
|
// Find all documents in this graph that are leaf nodes to speed traversal
|
||||||
// TODO: speed this up in the future with HAS_FIELD type queries
|
DocSet leafNodes = resolveLeafNodes(toField);
|
||||||
BooleanQuery.Builder leafNodeQuery = new BooleanQuery.Builder();
|
|
||||||
WildcardQuery edgeQuery = new WildcardQuery(new Term(toField, "*"));
|
|
||||||
leafNodeQuery.add(edgeQuery, Occur.MUST_NOT);
|
|
||||||
DocSet leafNodes = fromSearcher.getDocSet(leafNodeQuery.build());
|
|
||||||
// Start the breadth first graph traversal.
|
// Start the breadth first graph traversal.
|
||||||
|
|
||||||
do {
|
do {
|
||||||
// Create the graph result collector for this level
|
|
||||||
GraphTermsCollector graphResultCollector = new GraphTermsCollector(toField,capacity, resultBits, leafNodes);
|
|
||||||
// traverse the level!
|
|
||||||
fromSearcher.search(frontierQuery, graphResultCollector);
|
|
||||||
// All edge ids on the frontier.
|
|
||||||
BytesRefHash collectorTerms = graphResultCollector.getCollectorTerms();
|
|
||||||
frontierSize = collectorTerms.size();
|
|
||||||
// The resulting doc set from the frontier.
|
|
||||||
fromSet = graphResultCollector.getDocSet();
|
|
||||||
if (seedResultBits == null) {
|
|
||||||
// grab a copy of the seed bits (these are the "rootNodes")
|
|
||||||
seedResultBits = ((BitDocSet)fromSet).getBits().clone();
|
|
||||||
}
|
|
||||||
Integer fs = new Integer(frontierSize);
|
|
||||||
FrontierQuery fq = buildFrontierQuery(collectorTerms, fs);
|
|
||||||
if (fq == null) {
|
|
||||||
// in case we get null back, make sure we know we're done at this level.
|
|
||||||
fq = new FrontierQuery(null, 0);
|
|
||||||
}
|
|
||||||
frontierQuery = fq.getQuery();
|
|
||||||
frontierSize = fq.getFrontierSize();
|
|
||||||
// Add the bits from this level to the result set.
|
|
||||||
resultBits.or(((BitDocSet)fromSet).getBits());
|
|
||||||
// Increment how far we have gone in the frontier.
|
// Increment how far we have gone in the frontier.
|
||||||
currentDepth++;
|
currentDepth++;
|
||||||
// Break out if we have reached our max depth
|
// if we are at the max level we don't need the graph terms collector.
|
||||||
if (currentDepth >= maxDepth && maxDepth != -1) {
|
// TODO validate that the join case works properly.
|
||||||
|
if (maxDepth != -1 && currentDepth >= maxDepth) {
|
||||||
|
// if we've reached the max depth, don't worry about collecting edges.
|
||||||
|
fromSet = fromSearcher.getDocSetBits(frontierQuery);
|
||||||
|
// explicitly the frontier size is zero now so we can break
|
||||||
|
frontierSize = 0;
|
||||||
|
} else {
|
||||||
|
// when we're not at the max depth level, we need to collect edges
|
||||||
|
// Create the graph result collector for this level
|
||||||
|
GraphTermsCollector graphResultCollector = new GraphTermsCollector(toField,capacity, resultBits, leafNodes);
|
||||||
|
fromSearcher.search(frontierQuery, graphResultCollector);
|
||||||
|
fromSet = graphResultCollector.getDocSet();
|
||||||
|
// All edge ids on the frontier.
|
||||||
|
BytesRefHash collectorTerms = graphResultCollector.getCollectorTerms();
|
||||||
|
frontierSize = collectorTerms.size();
|
||||||
|
// The resulting doc set from the frontier.
|
||||||
|
FrontierQuery fq = buildFrontierQuery(collectorTerms, frontierSize);
|
||||||
|
if (fq == null) {
|
||||||
|
// in case we get null back, make sure we know we're done at this level.
|
||||||
|
frontierSize = 0;
|
||||||
|
} else {
|
||||||
|
frontierQuery = fq.getQuery();
|
||||||
|
frontierSize = fq.getFrontierSize();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (currentDepth == 0 && !returnRoot) {
|
||||||
|
// grab a copy of the root bits but only if we need it.
|
||||||
|
rootBits = fromSet.getBits();
|
||||||
|
}
|
||||||
|
// Add the bits from this level to the result set.
|
||||||
|
resultBits.or(fromSet.getBits());
|
||||||
|
// test if we discovered any new edges, if not , we're done.
|
||||||
|
if ((maxDepth != -1 && currentDepth >= maxDepth)) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
// test if we discovered any new edges, if not , we're done.
|
|
||||||
} while (frontierSize > 0);
|
} while (frontierSize > 0);
|
||||||
// helper bit set operations on the final result set
|
// helper bit set operations on the final result set
|
||||||
if (!returnRoot) {
|
if (!returnRoot) {
|
||||||
resultBits.andNot(seedResultBits);
|
resultBits.andNot(rootBits);
|
||||||
}
|
}
|
||||||
|
// this is the final resulting filter.
|
||||||
BitDocSet resultSet = new BitDocSet(resultBits);
|
BitDocSet resultSet = new BitDocSet(resultBits);
|
||||||
// If we only want to return leaf nodes do that here.
|
// If we only want to return leaf nodes do that here.
|
||||||
if (onlyLeafNodes) {
|
if (onlyLeafNodes) {
|
||||||
return resultSet.intersection(leafNodes);
|
return resultSet.intersection(leafNodes);
|
||||||
} else {
|
} else {
|
||||||
// create a doc set off the bits that we found.
|
|
||||||
return resultSet;
|
return resultSet;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private DocSet resolveLeafNodes(String field) throws IOException {
|
||||||
|
BooleanQuery.Builder leafNodeQuery = new BooleanQuery.Builder();
|
||||||
|
WildcardQuery edgeQuery = new WildcardQuery(new Term(field, "*"));
|
||||||
|
leafNodeQuery.add(edgeQuery, Occur.MUST_NOT);
|
||||||
|
DocSet leafNodes = fromSearcher.getDocSet(leafNodeQuery.build());
|
||||||
|
return leafNodes;
|
||||||
|
}
|
||||||
|
|
||||||
/** Build an automaton to represent the frontier query */
|
/** Build an automaton to represent the frontier query */
|
||||||
private Automaton buildAutomaton(BytesRefHash termBytesHash) {
|
private Automaton buildAutomaton(BytesRefHash termBytesHash) {
|
||||||
// need to pass a sorted set of terms to the automaton builder (maybe a better way to avoid this?)
|
// need to pass a sorted set of terms to the automaton builder (maybe a better way to avoid this?)
|
||||||
|
|
|
@ -108,7 +108,7 @@ class GraphTermsCollector extends SimpleCollector implements Collector {
|
||||||
numHits++;
|
numHits++;
|
||||||
}
|
}
|
||||||
|
|
||||||
public DocSet getDocSet() {
|
public BitDocSet getDocSet() {
|
||||||
if (bits == null) {
|
if (bits == null) {
|
||||||
// TODO: this shouldn't happen
|
// TODO: this shouldn't happen
|
||||||
bits = new FixedBitSet(maxDoc);
|
bits = new FixedBitSet(maxDoc);
|
||||||
|
|
|
@ -77,6 +77,29 @@ public class GraphQueryTest extends SolrTestCaseJ4 {
|
||||||
qr = createRequest(g4Query);
|
qr = createRequest(g4Query);
|
||||||
assertQ(qr,"//*[@numFound='2']");
|
assertQ(qr,"//*[@numFound='2']");
|
||||||
|
|
||||||
|
String g5Query = "{!graph from=\"node_id\" to=\"edge_id\" returnRoot=\"true\" returnOnlyLeaf=\"false\" maxDepth=0}id:doc_8";
|
||||||
|
qr = createRequest(g5Query);
|
||||||
|
assertQ(qr,"//*[@numFound='1']");
|
||||||
|
|
||||||
|
String g6Query = "{!graph from=\"node_id\" to=\"edge_id\" returnRoot=\"true\" returnOnlyLeaf=\"false\" maxDepth=1}id:doc_8";
|
||||||
|
qr = createRequest(g6Query);
|
||||||
|
assertQ(qr,"//*[@numFound='3']");
|
||||||
|
|
||||||
|
String g7Query = "{!graph from=\"node_id\" to=\"edge_id\" returnRoot=\"false\" returnOnlyLeaf=\"false\" maxDepth=1}id:doc_8";
|
||||||
|
qr = createRequest(g7Query);
|
||||||
|
assertQ(qr,"//*[@numFound='2']");
|
||||||
|
|
||||||
|
String g8Query = "{!graph from=\"node_id\" to=\"edge_id\" returnRoot=\"false\" returnOnlyLeaf=\"true\" maxDepth=2}id:doc_8";
|
||||||
|
qr = createRequest(g8Query);
|
||||||
|
assertQ(qr,"//*[@numFound='1']");
|
||||||
|
|
||||||
|
String g9Query = "{!graph from=\"node_id\" to=\"edge_id\" maxDepth=1}id:doc_1";
|
||||||
|
qr = createRequest(g9Query);
|
||||||
|
assertQ(qr,"//*[@numFound='2']");
|
||||||
|
|
||||||
|
String g10Query = "{!graph from=\"node_id\" to=\"edge_id\" returnRoot=false maxDepth=1}id:doc_1";
|
||||||
|
qr = createRequest(g10Query);
|
||||||
|
assertQ(qr,"//*[@numFound='1']");
|
||||||
}
|
}
|
||||||
|
|
||||||
private SolrQueryRequest createRequest(String query) {
|
private SolrQueryRequest createRequest(String query) {
|
||||||
|
|
Loading…
Reference in New Issue