SOLR-7543: basic graph traversal query

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1707818 13f79535-47bb-0310-9956-ffa450edef68
Yonik Seeley 2015-10-09 21:27:03 +00:00
parent 99c2515d99
commit 0a4b0833a2
11 changed files with 1022 additions and 1 deletion

View File

@@ -33,7 +33,15 @@ import org.apache.lucene.util.UnicodeUtil;
* @see #build(Collection)
* @see Automata#makeStringUnion(Collection)
*/
final class DaciukMihovAutomatonBuilder {
public final class DaciukMihovAutomatonBuilder {
/**
* The default constructor is private. Use static methods directly.
*/
private DaciukMihovAutomatonBuilder() {
super();
}
/**
* DFSA state with <code>char</code> labels on transitions.
*/

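The builder is opened up (made public) so that GraphQuery, added later in this commit, can compile the edge terms collected at each hop into a DFA. A condensed sketch of that call, mirroring GraphQuery.buildAutomaton below; build(Collection) requires sorted input, hence the TreeSet:

// Condensed from GraphQuery.buildAutomaton (below): compile collected terms into a DFA.
final TreeSet<BytesRef> terms = new TreeSet<>();
for (int i = 0; i < termBytesHash.size(); i++) {
  BytesRef ref = new BytesRef();
  termBytesHash.get(i, ref);  // copy the i-th hashed term into ref
  terms.add(ref);
}
Automaton a = DaciukMihovAutomatonBuilder.build(terms);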
View File

@@ -71,6 +71,11 @@ New Features
* SOLR-8038: Add the StatsStream to the Streaming API and wire it into the SQLHandler (Joel Bernstein)
* SOLR-7543: Basic graph traversal query
Example: {!graph from="node_id" to="edge_id"}id:doc_1
(Kevin Watters, yonik)
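An illustrative request combining the optional local params (all parsed by GraphQueryParser):
{!graph from="node_id" to="edge_id" traversalFilter="text:foo" maxDepth=2 returnRoot=false returnOnlyLeaf=true useAutn=false}id:doc_1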
Optimizations
----------------------
* SOLR-7876: Speed up queries and operations that use many terms when timeAllowed has not been

View File

@@ -22,6 +22,7 @@ import org.apache.solr.core.SolrInfoMBean;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.join.BlockJoinChildQParserPlugin;
import org.apache.solr.search.join.BlockJoinParentQParserPlugin;
import org.apache.solr.search.join.GraphQParserPlugin;
import org.apache.solr.search.mlt.MLTQParserPlugin;
import org.apache.solr.util.plugin.NamedListInitializedPlugin;
@@ -73,6 +74,7 @@ public abstract class QParserPlugin implements NamedListInitializedPlugin, SolrI
map.put(ExportQParserPlugin.NAME, ExportQParserPlugin.class);
map.put(MLTQParserPlugin.NAME, MLTQParserPlugin.class);
map.put(HashQParserPlugin.NAME, HashQParserPlugin.class);
map.put(GraphQParserPlugin.NAME, GraphQParserPlugin.class);
standardPlugins = Collections.unmodifiableMap(map);
}

View File

@@ -0,0 +1,49 @@
package org.apache.solr.search.join;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.search.Query;
/**
* FrontierQuery represents the next hop of a graph traversal.
* It contains the query to execute and the number of edges to traverse.
* @lucene.internal
*/
class FrontierQuery {
private final Query query;
private final Integer frontierSize;
public FrontierQuery(Query query, Integer frontierSize) {
super();
this.query = query;
this.frontierSize = frontierSize;
}
/**
* Return the query that represents the frontier at the current level.
*/
public Query getQuery() {
return query;
}
/**
* Return the number of edges in the frontier query.
*/
public Integer getFrontierSize() {
return frontierSize;
}
}
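For context, the breadth-first loop in GraphQuery.getDocSet (later in this commit) consumes a FrontierQuery like this; the snippet is condensed from that method, and a frontier size of zero ends the traversal:

// Condensed from GraphQuery.getDocSet: advance to the next hop.
FrontierQuery fq = buildFrontierQuery(collectorTerms, frontierSize);
if (fq == null) {
  // no edges to traverse; record an empty frontier so the loop can exit
  fq = new FrontierQuery(null, 0);
}
frontierQuery = fq.getQuery();
frontierSize = fq.getFrontierSize(); // zero terminates the do/while loop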

View File

@@ -0,0 +1,44 @@
package org.apache.solr.search.join;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QParserPlugin;
/**
* Query parser plugin for Solr that wraps the graph query parser.
*/
public class GraphQParserPlugin extends QParserPlugin {
// the graph query parser name
public static final String NAME = "graph";
@Override
public void init(NamedList args) {
}
@Override
public QParser createParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
// return the graph query parser for this request.
return new GraphQueryParser(qstr, localParams, params, req);
}
}
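Once registered under the name graph (see the QParserPlugin change above), the parser is reachable through the standard local-params syntax; illustrative examples, with field names borrowed from the tests below:

q={!graph from=node_id to=edge_id}id:doc_1
fq={!graph from=node_id to=edge_id maxDepth=1}node_id:1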

View File

@@ -0,0 +1,506 @@
package org.apache.solr.search.join;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.TermsQuery;
import org.apache.lucene.search.AutomatonQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.DaciukMihovAutomatonBuilder;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.search.BitDocSet;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.SolrIndexSearcher;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* GraphQuery - search for nodes and traverse edges in an index.
*
* Params:
* fromField = the field that contains the node id
* toField = the field that contains the edge ids
* traversalFilter = a query that can be applied for each hop in the graph.
* maxDepth = the max depth to traverse. (start nodes are at depth=1)
* onlyLeafNodes = only return documents that have no edge id values.
* returnRoot = if false, the documents matching the initial query will not be returned.
*
* @lucene.experimental
*/
public class GraphQuery extends Query {
/** The initial node matching query */
private Query q;
/** the field with the node id */
private String fromField;
/** the field containing the edge ids */
private String toField;
/** A query to apply while traversing the graph to filter out edges */
private Query traversalFilter;
/** The max depth to traverse the graph, -1 means no limit. */
private int maxDepth = -1;
/** Use automaton compilation for graph query traversal (experimental + expert use only) */
private boolean useAutn = true;
/** If this is true, the graph traversal result will only return documents that
* do not have a value in the edge field. (Only leaf nodes returned from the graph) */
private boolean onlyLeafNodes = false;
/** If false, documents matching the start query for the graph are excluded from the final result set. */
private boolean returnRoot = true;
/**
* Create a graph query
* q - the starting node query
* fromField - the field containing the node id
* toField - the field containing the edge ids
*/
public GraphQuery(Query q, String fromField, String toField) {
this(q, fromField, toField, null);
}
/**
* Create a graph query with a traversal filter applied while traversing the frontier.
* q - the starting node query
* fromField - the field containing the node id
* toField - the field containing the edge ids
* traversalFilter - the filter to be applied on each iteration of the frontier.
*/
public GraphQuery(Query q, String fromField, String toField, Query traversalFilter) {
this.q = q;
this.fromField = fromField;
this.toField = toField;
this.traversalFilter = traversalFilter;
}
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
Weight graphWeight = new GraphQueryWeight((SolrIndexSearcher)searcher);
return graphWeight;
}
@Override
public String toString(String field) {
StringBuilder sb = new StringBuilder();
sb.append("[[" + q.toString() + "]," + fromField + "=" + toField + "]");
if (traversalFilter != null) {
sb.append(" [TraversalFilter: " + traversalFilter.toString() + "]");
}
sb.append("[maxDepth=" + maxDepth + "]");
sb.append("[returnRoot=" + returnRoot + "]");
sb.append("[onlyLeafNodes=" + onlyLeafNodes + "]");
sb.append("[useAutn=" + useAutn + "]");
return sb.toString();
}
protected class GraphQueryWeight extends Weight {
SolrIndexSearcher fromSearcher;
private float queryNorm = 1.0F;
private float queryWeight = 1.0F;
int frontierSize = 0;
public int currentDepth = 0;
private Filter filter;
private DocSet resultSet;
public GraphQueryWeight(SolrIndexSearcher searcher) {
// Grab the searcher so we can run additional searches.
super(null);
this.fromSearcher = searcher;
}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
// currently no ranking for graph queries.
final Scorer cs = scorer(context);
final boolean exists = (cs != null && cs.advance(doc) == doc);
if (exists) {
List<Explanation> subs = new ArrayList<Explanation>();
return Explanation.match(1.0F, "Graph Match", subs);
} else {
List<Explanation> subs = new ArrayList<Explanation>();
return Explanation.noMatch("No Graph Match.", subs);
}
}
@Override
public float getValueForNormalization() throws IOException {
return 1F;
}
@Override
public void normalize(float norm, float topLevelBoost) {
this.queryWeight = norm * topLevelBoost;
}
/**
* This computes the matching doc set for a given graph query
*
* @return DocSet representing the documents in the graph.
* @throws IOException - if a sub search fails... maybe other cases too! :)
*/
private DocSet getDocSet() throws IOException {
DocSet fromSet = null;
FixedBitSet seedResultBits = null;
// Size that the bit set needs to be.
int capacity = fromSearcher.getRawReader().maxDoc();
// The bit set to contain the results that match the query.
FixedBitSet resultBits = new FixedBitSet(capacity);
// The measure of how deep in the graph we have gone.
currentDepth = 0;
// the frontier query for the first hop is the initial node query itself
Query frontierQuery = q;
// Find all documents in this graph that are leaf nodes to speed traversal
// TODO: speed this up in the future with HAS_FIELD type queries
BooleanQuery.Builder leafNodeQuery = new BooleanQuery.Builder();
WildcardQuery edgeQuery = new WildcardQuery(new Term(toField, "*"));
leafNodeQuery.add(edgeQuery, Occur.MUST_NOT);
DocSet leafNodes = fromSearcher.getDocSet(leafNodeQuery.build());
// Start the breadth first graph traversal.
do {
// Create the graph result collector for this level
GraphTermsCollector graphResultCollector = new GraphTermsCollector(toField,capacity, resultBits, leafNodes);
// traverse the level!
fromSearcher.search(frontierQuery, graphResultCollector);
// All edge ids on the frontier.
BytesRefHash collectorTerms = graphResultCollector.getCollectorTerms();
frontierSize = collectorTerms.size();
// The resulting doc set from the frontier.
fromSet = graphResultCollector.getDocSet();
if (seedResultBits == null) {
// grab a copy of the seed bits (these are the "rootNodes")
seedResultBits = ((BitDocSet)fromSet).getBits().clone();
}
FrontierQuery fq = buildFrontierQuery(collectorTerms, frontierSize);
if (fq == null) {
// in case we get null back, make sure we know we're done at this level.
fq = new FrontierQuery(null, 0);
}
frontierQuery = fq.getQuery();
frontierSize = fq.getFrontierSize();
// Add the bits from this level to the result set.
resultBits.or(((BitDocSet)fromSet).getBits());
// Increment how far we have gone in the frontier.
currentDepth++;
// Break out if we have reached our max depth
if (currentDepth >= maxDepth && maxDepth != -1) {
break;
}
// Test whether we discovered any new edges; if not, we're done.
} while (frontierSize > 0);
// Final bit set operations: optionally drop the root (seed) documents.
if (!returnRoot) {
resultBits.andNot(seedResultBits);
}
BitDocSet resultSet = new BitDocSet(resultBits);
// If we only want to return leaf nodes do that here.
if (onlyLeafNodes) {
return resultSet.intersection(leafNodes);
} else {
// create a doc set off the bits that we found.
return resultSet;
}
}
/** Build an automaton to represent the frontier query */
private Automaton buildAutomaton(BytesRefHash termBytesHash) {
// need to pass a sorted set of terms to the automaton builder (maybe a better way to avoid this?)
final TreeSet<BytesRef> terms = new TreeSet<BytesRef>();
for (int i = 0 ; i < termBytesHash.size(); i++) {
BytesRef ref = new BytesRef();
termBytesHash.get(i, ref);
terms.add(ref);
}
final Automaton a = DaciukMihovAutomatonBuilder.build(terms);
return a;
}
/**
* Returns a query that represents the documents matching the next hop in the traversal.
*
* collectorTerms - the terms that represent the edge ids for the current frontier.
* frontierSize - the size of the frontier query (number of unique edges)
*
*/
public FrontierQuery buildFrontierQuery(BytesRefHash collectorTerms, Integer frontierSize) {
if (collectorTerms == null || collectorTerms.size() == 0) {
// return null if there are no terms (edges) to traverse.
return null;
} else {
// Create a query
Query q = null;
// TODO: see if we should dynamically select this based on the frontier size.
if (useAutn) {
// build an automaton based query for the frontier.
Automaton autn = buildAutomaton(collectorTerms);
AutomatonQuery autnQuery = new AutomatonQuery(new Term(fromField), autn);
q = autnQuery;
} else {
List<BytesRef> termList = new ArrayList<>(collectorTerms.size());
for (int i = 0 ; i < collectorTerms.size(); i++) {
BytesRef ref = new BytesRef();
collectorTerms.get(i, ref);
termList.add(ref);
}
q = new TermsQuery(fromField, termList);
}
// If there is a filter to be used while crawling the graph, add that.
if (traversalFilter != null) {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(q, Occur.MUST);
builder.add(traversalFilter, Occur.MUST);
q = builder.build();
}
// return the new query.
FrontierQuery frontier = new FrontierQuery(q, frontierSize);
return frontier;
}
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
if (filter == null) {
resultSet = getDocSet();
filter = resultSet.getTopFilter();
}
DocIdSet readerSet = filter.getDocIdSet(context,context.reader().getLiveDocs());
// Create a scorer over the result set; if this segment has no matching docs, use an empty iterator.
return new GraphScorer(this, readerSet == null ? DocIdSetIterator.empty() : readerSet.iterator(), 1);
}
@Override
public void extractTerms(Set<Term> terms) {
// No-op for now; not used or supported.
}
}
private class GraphScorer extends Scorer {
final DocIdSetIterator iter;
final float score;
// graph query scorer constructor with iterator
public GraphScorer(Weight w, DocIdSetIterator iter, float score) throws IOException {
super(w);
this.iter = iter==null ? DocIdSet.EMPTY.iterator() : iter;
this.score = score;
}
@Override
public float score() throws IOException {
// no dynamic scoring now.
return score;
}
@Override
public int nextDoc() throws IOException {
return iter.nextDoc();
}
@Override
public int docID() {
// current position of the doc iterator.
return iter.docID();
}
@Override
public int advance(int target) throws IOException {
return iter.advance(target);
}
@Override
public int freq() throws IOException {
return 1;
}
@Override
public long cost() {
// TODO: potentially very expensive! what's a good value for this?
return 0;
}
}
/**
* @return The query to be used as a filter for each hop in the graph.
*/
public Query getTraversalFilter() {
return traversalFilter;
}
public void setTraversalFilter(Query traversalFilter) {
this.traversalFilter = traversalFilter;
}
public Query getQ() {
return q;
}
public void setQ(Query q) {
this.q = q;
}
/**
* @return The field that contains the node id
*/
public String getFromField() {
return fromField;
}
public void setFromField(String fromField) {
this.fromField = fromField;
}
/**
* @return the field that contains the edge id(s)
*/
public String getToField() {
return toField;
}
public void setToField(String toField) {
this.toField = toField;
}
/**
* @return Max depth for traversal, -1 for infinite!
*/
public int getMaxDepth() {
return maxDepth;
}
public void setMaxDepth(int maxDepth) {
this.maxDepth = maxDepth;
}
/**
* @return If true, an automaton query will be compiled for each new frontier traversal;
* this helps to avoid max boolean clause errors.
*/
public boolean isUseAutn() {
return useAutn;
}
public void setUseAutn(boolean useAutn) {
this.useAutn = useAutn;
}
/**
* @return If true, only documents that do not have a value in the edge id field will be returned.
*/
public boolean isOnlyLeafNodes() {
return onlyLeafNodes;
}
public void setOnlyLeafNodes(boolean onlyLeafNodes) {
this.onlyLeafNodes = onlyLeafNodes;
}
/**
* @return If true, the documents that matched the root nodes query will be returned; otherwise they are removed from the result set.
*/
public boolean isReturnRoot() {
return returnRoot;
}
public void setReturnRoot(boolean returnRoot) {
this.returnRoot = returnRoot;
}
@Override
public int hashCode() {
final int prime = 31;
int result = super.hashCode();
result = prime * result + ((fromField == null) ? 0 : fromField.hashCode());
result = prime * result + maxDepth;
result = prime * result + (onlyLeafNodes ? 1231 : 1237);
result = prime * result + ((q == null) ? 0 : q.hashCode());
result = prime * result + (returnRoot ? 1231 : 1237);
result = prime * result + ((toField == null) ? 0 : toField.hashCode());
result = prime * result + ((traversalFilter == null) ? 0 : traversalFilter.hashCode());
result = prime * result + (useAutn ? 1231 : 1237);
return result;
}
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (!super.equals(obj))
return false;
if (getClass() != obj.getClass())
return false;
GraphQuery other = (GraphQuery) obj;
if (fromField == null) {
if (other.fromField != null)
return false;
} else if (!fromField.equals(other.fromField))
return false;
if (maxDepth != other.maxDepth)
return false;
if (onlyLeafNodes != other.onlyLeafNodes)
return false;
if (q == null) {
if (other.q != null)
return false;
} else if (!q.equals(other.q))
return false;
if (returnRoot != other.returnRoot)
return false;
if (toField == null) {
if (other.toField != null)
return false;
} else if (!toField.equals(other.toField))
return false;
if (traversalFilter == null) {
if (other.traversalFilter != null)
return false;
} else if (!traversalFilter.equals(other.traversalFilter))
return false;
if (useAutn != other.useAutn)
return false;
return true;
}
}
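A minimal programmatic sketch of the public API above, mirroring the setter calls GraphQueryParser (next file) makes after parsing local params; the seed query and field names here are illustrative assumptions:

// Illustrative only: the seed query and field names are assumed for this sketch.
Query seed = new TermQuery(new Term("id", "doc_1"));
GraphQuery gq = new GraphQuery(seed, "node_id", "edge_id");
gq.setMaxDepth(2);          // stop after two hops; -1 (the default) means no limit
gq.setReturnRoot(false);    // drop the seed documents from the final result set
gq.setOnlyLeafNodes(true);  // keep only documents with no values in the edge field
gq.setUseAutn(true);        // compile each frontier into an automaton query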

View File

@@ -0,0 +1,70 @@
package org.apache.solr.search.join;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.search.Query;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QueryParsing;
import org.apache.solr.search.SyntaxError;
/**
* Solr query parser that handles graph query requests.
*/
public class GraphQueryParser extends QParser {
public GraphQueryParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
super(qstr, localParams, params, req);
}
@Override
public Query parse() throws SyntaxError {
// grab query params and defaults
SolrParams localParams = getLocalParams();
Query rootNodeQuery = subQuery(localParams.get(QueryParsing.V), null).getQuery();
String traversalFilterS = localParams.get("traversalFilter");
Query traversalFilter = traversalFilterS == null ? null : subQuery(traversalFilterS, null).getQuery();
String fromField = localParams.get("from", "node_id");
String toField = localParams.get("to", "edge_ids");
// if true, return only documents that have no values in the edge id field.
boolean onlyLeafNodes = localParams.getBool("returnOnlyLeaf", false);
// choose if you want to return documents that match the initial query or not.
boolean returnRootNodes = localParams.getBool("returnRoot", true);
// the maximum depth to traverse in the graph; -1 means no limit
int maxDepth = localParams.getInt("maxDepth", -1);
// if true, an automaton will be compiled to issue the next graph hop
// this avoids having a large number of boolean clauses. (and it's faster too!)
boolean useAutn = localParams.getBool("useAutn", false);
// Construct a graph query object based on parameters passed in.
GraphQuery gq = new GraphQuery(rootNodeQuery, fromField, toField, traversalFilter);
// set additional parameters that are not in the constructor.
gq.setMaxDepth(maxDepth);
gq.setOnlyLeafNodes(onlyLeafNodes);
gq.setReturnRoot(returnRootNodes);
gq.setUseAutn(useAutn);
// return the parsed graph query.
return gq;
}
}
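Note the defaults above: with from and to omitted, a bare {!graph} query is equivalent to spelling out the default field names:

q={!graph}id:doc_1        is shorthand for        q={!graph from=node_id to=edge_ids}id:doc_1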

View File

@@ -0,0 +1,136 @@
package org.apache.solr.search.join;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.FixedBitSet;
import org.apache.solr.search.BitDocSet;
import org.apache.solr.search.DocSet;
/**
* A graph hit collector. This accumulates the edges for a given graph traversal.
* On each call to collect, the collector skips edge extraction for nodes that it has
* already traversed.
* @lucene.internal
*/
class GraphTermsCollector extends SimpleCollector implements Collector {
// the field to collect edge ids from
private String field;
// all the collected terms
private BytesRefHash collectorTerms;
private SortedSetDocValues docTermOrds;
// the result set that is being collected.
private Bits currentResult;
// known leaf nodes
private DocSet leafNodes;
// number of hits discovered at this level.
int numHits=0;
BitSet bits;
final int maxDoc;
int base;
int baseInParent;
// if we care to track this.
boolean hasCycles = false;
GraphTermsCollector(String field,int maxDoc, Bits currentResult, DocSet leafNodes) {
this.field = field;
this.maxDoc = maxDoc;
this.collectorTerms = new BytesRefHash();
this.currentResult = currentResult;
this.leafNodes = leafNodes;
if (bits==null) {
// create a bitset at the start that will hold the graph traversal result set
bits = new FixedBitSet(maxDoc);
}
}
public void collect(int doc) throws IOException {
doc += base;
if (currentResult.get(doc)) {
// cycle detected / already been here.
// knowing if your graph had a cycle might be useful and it's lightweight to implement here.
hasCycles = true;
return;
}
// collect the docs
addDocToResult(doc);
// Optimization to not look up edges for a document that is a leaf node
if (!leafNodes.exists(doc)) {
addEdgeIdsToResult(doc-base);
}
// Note: tracking links in for each result would be a huge memory hog... so not implementing at this time.
}
private void addEdgeIdsToResult(int doc) throws IOException {
// set the doc to pull the edge ids for.
docTermOrds.setDocument(doc);
BytesRef edgeValue = new BytesRef();
long ord;
while ((ord = docTermOrds.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
// TODO: handle non string type fields.
edgeValue = docTermOrds.lookupOrd(ord);
// add the edge id to the collector terms.
collectorTerms.add(edgeValue);
}
}
private void addDocToResult(int docWithBase) {
// this document is part of the traversal. mark it in our bitmap.
bits.set(docWithBase);
// increment the hit count so we know how many docs we traversed this time.
numHits++;
}
public DocSet getDocSet() {
if (bits == null) {
// TODO: this shouldn't happen
bits = new FixedBitSet(maxDoc);
}
return new BitDocSet((FixedBitSet)bits,numHits);
}
@Override
public void doSetNextReader(LeafReaderContext context) throws IOException {
// Grab the updated doc values.
docTermOrds = DocValues.getSortedSet(context.reader(), field);
base = context.docBase;
baseInParent = context.docBaseInParent;
}
public BytesRefHash getCollectorTerms() {
return collectorTerms;
}
@Override
public boolean needsScores() {
return false;
}
}
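Condensed from GraphQuery.getDocSet above, one traversal level drives the collector like this:

// One level of the traversal (condensed from GraphQuery.getDocSet).
GraphTermsCollector collector = new GraphTermsCollector(toField, capacity, resultBits, leafNodes);
fromSearcher.search(frontierQuery, collector);
BytesRefHash edgeTerms = collector.getCollectorTerms(); // edge ids discovered at this level
DocSet visited = collector.getDocSet();                 // documents visited at this level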

View File

@@ -0,0 +1,71 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
license agreements. See the NOTICE file distributed with this work for additional
information regarding copyright ownership. The ASF licenses this file to
You under the Apache License, Version 2.0 (the "License"); you may not use
this file except in compliance with the License. You may obtain a copy of
the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
by applicable law or agreed to in writing, software distributed under the
License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
OF ANY KIND, either express or implied. See the License for the specific
language governing permissions and limitations under the License. -->
<!-- This is a stripped down schema that includes the node_id and edge_id
fields to test graph queries -->
<schema name="graphexample" version="1.5">
<!-- field names should consist of alphanumeric or underscore characters
only and not start with a digit. This is not currently strictly enforced,
but other field names will not have first class support from all components
and back compatibility is not guaranteed. Names with both leading and trailing
underscores (e.g. _version_) are reserved. -->
<!-- unique id for all records in the index. -->
<field name="id" type="string" indexed="true" stored="true"
required="true" multiValued="false" />
<!-- If you remove this field, you must _also_ disable the update log in
solrconfig.xml or Solr won't start. _version_ and update log are required
for SolrCloud -->
<field name="_version_" type="long" indexed="true" stored="true" />
<!-- points to the root document of a block of nested documents. Required
for nested document support, may be removed otherwise (not used in graph
query test) -->
<field name="_root_" type="string" indexed="true" stored="false" />
<!-- the field that contains the "node_id" for graph traversal -->
<field name="node_id" type="string" indexed="true" stored="true"
multiValued="false" omitNorms="true" termVectors="true" />
<!-- multi-valued field that contains the edge ids for graph traversal -->
<field name="edge_id" type="string" indexed="true" stored="true"
multiValued="true" omitNorms="true" omitPositions="true" termVectors="true" />
<!-- typical title/text fields -->
<field name="title" type="text_general" indexed="true" stored="true"
multiValued="true" omitNorms="true" omitPositions="true" termVectors="true" />
<field name="text" type="text_general" indexed="true" stored="true"
multiValued="true" omitNorms="true" omitPositions="true" termVectors="true" />
<!-- catch all field for indexing unknown fields. -->
<dynamicField name="*" type="string" indexed="true"
stored="true" multiValued="true" />
<!-- call out the explicit doc id. -->
<uniqueKey>id</uniqueKey>
<!-- Field types -->
<fieldType name="string" class="solr.StrField"
sortMissingLast="true" />
<fieldType name="long" class="solr.TrieLongField"
precisionStep="0" positionIncrementGap="0" />
<fieldType name="text_general" class="solr.TextField"
positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory" />
<filter class="solr.LowerCaseFilterFactory" />
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory" />
<filter class="solr.LowerCaseFilterFactory" />
</analyzer>
</fieldType>
</schema>
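For reference, an update request matching this schema might look like the following; the values are taken from the unit test below:

<add>
  <doc>
    <field name="id">doc_1</field>
    <field name="node_id">1</field>
    <field name="edge_id">2</field>
    <field name="text">foo</field>
  </doc>
</add>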

View File

@@ -412,6 +412,42 @@ public class QueryEqualityTest extends SolrTestCaseJ4 {
"{!child of=foo_s:parent}dude");
}
public void testGraphQuery() throws Exception {
SolrQueryRequest req = req("from", "node_s",
"to","edge_s",
"traversalFilter","foo",
"returnOnlyLeaf","true",
"returnRoot","false",
"maxDepth","2",
"useAutn","false"
);
// make sure param substitution works for all args to the graph query.
assertQueryEquals("graph", req,
"{!graph from=node_s to=edge_s}*:*",
"{!graph from=$from to=$to}*:*");
assertQueryEquals("graph", req,
"{!graph from=node_s to=edge_s traversalFilter=foo}*:*",
"{!graph from=$from to=$to traversalFilter=$traversalFilter}*:*");
assertQueryEquals("graph", req,
"{!graph from=node_s to=edge_s traversalFilter=foo returnOnlyLeaf=true}*:*",
"{!graph from=$from to=$to traversalFilter=$traversalFilter returnOnlyLeaf=$returnOnlyLeaf}*:*");
assertQueryEquals("graph", req,
"{!graph from=node_s to=edge_s traversalFilter=foo returnOnlyLeaf=true returnRoot=false}*:*",
"{!graph from=$from to=$to traversalFilter=$traversalFilter returnOnlyLeaf=$returnOnlyLeaf returnRoot=$returnRoot}*:*");
assertQueryEquals("graph", req,
"{!graph from=node_s to=edge_s traversalFilter=foo returnOnlyLeaf=true returnRoot=false maxDepth=2}*:*",
"{!graph from=$from to=$to traversalFilter=$traversalFilter returnOnlyLeaf=$returnOnlyLeaf returnRoot=$returnRoot maxDepth=$maxDepth}*:*");
assertQueryEquals("graph", req,
"{!graph from=node_s to=edge_s traversalFilter=foo returnOnlyLeaf=true returnRoot=false maxDepth=2 useAutn=false}*:*",
"{!graph from=$from to=$to traversalFilter=$traversalFilter returnOnlyLeaf=$returnOnlyLeaf returnRoot=$returnRoot maxDepth=$maxDepth useAutn=$useAutn}*:*");
}
public void testQuerySurround() throws Exception {
assertQueryEquals("surround", "{!surround}and(apache,solr)",
"and(apache,solr)", "apache AND solr");

View File

@@ -0,0 +1,94 @@
package org.apache.solr.search.join;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.request.SolrQueryRequest;
import org.junit.BeforeClass;
import org.junit.Test;
public class GraphQueryTest extends SolrTestCaseJ4 {
@BeforeClass
public static void beforeTests() throws Exception {
initCore("solrconfig.xml","schema-graph.xml");
}
@Test
public void testGraph() throws Exception {
// 1 -> 2 -> 3 -> ( 4 5 )
// 7 -> 1
// 8 -> ( 1 2 )
assertU(adoc("id", "doc_1", "node_id", "1", "edge_id", "2", "text", "foo", "title", "foo10"));
assertU(adoc("id", "doc_2", "node_id", "2", "edge_id", "3", "text", "foo"));
assertU(commit());
assertU(adoc("id", "doc_3", "node_id", "3", "edge_id", "4", "edge_id", "5", "table", "foo"));
assertU(adoc("id", "doc_4", "node_id", "4", "table", "foo"));
assertU(commit());
assertU(adoc("id", "doc_5", "node_id", "5", "edge_id", "7", "table", "bar"));
assertU(adoc("id", "doc_6", "node_id", "6", "edge_id", "3" ));
assertU(adoc("id", "doc_7", "node_id", "7", "edge_id", "1" ));
assertU(adoc("id", "doc_8", "node_id", "8", "edge_id", "1", "edge_id", "2" ));
assertU(adoc("id", "doc_9", "node_id", "9"));
assertU(commit());
// update docs so they're in a new segment.
assertU(adoc("id", "doc_1", "node_id", "1", "edge_id", "2", "text", "foo"));
assertU(adoc("id", "doc_2", "node_id", "2", "edge_id", "3", "edge_id", "9", "text", "foo11"));
assertU(commit());
// a graph for testing the traversal filter: 10 -> 11 -> (12 | 13)
assertU(adoc("id", "doc_10", "node_id", "10", "edge_id", "11", "title", "foo"));
assertU(adoc("id", "doc_11", "node_id", "11", "edge_id", "12", "edge_id", "13", "text", "foo11"));
assertU(adoc("id", "doc_12", "node_id", "12", "text", "foo10"));
assertU(adoc("id", "doc_13", "node_id", "13", "edge_id", "12", "text", "foo10"));
assertU(commit());
// Now we have created a simple graph
// start traversal from node id to edge id
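// Expected trace for the first assertion: starting at doc_1, the frontier expands
// 1 -> 2 -> {3, 9} -> {4, 5} -> 7 -> 1 (a cycle, so traversal stops);
// docs 1, 2, 3, 9, 4, 5 and 7 are visited, giving the 7 hits asserted below.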
String gQuery = "{!graph from=\"node_id\" to=\"edge_id\"}id:doc_1";
SolrQueryRequest qr = createRequest(gQuery);
assertQ(qr,"//*[@numFound='7']");
String g2Query = "{!graph from=\"node_id\" to=\"edge_id\" returnRoot=\"true\" returnOnlyLeaf=\"false\"}id:doc_8";
qr = createRequest(g2Query);
assertQ(qr,"//*[@numFound='8']");
String g3Query = "{!graph from=\"node_id\" to=\"edge_id\" returnRoot=\"false\" returnOnlyLeaf=\"false\"}id:doc_8";
qr = createRequest(g3Query);
assertQ(qr,"//*[@numFound='7']");
String g4Query = "{!graph from=\"node_id\" to=\"edge_id\" returnRoot=\"true\" returnOnlyLeaf=\"false\" traversalFilter=\"text:foo11\"}id:doc_8";
qr = createRequest(g4Query);
assertQ(qr,"//*[@numFound='2']");
}
private SolrQueryRequest createRequest(String query) {
SolrQueryRequest qr = req(query);
NamedList<Object> par = qr.getParams().toNamedList();
par.add("debug", "true");
par.add("rows", "10");
par.add("fl", "id,node_id,edge_id");
par.remove("qt");
SolrParams newp = SolrParams.toSolrParams(par);
qr.setParams(newp);
return qr;
}
}