SOLR-11190: GraphQuery also supports string fields which are indexed=false and docValues=true

2017-08-09 12:15:01 -07:00 · 2017-08-09 12:15:01 -07:00 · e7062b6f91
parent 0250368751
commit e7062b6f91
6 changed files with 68 additions and 2 deletions
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@ -88,6 +88,9 @@ Bug Fixes
  may not have a registered searcher. This causes spikes in response times when adding a replica
  in busy clusters. (Ludovic Boutros, Timothy Potter, shalin)

+* SOLR-11190: GraphQuery also supports string fields which are indexed=false and docValues=true. Please refer to the
+  Javadocs for DocValuesTermsQuery for it's performance characteristics. (Karthik Ramachandran, Varun Thacker)
+
 Optimizations
 ----------------------

--- a/solr/core/src/java/org/apache/solr/search/join/GraphQueryParser.java
+++ b/solr/core/src/java/org/apache/solr/search/join/GraphQueryParser.java
@ -19,6 +19,7 @@ package org.apache.solr.search.join;
 import org.apache.lucene.search.Query;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.schema.StrField;
 import org.apache.solr.search.QParser;
 import org.apache.solr.search.QueryParsing;
 import org.apache.solr.search.SyntaxError;
@ -45,6 +46,9 @@ public class GraphQueryParser extends QParser {
    String fromField = localParams.get("from", "node_id");
    String toField = localParams.get("to", "edge_ids");

+    validateFields(fromField);
+    validateFields(toField);
+
    // only documents that do not have values in the edge id fields.
    boolean onlyLeafNodes = localParams.getBool("returnOnlyLeaf", false);
    // choose if you want to return documents that match the initial query or not.
@ -66,4 +70,30 @@ public class GraphQueryParser extends QParser {
    return gq;
  }

+  public void validateFields(String field) throws SyntaxError {
+
+    if (req.getSchema().getField(field) == null) {
+      throw new SyntaxError("field " + field + " not defined in schema");
+    }
+
+    if (req.getSchema().getField(field).getType().isPointField()) {
+      if (req.getSchema().getField(field).hasDocValues()) {
+        return;
+      } else {
+        throw new SyntaxError("point field " + field + " must have docValues=true");
+      }
+    }
+
+    if (req.getSchema().getField(field).getType() instanceof StrField) {
+      if ((req.getSchema().getField(field).hasDocValues() || req.getSchema().getField(field).indexed())) {
+        return;
+      } else {
+        throw new SyntaxError("string field " + field + " must have indexed=true or docValues=true");
+      }
+    }
+
+    throw new SyntaxError("FieldType for field=" + field + " not supported");
+
+  }
+  
 }
--- a/solr/core/src/java/org/apache/solr/search/join/GraphTermsCollector.java
+++ b/solr/core/src/java/org/apache/solr/search/join/GraphTermsCollector.java
@ -27,6 +27,7 @@ import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.AutomatonQuery;
 import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.DocValuesTermsQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.SimpleCollector;
 import org.apache.lucene.search.TermInSetQuery;
@ -173,7 +174,9 @@ class GraphTermsCollector extends GraphEdgeCollector {
          collectorTerms.get(i, ref);
          termList.add(ref);
        }
-        q = new TermInSetQuery(matchField.getName(), termList);
+        q = (matchField.hasDocValues() && !matchField.indexed())
+            ? new DocValuesTermsQuery(matchField.getName(), termList)
+            : new TermInSetQuery(matchField.getName(), termList);
      }

      return q;
--- a/solr/core/src/test-files/solr/collection1/conf/schema_latest.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/schema_latest.xml
@ -240,6 +240,10 @@
  <dynamicField name="*_dtdS" type="date" indexed="true" stored="true" docValues="true"/>
  <dynamicField name="*_dtdsS" type="date" indexed="true" stored="true" multiValued="true" docValues="true"/>
  
+  <!-- docvalues, not indexed (N suffix) and not stored -->
+  <dynamicField name="*_sdN" type="string" indexed="false" stored="false" docValues="true"/>
+  <dynamicField name="*_sdsN" type="string" indexed="false" stored="false" multiValued="true" docValues="true"/>
+
  <!-- explicit points with docValues (since they can't be uninverted with FieldCache -->
  <dynamicField name="*_ip"      type="pint"    indexed="true"  stored="true" docValues="true" multiValued="false"/>
  <dynamicField name="*_ips"     type="pint"    indexed="true"  stored="true" docValues="true" multiValued="true"/>
--- a/solr/core/src/test/org/apache/solr/search/join/GraphQueryTest.java
+++ b/solr/core/src/test/org/apache/solr/search/join/GraphQueryTest.java
@ -17,6 +17,7 @@
 package org.apache.solr.search.join;

 import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.common.SolrException;
 import org.apache.solr.common.params.SolrParams;
 import org.junit.BeforeClass;
 import org.junit.Test;
@ -25,7 +26,6 @@ public class GraphQueryTest extends SolrTestCaseJ4 {
  
  @BeforeClass
  public static void beforeTests() throws Exception {
-    
    initCore("solrconfig.xml","schema_latest.xml");
  }

@ -44,6 +44,9 @@ public class GraphQueryTest extends SolrTestCaseJ4 {
    doGraph( params("node_id","node_fps", "edge_id","edge_fps") );
    doGraph( params("node_id","node_dp",  "edge_id","edge_dps") );
    doGraph( params("node_id","node_dps", "edge_id","edge_dps") );
+
+    // string with indexed=false and docValues=true
+    doGraph( params("node_id","node_sdN", "edge_id","edge_sdsN") );
  }

  public void doGraph(SolrParams p) throws Exception {
@ -118,4 +121,23 @@ public class GraphQueryTest extends SolrTestCaseJ4 {
    );
  }
  
+  @Test
+  public void testGraphQueryParserValidation() throws Exception {
+    // from schema field existence
+    doGraphQuery( params("node_id","node_nothere",  "edge_id","edge_ss",
+        "message", "field node_nothere not defined in schema", "errorCode", String.valueOf(SolrException.ErrorCode.BAD_REQUEST.code)) );
+
+    // to schema field existence
+    doGraphQuery( params("node_id","node_s",  "edge_id","edge_notthere",
+        "message", "field node_nothere not defined in schema", "errorCode", String.valueOf(SolrException.ErrorCode.BAD_REQUEST.code)) );
+  }
+  
+  public void doGraphQuery(SolrParams p) throws Exception {
+    String message = p.get("message");
+    int errorCode = p.getInt("errorCode", SolrException.ErrorCode.UNKNOWN.code);
+    
+    assertQEx(message , req(p, "q","{!graph from=${node_id} to=${edge_id} returnRoot=false maxDepth=1}id:doc_1")
+        , errorCode
+    );
+  }
 }
--- a/solr/solr-ref-guide/src/other-parsers.adoc
+++ b/solr/solr-ref-guide/src/other-parsers.adoc
@ -307,6 +307,10 @@ The `graph` query parser does a breadth first, cyclic aware, graph traversal of

 The graph is built according to linkages between documents based on the terms found in `from` and `to` fields that you specify as part of the query.

+The supported fieldTypes are point fields with docValues enabled or string fields with indexed=true or docValues=true.
+For string fields which are indexed=false and docValues=true please refer to the javadocs for `DocValuesTermsQuery`
+for it's performance characteristics so indexed=true will perform better for most use-cases.
+
 === Graph Query Parameters

 `to`::