SOLR-11190: GraphQuery also supports string fields which are indexed=false and docValues=true

This commit is contained in:
Varun Thacker 2017-08-09 12:15:01 -07:00
parent 0250368751
commit e7062b6f91
6 changed files with 68 additions and 2 deletions

View File

@ -88,6 +88,9 @@ Bug Fixes
may not have a registered searcher. This causes spikes in response times when adding a replica
in busy clusters. (Ludovic Boutros, Timothy Potter, shalin)
* SOLR-11190: GraphQuery also supports string fields which are indexed=false and docValues=true. Please refer to the
Javadocs for DocValuesTermsQuery for its performance characteristics. (Karthik Ramachandran, Varun Thacker)
Optimizations
----------------------

View File

@ -19,6 +19,7 @@ package org.apache.solr.search.join;
import org.apache.lucene.search.Query;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.StrField;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QueryParsing;
import org.apache.solr.search.SyntaxError;
@ -45,6 +46,9 @@ public class GraphQueryParser extends QParser {
String fromField = localParams.get("from", "node_id");
String toField = localParams.get("to", "edge_ids");
validateFields(fromField);
validateFields(toField);
// only documents that do not have values in the edge id fields.
boolean onlyLeafNodes = localParams.getBool("returnOnlyLeaf", false);
// choose if you want to return documents that match the initial query or not.
@ -66,4 +70,30 @@ public class GraphQueryParser extends QParser {
return gq;
}
/**
 * Verifies that a field named in the graph query can be used for traversal.
 *
 * <p>Accepted configurations, as enforced below:
 * <ul>
 *   <li>a point field that has docValues;</li>
 *   <li>a {@link StrField} that has docValues or is indexed.</li>
 * </ul>
 * Any other field type is rejected.
 *
 * @param field name of the schema field to check
 * @throws SyntaxError if the field is unusable for one of the reasons above
 */
public void validateFields(String field) throws SyntaxError {
  // NOTE(review): if the schema's getField() throws for unknown fields instead of
  // returning null, this branch can never fire -- confirm against IndexSchema.
  if (null == req.getSchema().getField(field)) {
    throw new SyntaxError("field " + field + " not defined in schema");
  }

  // Point fields: docValues are mandatory.
  if (req.getSchema().getField(field).getType().isPointField()) {
    if (!req.getSchema().getField(field).hasDocValues()) {
      throw new SyntaxError("point field " + field + " must have docValues=true");
    }
    return;
  }

  // Everything that is neither a point field nor a string field is unsupported.
  if (!(req.getSchema().getField(field).getType() instanceof StrField)) {
    throw new SyntaxError("FieldType for field=" + field + " not supported");
  }

  // String fields: either docValues or an index is sufficient.
  if (!req.getSchema().getField(field).hasDocValues() && !req.getSchema().getField(field).indexed()) {
    throw new SyntaxError("string field " + field + " must have indexed=true or docValues=true");
  }
}
}

View File

@ -27,6 +27,7 @@ import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.AutomatonQuery;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.DocValuesTermsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.search.TermInSetQuery;
@ -173,7 +174,9 @@ class GraphTermsCollector extends GraphEdgeCollector {
collectorTerms.get(i, ref);
termList.add(ref);
}
q = new TermInSetQuery(matchField.getName(), termList);
q = (matchField.hasDocValues() && !matchField.indexed())
? new DocValuesTermsQuery(matchField.getName(), termList)
: new TermInSetQuery(matchField.getName(), termList);
}
return q;

View File

@ -240,6 +240,10 @@
<dynamicField name="*_dtdS" type="date" indexed="true" stored="true" docValues="true"/>
<dynamicField name="*_dtdsS" type="date" indexed="true" stored="true" multiValued="true" docValues="true"/>
<!-- docvalues, not indexed (N suffix) and not stored -->
<dynamicField name="*_sdN" type="string" indexed="false" stored="false" docValues="true"/>
<dynamicField name="*_sdsN" type="string" indexed="false" stored="false" multiValued="true" docValues="true"/>
<!-- explicit points with docValues (since they can't be uninverted with FieldCache) -->
<dynamicField name="*_ip" type="pint" indexed="true" stored="true" docValues="true" multiValued="false"/>
<dynamicField name="*_ips" type="pint" indexed="true" stored="true" docValues="true" multiValued="true"/>

View File

@ -17,6 +17,7 @@
package org.apache.solr.search.join;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.SolrParams;
import org.junit.BeforeClass;
import org.junit.Test;
@ -25,7 +26,6 @@ public class GraphQueryTest extends SolrTestCaseJ4 {
/**
 * One-time class setup: calls {@code initCore} with the config and schema
 * file names used by every test in this class.
 */
@BeforeClass
public static void beforeTests() throws Exception {
  final String configFile = "solrconfig.xml";
  final String schemaFile = "schema_latest.xml";
  initCore(configFile, schemaFile);
}
@ -44,6 +44,9 @@ public class GraphQueryTest extends SolrTestCaseJ4 {
doGraph( params("node_id","node_fps", "edge_id","edge_fps") );
doGraph( params("node_id","node_dp", "edge_id","edge_dps") );
doGraph( params("node_id","node_dps", "edge_id","edge_dps") );
// string with indexed=false and docValues=true
doGraph( params("node_id","node_sdN", "edge_id","edge_sdsN") );
}
public void doGraph(SolrParams p) throws Exception {
@ -118,4 +121,23 @@ public class GraphQueryTest extends SolrTestCaseJ4 {
);
}
/**
 * Exercises the graph query parser with {@code from} / {@code to} fields that are
 * not expected to exist in the test schema, asserting that each request fails
 * with a BAD_REQUEST error code.
 *
 * <p>The "message" param is forwarded by {@code doGraphQuery} to {@code assertQEx};
 * it names the field under test so that a failure report points at the right case.
 */
@Test
public void testGraphQueryParserValidation() throws Exception {
  // from schema field existence
  doGraphQuery( params("node_id","node_nothere", "edge_id","edge_ss",
      "message", "field node_nothere not defined in schema", "errorCode", String.valueOf(SolrException.ErrorCode.BAD_REQUEST.code)) );

  // to schema field existence (message must name the 'to' field, not the 'from' field of the previous case)
  doGraphQuery( params("node_id","node_s", "edge_id","edge_notthere",
      "message", "field edge_notthere not defined in schema", "errorCode", String.valueOf(SolrException.ErrorCode.BAD_REQUEST.code)) );
}
/**
 * Issues a depth-1 graph query rooted at {@code id:doc_1} and asserts, via
 * {@code assertQEx}, that it fails with the expected error code.
 *
 * <p>Params read from {@code p}:
 * <ul>
 *   <li>{@code message} - text handed to {@code assertQEx} as its message argument;</li>
 *   <li>{@code errorCode} - expected error code, defaulting to
 *       {@code SolrException.ErrorCode.UNKNOWN.code} when absent;</li>
 *   <li>{@code node_id} / {@code edge_id} - referenced by the query string as the
 *       {@code from} / {@code to} fields.</li>
 * </ul>
 *
 * @param p request params carrying the fields to traverse and the expected outcome
 */
public void doGraphQuery(SolrParams p) throws Exception {
  final String failureMessage = p.get("message");
  final int expectedCode = p.getInt("errorCode", SolrException.ErrorCode.UNKNOWN.code);
  final String graphQuery = "{!graph from=${node_id} to=${edge_id} returnRoot=false maxDepth=1}id:doc_1";
  assertQEx(failureMessage, req(p, "q", graphQuery), expectedCode);
}
}

View File

@ -307,6 +307,10 @@ The `graph` query parser does a breadth first, cyclic aware, graph traversal of
The graph is built according to linkages between documents based on the terms found in `from` and `to` fields that you specify as part of the query.
The supported fieldTypes are point fields with docValues enabled or string fields with indexed=true or docValues=true.
For string fields which are indexed=false and docValues=true, please refer to the javadocs for `DocValuesTermsQuery`
for its performance characteristics; indexed=true will perform better for most use-cases.
=== Graph Query Parameters
`to`::