From e7062b6f91c161965aec0cef5a9ae68280f306a4 Mon Sep 17 00:00:00 2001 From: Varun Thacker Date: Wed, 9 Aug 2017 12:15:01 -0700 Subject: [PATCH] SOLR-11190: GraphQuery also supports string fields which are indexed=false and docValues=true --- solr/CHANGES.txt | 3 ++ .../solr/search/join/GraphQueryParser.java | 30 +++++++++++++++++++ .../solr/search/join/GraphTermsCollector.java | 5 +++- .../solr/collection1/conf/schema_latest.xml | 4 +++ .../solr/search/join/GraphQueryTest.java | 24 ++++++++++++++- solr/solr-ref-guide/src/other-parsers.adoc | 4 +++ 6 files changed, 68 insertions(+), 2 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 6962e2f2263..ba8222afe0b 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -88,6 +88,9 @@ Bug Fixes may not have a registered searcher. This causes spikes in response times when adding a replica in busy clusters. (Ludovic Boutros, Timothy Potter, shalin) +* SOLR-11190: GraphQuery also supports string fields which are indexed=false and docValues=true. Please refer to the + Javadocs for DocValuesTermsQuery for it's performance characteristics. (Karthik Ramachandran, Varun Thacker) + Optimizations ---------------------- diff --git a/solr/core/src/java/org/apache/solr/search/join/GraphQueryParser.java b/solr/core/src/java/org/apache/solr/search/join/GraphQueryParser.java index 9c9b85234fc..3e2d938162e 100644 --- a/solr/core/src/java/org/apache/solr/search/join/GraphQueryParser.java +++ b/solr/core/src/java/org/apache/solr/search/join/GraphQueryParser.java @@ -19,6 +19,7 @@ package org.apache.solr.search.join; import org.apache.lucene.search.Query; import org.apache.solr.common.params.SolrParams; import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.schema.StrField; import org.apache.solr.search.QParser; import org.apache.solr.search.QueryParsing; import org.apache.solr.search.SyntaxError; @@ -45,6 +46,9 @@ public class GraphQueryParser extends QParser { String fromField = localParams.get("from", "node_id"); String toField = localParams.get("to", "edge_ids"); + validateFields(fromField); + validateFields(toField); + // only documents that do not have values in the edge id fields. boolean onlyLeafNodes = localParams.getBool("returnOnlyLeaf", false); // choose if you want to return documents that match the initial query or not. @@ -65,5 +69,31 @@ public class GraphQueryParser extends QParser { // return the parsed graph query. return gq; } + + public void validateFields(String field) throws SyntaxError { + + if (req.getSchema().getField(field) == null) { + throw new SyntaxError("field " + field + " not defined in schema"); + } + + if (req.getSchema().getField(field).getType().isPointField()) { + if (req.getSchema().getField(field).hasDocValues()) { + return; + } else { + throw new SyntaxError("point field " + field + " must have docValues=true"); + } + } + + if (req.getSchema().getField(field).getType() instanceof StrField) { + if ((req.getSchema().getField(field).hasDocValues() || req.getSchema().getField(field).indexed())) { + return; + } else { + throw new SyntaxError("string field " + field + " must have indexed=true or docValues=true"); + } + } + + throw new SyntaxError("FieldType for field=" + field + " not supported"); + + } } diff --git a/solr/core/src/java/org/apache/solr/search/join/GraphTermsCollector.java b/solr/core/src/java/org/apache/solr/search/join/GraphTermsCollector.java index 174db3c691f..07cec7d8d43 100644 --- a/solr/core/src/java/org/apache/solr/search/join/GraphTermsCollector.java +++ b/solr/core/src/java/org/apache/solr/search/join/GraphTermsCollector.java @@ -27,6 +27,7 @@ import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.Term; import org.apache.lucene.search.AutomatonQuery; import org.apache.lucene.search.Collector; +import org.apache.lucene.search.DocValuesTermsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.SimpleCollector; import org.apache.lucene.search.TermInSetQuery; @@ -173,7 +174,9 @@ class GraphTermsCollector extends GraphEdgeCollector { collectorTerms.get(i, ref); termList.add(ref); } - q = new TermInSetQuery(matchField.getName(), termList); + q = (matchField.hasDocValues() && !matchField.indexed()) + ? new DocValuesTermsQuery(matchField.getName(), termList) + : new TermInSetQuery(matchField.getName(), termList); } return q; diff --git a/solr/core/src/test-files/solr/collection1/conf/schema_latest.xml b/solr/core/src/test-files/solr/collection1/conf/schema_latest.xml index 1135d20485f..889d171e393 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema_latest.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema_latest.xml @@ -239,6 +239,10 @@ + + + + diff --git a/solr/core/src/test/org/apache/solr/search/join/GraphQueryTest.java b/solr/core/src/test/org/apache/solr/search/join/GraphQueryTest.java index e1cfc815c0e..4b550e4a844 100644 --- a/solr/core/src/test/org/apache/solr/search/join/GraphQueryTest.java +++ b/solr/core/src/test/org/apache/solr/search/join/GraphQueryTest.java @@ -17,6 +17,7 @@ package org.apache.solr.search.join; import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.common.SolrException; import org.apache.solr.common.params.SolrParams; import org.junit.BeforeClass; import org.junit.Test; @@ -25,7 +26,6 @@ public class GraphQueryTest extends SolrTestCaseJ4 { @BeforeClass public static void beforeTests() throws Exception { - initCore("solrconfig.xml","schema_latest.xml"); } @@ -44,6 +44,9 @@ public class GraphQueryTest extends SolrTestCaseJ4 { doGraph( params("node_id","node_fps", "edge_id","edge_fps") ); doGraph( params("node_id","node_dp", "edge_id","edge_dps") ); doGraph( params("node_id","node_dps", "edge_id","edge_dps") ); + + // string with indexed=false and docValues=true + doGraph( params("node_id","node_sdN", "edge_id","edge_sdsN") ); } public void doGraph(SolrParams p) throws Exception { @@ -118,4 +121,23 @@ public class GraphQueryTest extends SolrTestCaseJ4 { ); } + @Test + public void testGraphQueryParserValidation() throws Exception { + // from schema field existence + doGraphQuery( params("node_id","node_nothere", "edge_id","edge_ss", + "message", "field node_nothere not defined in schema", "errorCode", String.valueOf(SolrException.ErrorCode.BAD_REQUEST.code)) ); + + // to schema field existence + doGraphQuery( params("node_id","node_s", "edge_id","edge_notthere", + "message", "field node_nothere not defined in schema", "errorCode", String.valueOf(SolrException.ErrorCode.BAD_REQUEST.code)) ); + } + + public void doGraphQuery(SolrParams p) throws Exception { + String message = p.get("message"); + int errorCode = p.getInt("errorCode", SolrException.ErrorCode.UNKNOWN.code); + + assertQEx(message , req(p, "q","{!graph from=${node_id} to=${edge_id} returnRoot=false maxDepth=1}id:doc_1") + , errorCode + ); + } } diff --git a/solr/solr-ref-guide/src/other-parsers.adoc b/solr/solr-ref-guide/src/other-parsers.adoc index e54910ae279..72ea05c82e8 100644 --- a/solr/solr-ref-guide/src/other-parsers.adoc +++ b/solr/solr-ref-guide/src/other-parsers.adoc @@ -307,6 +307,10 @@ The `graph` query parser does a breadth first, cyclic aware, graph traversal of The graph is built according to linkages between documents based on the terms found in `from` and `to` fields that you specify as part of the query. +The supported fieldTypes are point fields with docValues enabled or string fields with indexed=true or docValues=true. +For string fields which are indexed=false and docValues=true please refer to the javadocs for `DocValuesTermsQuery` +for it's performance characteristics so indexed=true will perform better for most use-cases. + === Graph Query Parameters `to`::