From 4618f46f278c50238ec98bf7021337564e641de0 Mon Sep 17 00:00:00 2001 From: Aldrin Piri Date: Sun, 1 Mar 2015 21:23:01 -0500 Subject: [PATCH] Adding JsonPathExpressionValidator to perform an exception free validation of JsonPath expressions. This is used as a screen before attempting a compile. --- .../standard/AbstractJsonPathProcessor.java | 14 +- .../nifi/processors/standard/SplitJson.java | 2 +- .../util/JsonPathExpressionValidator.java | 487 ++++++++++++++++++ .../standard/TestEvaluateJsonPath.java | 12 + 4 files changed, 507 insertions(+), 8 deletions(-) create mode 100644 nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/util/JsonPathExpressionValidator.java diff --git a/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/AbstractJsonPathProcessor.java b/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/AbstractJsonPathProcessor.java index febc3f8b66..94a299ea37 100644 --- a/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/AbstractJsonPathProcessor.java +++ b/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/AbstractJsonPathProcessor.java @@ -18,7 +18,6 @@ package org.apache.nifi.processors.standard; import com.jayway.jsonpath.Configuration; import com.jayway.jsonpath.DocumentContext; -import com.jayway.jsonpath.InvalidPathException; import com.jayway.jsonpath.JsonPath; import com.jayway.jsonpath.internal.spi.json.JsonSmartJsonProvider; import com.jayway.jsonpath.spi.json.JsonProvider; @@ -30,6 +29,7 @@ import org.apache.nifi.flowfile.FlowFile; import org.apache.nifi.processor.AbstractProcessor; import org.apache.nifi.processor.ProcessSession; import org.apache.nifi.processor.io.InputStreamCallback; +import 
org.apache.nifi.processors.standard.util.JsonPathExpressionValidator; import org.apache.nifi.stream.io.BufferedInputStream; import org.apache.nifi.util.ObjectHolder; @@ -90,15 +90,14 @@ public abstract class AbstractJsonPathProcessor extends AbstractProcessor { @Override public ValidationResult validate(final String subject, final String input, final ValidationContext context) { - JsonPath compiledJsonPath = null; String error = null; - try { - if (isStale(subject, input)) { - compiledJsonPath = JsonPath.compile(input); + if (isStale(subject, input)) { + if (JsonPathExpressionValidator.isValidExpression(input)) { + JsonPath compiledJsonPath = JsonPath.compile(input); cacheComputedValue(subject, input, compiledJsonPath); + } else { + error = "specified expression was not valid: " + input; } - } catch (InvalidPathException ipe) { - error = ipe.toString(); } return new ValidationResult.Builder().subject(subject).valid(error == null).explanation(error).build(); } @@ -106,6 +105,7 @@ public abstract class AbstractJsonPathProcessor extends AbstractProcessor { /** * An optional hook to act on the compute value */ + abstract void cacheComputedValue(String subject, String input, JsonPath computedJsonPath); /** diff --git a/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/SplitJson.java b/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/SplitJson.java index 5a193a15b8..4d79746db9 100644 --- a/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/SplitJson.java +++ b/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/SplitJson.java @@ -57,7 +57,7 @@ public class SplitJson extends AbstractJsonPathProcessor { public static final PropertyDescriptor ARRAY_JSON_PATH_EXPRESSION = new PropertyDescriptor.Builder() 
.name("JsonPath Expression") .description("A JsonPath expression that indicates the array element to split into JSON/scalar fragments.") - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) // Full validation/caching occurs in #customValidate .required(true) .build(); diff --git a/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/util/JsonPathExpressionValidator.java b/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/util/JsonPathExpressionValidator.java new file mode 100644 index 0000000000..61f9bbe2e0 --- /dev/null +++ b/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/util/JsonPathExpressionValidator.java @@ -0,0 +1,487 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.nifi.processors.standard.util; + +import com.jayway.jsonpath.Filter; +import com.jayway.jsonpath.Predicate; +import com.jayway.jsonpath.internal.Utils; +import com.jayway.jsonpath.internal.token.*; +import org.apache.nifi.util.StringUtils; + +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; +import java.util.regex.Pattern; + +import static java.util.Arrays.asList; + +/** + * JsonPathExpressionValidator performs the same execution as com.jayway.jsonpath.internal.PathCompiler, but does not throw + * exceptions when an invalid path segment is found. + * Limited access to create JsonPath objects requires a separate flow of execution in avoiding exceptions. + * + * @see https://github.com/jayway/JsonPath + */ +public class JsonPathExpressionValidator { + + private static final String PROPERTY_OPEN = "['"; + private static final String PROPERTY_CLOSE = "']"; + private static final char DOCUMENT = '$'; + private static final char ANY = '*'; + private static final char PERIOD = '.'; + private static final char BRACKET_OPEN = '['; + private static final char BRACKET_CLOSE = ']'; + private static final char SPACE = ' '; + + + /** + * Performs a validation of a provided JsonPath expression. + *

+ * Typically this is used in the context of: + * + * <pre>

+     * JsonPath compiledJsonPath = null;
+     * if (JsonPathExpressionValidator.isValidExpression(input)) {
+     *      compiledJsonPath = JsonPath.compile(input);
+     *      ...
+     * } else {
+     *      // error handling
+     * }
+     * </pre>
+ * + * + * @param path to evaluate for validity + * @param filters applied to path expression; this is typically unused in the context of Processors + * @return true if the specified path is valid; false otherwise + */ + public static boolean isValidExpression(String path, Predicate... filters) { + path = path.trim(); + if (StringUtils.isBlank(path)) { + // "Path may not be null empty" + return false; + } + if (path.endsWith("..")) { + // "A path can not end with a scan." + return false; + } + + LinkedList filterList = new LinkedList(asList(filters)); + + if (path.charAt(0) != '$' && path.charAt(0) != '@') { + path = "$." + path; + } + + if (path.charAt(0) == '@') { + path = "$" + path.substring(1); + } + + if (path.length() > 1 && path.charAt(1) != '.' && path.charAt(1) != '[') { + // "Invalid path " + path + return false; + } + + RootPathToken root = null; + + int i = 0; + int positions; + String fragment = ""; + + do { + char current = path.charAt(i); + + switch (current) { + case SPACE: + // "Space not allowed in path" + return false; + case DOCUMENT: + fragment = "$"; + i++; + break; + case BRACKET_OPEN: + positions = fastForwardUntilClosed(path, i); + fragment = path.substring(i, i + positions); + i += positions; + break; + case PERIOD: + i++; + if (path.charAt(i) == PERIOD) { + //This is a deep scan + fragment = ".."; + i++; + } else { + positions = fastForward(path, i); + if (positions == 0) { + continue; + + } else if (positions == 1 && path.charAt(i) == '*') { + fragment = new String("[*]"); + } else { + fragment = PROPERTY_OPEN + path.substring(i, i + positions) + PROPERTY_CLOSE; + } + i += positions; + } + break; + case ANY: + fragment = new String("[*]"); + i++; + break; + default: + positions = fastForward(path, i); + + fragment = PROPERTY_OPEN + path.substring(i, i + positions) + PROPERTY_CLOSE; + i += positions; + break; + } + + /* + * Analyze each component represented by a fragment. 
If there is a failure to properly evaluate, + * a null result is returned + */ + PathToken analyzedComponent = PathComponentAnalyzer.analyze(fragment, filterList); + if (analyzedComponent == null) { + return false; + } + + if (root == null) { + root = (RootPathToken) analyzedComponent; + } else { + root.append(analyzedComponent); + } + + + } while (i < path.length()); + + return true; + } + + private static int fastForward(String s, int index) { + int skipCount = 0; + while (index < s.length()) { + char current = s.charAt(index); + if (current == PERIOD || current == BRACKET_OPEN || current == SPACE) { + break; + } + index++; + skipCount++; + } + return skipCount; + } + + private static int fastForwardUntilClosed(String s, int index) { + int skipCount = 0; + int nestedBrackets = 0; + + //First char is always '[' no need to check it + index++; + skipCount++; + + while (index < s.length()) { + char current = s.charAt(index); + + index++; + skipCount++; + + if (current == BRACKET_CLOSE && nestedBrackets == 0) { + break; + } + if (current == BRACKET_OPEN) { + nestedBrackets++; + } + if (current == BRACKET_CLOSE) { + nestedBrackets--; + } + } + return skipCount; + } + + static class PathComponentAnalyzer { + + private static final Pattern FILTER_PATTERN = Pattern.compile("^\\[\\s*\\?\\s*[,\\s*\\?]*?\\s*]$"); //[?] or [?, ?, ...] 
+ private int i; + private char current; + + private final LinkedList filterList; + private final String pathFragment; + + PathComponentAnalyzer(String pathFragment, LinkedList filterList) { + this.pathFragment = pathFragment; + this.filterList = filterList; + } + + static PathToken analyze(String pathFragment, LinkedList filterList) { + return new PathComponentAnalyzer(pathFragment, filterList).analyze(); + } + + public PathToken analyze() { + + if ("$".equals(pathFragment)) return new RootPathToken(); + else if ("..".equals(pathFragment)) return new ScanPathToken(); + else if ("[*]".equals(pathFragment)) return new WildcardPathToken(); + else if (".*".equals(pathFragment)) return new WildcardPathToken(); + else if ("[?]".equals(pathFragment)) return new PredicatePathToken(filterList.poll()); + + else if (FILTER_PATTERN.matcher(pathFragment).matches()) { + final int criteriaCount = Utils.countMatches(pathFragment, "?"); + List filters = new ArrayList<>(criteriaCount); + for (int i = 0; i < criteriaCount; i++) { + filters.add(filterList.poll()); + } + return new PredicatePathToken(filters); + } + + this.i = 0; + do { + current = pathFragment.charAt(i); + + switch (current) { + case '?': + return analyzeCriteriaSequence4(); + case '\'': + return analyzeProperty(); + default: + if (Character.isDigit(current) || current == ':' || current == '-' || current == '@') { + return analyzeArraySequence(); + } + i++; + break; + } + + + } while (i < pathFragment.length()); + + //"Could not analyze path component: " + pathFragment + return null; + } + + + public PathToken analyzeCriteriaSequence4() { + int[] bounds = findFilterBounds(); + if (bounds == null) { + return null; + } + i = bounds[1]; + + return new PredicatePathToken(Filter.parse(pathFragment.substring(bounds[0], bounds[1]))); + } + + int[] findFilterBounds() { + int end = 0; + int start = i; + + while (pathFragment.charAt(start) != '[') { + start--; + } + + int mem = ' '; + int curr = start; + boolean inProp = 
false; + int openSquareBracket = 0; + int openBrackets = 0; + while (end == 0) { + char c = pathFragment.charAt(curr); + switch (c) { + case '(': + if (!inProp) openBrackets++; + break; + case ')': + if (!inProp) openBrackets--; + break; + case '[': + if (!inProp) openSquareBracket++; + break; + case ']': + if (!inProp) { + openSquareBracket--; + if (openBrackets == 0) { + end = curr + 1; + } + } + break; + case '\'': + if (mem == '\\') { + break; + } + inProp = !inProp; + break; + default: + break; + } + mem = c; + curr++; + } + if (openBrackets != 0 || openSquareBracket != 0) { + // "Filter brackets are not balanced" + return null; + } + return new int[]{start, end}; + } + + + //"['foo']" + private PathToken analyzeProperty() { + List properties = new ArrayList(); + StringBuilder buffer = new StringBuilder(); + + boolean propertyIsOpen = false; + + while (current != ']') { + switch (current) { + case '\'': + if (propertyIsOpen) { + properties.add(buffer.toString()); + buffer.setLength(0); + propertyIsOpen = false; + } else { + propertyIsOpen = true; + } + break; + default: + if (propertyIsOpen) { + buffer.append(current); + } + break; + } + current = pathFragment.charAt(++i); + } + return new PropertyPathToken(properties); + } + + + //"[-1:]" sliceFrom + //"[:1]" sliceTo + //"[0:5]" sliceBetween + //"[1]" + //"[1,2,3]" + //"[(@.length - 1)]" + private PathToken analyzeArraySequence() { + StringBuilder buffer = new StringBuilder(); + List numbers = new ArrayList(); + + boolean contextSize = (current == '@'); + boolean sliceTo = false; + boolean sliceFrom = false; + boolean sliceBetween = false; + boolean indexSequence = false; + boolean singleIndex = false; + + if (contextSize) { + + current = pathFragment.charAt(++i); + current = pathFragment.charAt(++i); + while (current != '-') { + if (current == ' ' || current == '(' || current == ')') { + current = pathFragment.charAt(++i); + continue; + } + buffer.append(current); + current = pathFragment.charAt(++i); + } + 
String function = buffer.toString(); + buffer.setLength(0); + if (!function.equals("size") && !function.equals("length")) { + // "Invalid function: @." + function + ". Supported functions are: [(@.length - n)] and [(@.size() - n)]" + return null; + } + while (current != ')') { + if (current == ' ') { + current = pathFragment.charAt(++i); + continue; + } + buffer.append(current); + current = pathFragment.charAt(++i); + } + + } else { + + + while (Character.isDigit(current) || current == ',' || current == ' ' || current == ':' || current == '-') { + + switch (current) { + case ' ': + break; + case ':': + if (buffer.length() == 0) { + //this is a tail slice [:12] + sliceTo = true; + current = pathFragment.charAt(++i); + while (Character.isDigit(current) || current == ' ' || current == '-') { + if (current != ' ') { + buffer.append(current); + } + current = pathFragment.charAt(++i); + } + numbers.add(Integer.parseInt(buffer.toString())); + buffer.setLength(0); + } else { + //we know this starts with [12:??? 
+ numbers.add(Integer.parseInt(buffer.toString())); + buffer.setLength(0); + current = pathFragment.charAt(++i); + + //read the optional index after the colon: empty means slice-from [12:], otherwise slice-between [12:34] + while (Character.isDigit(current) || current == ' ' || current == '-') { + if (current != ' ') { + buffer.append(current); + } + current = pathFragment.charAt(++i); + } + + if (buffer.length() == 0) { + sliceFrom = true; + } else { + sliceBetween = true; + numbers.add(Integer.parseInt(buffer.toString())); + buffer.setLength(0); + } + } + break; + case ',': + numbers.add(Integer.parseInt(buffer.toString())); + buffer.setLength(0); + indexSequence = true; + break; + default: + buffer.append(current); + break; + } + if (current == ']') { + break; + } + current = pathFragment.charAt(++i); + } + } + if (buffer.length() > 0) { + numbers.add(Integer.parseInt(buffer.toString())); + } + singleIndex = (numbers.size() == 1) && !sliceTo && !sliceFrom && !contextSize; + + ArrayPathToken.Operation operation = null; + + if (singleIndex) operation = ArrayPathToken.Operation.SINGLE_INDEX; + else if (indexSequence) operation = ArrayPathToken.Operation.INDEX_SEQUENCE; + else if (sliceFrom) operation = ArrayPathToken.Operation.SLICE_FROM; + else if (sliceTo) operation = ArrayPathToken.Operation.SLICE_TO; + else if (sliceBetween) operation = ArrayPathToken.Operation.SLICE_BETWEEN; + else if (contextSize) operation = ArrayPathToken.Operation.CONTEXT_SIZE; + + assert operation != null; + + return new ArrayPathToken(numbers, operation); + + } + } +} \ No newline at end of file diff --git a/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestEvaluateJsonPath.java b/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestEvaluateJsonPath.java index c5ff81476d..058e21c21d 100644 --- 
a/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestEvaluateJsonPath.java +++ b/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestEvaluateJsonPath.java @@ -67,6 +67,18 @@ public class TestEvaluateJsonPath { Assert.fail("Processor incorrectly ran with an invalid configuration of multiple paths specified as attributes for a destination of content."); } + @Test(expected = AssertionError.class) + public void testInvalidConfiguration_invalidJsonPath_space() throws Exception { + final TestRunner testRunner = TestRunners.newTestRunner(new EvaluateJsonPath()); + testRunner.setProperty(EvaluateJsonPath.DESTINATION, EvaluateJsonPath.DESTINATION_CONTENT); + testRunner.setProperty("JsonPath1", "$[0]. _id"); + + testRunner.enqueue(JSON_SNIPPET); + testRunner.run(); + + Assert.fail("Processor incorrectly ran with an invalid configuration of multiple paths specified as attributes for a destination of content."); + } + @Test public void testConfiguration_destinationAttributes_twoPaths() throws Exception { final TestRunner testRunner = TestRunners.newTestRunner(new EvaluateJsonPath());