Adding JsonPathExpressionValidator to perform an exception free validation of JsonPath expressions. This is used as a screen before attempting a compile.

This commit is contained in:
Aldrin Piri 2015-03-01 21:23:01 -05:00
parent 973b493386
commit 4618f46f27
4 changed files with 507 additions and 8 deletions

View File

@ -18,7 +18,6 @@ package org.apache.nifi.processors.standard;
import com.jayway.jsonpath.Configuration;
import com.jayway.jsonpath.DocumentContext;
import com.jayway.jsonpath.InvalidPathException;
import com.jayway.jsonpath.JsonPath;
import com.jayway.jsonpath.internal.spi.json.JsonSmartJsonProvider;
import com.jayway.jsonpath.spi.json.JsonProvider;
@ -30,6 +29,7 @@ import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.io.InputStreamCallback;
import org.apache.nifi.processors.standard.util.JsonPathExpressionValidator;
import org.apache.nifi.stream.io.BufferedInputStream;
import org.apache.nifi.util.ObjectHolder;
@ -90,15 +90,14 @@ public abstract class AbstractJsonPathProcessor extends AbstractProcessor {
@Override
public ValidationResult validate(final String subject, final String input, final ValidationContext context) {
JsonPath compiledJsonPath = null;
String error = null;
try {
if (isStale(subject, input)) {
compiledJsonPath = JsonPath.compile(input);
if (isStale(subject, input)) {
if (JsonPathExpressionValidator.isValidExpression(input)) {
JsonPath compiledJsonPath = JsonPath.compile(input);
cacheComputedValue(subject, input, compiledJsonPath);
} else {
error = "specified expression was not valid: " + input;
}
} catch (InvalidPathException ipe) {
error = ipe.toString();
}
return new ValidationResult.Builder().subject(subject).valid(error == null).explanation(error).build();
}
@ -106,6 +105,7 @@ public abstract class AbstractJsonPathProcessor extends AbstractProcessor {
/**
* An optional hook to act on the compute value
*/
abstract void cacheComputedValue(String subject, String input, JsonPath computedJsonPath);
/**

View File

@ -57,7 +57,7 @@ public class SplitJson extends AbstractJsonPathProcessor {
public static final PropertyDescriptor ARRAY_JSON_PATH_EXPRESSION = new PropertyDescriptor.Builder()
.name("JsonPath Expression")
.description("A JsonPath expression that indicates the array element to split into JSON/scalar fragments.")
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR) // Full validation/caching occurs in #customValidate
.required(true)
.build();

View File

@ -0,0 +1,487 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.standard.util;
import com.jayway.jsonpath.Filter;
import com.jayway.jsonpath.Predicate;
import com.jayway.jsonpath.internal.Utils;
import com.jayway.jsonpath.internal.token.*;
import org.apache.nifi.util.StringUtils;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Pattern;
import static java.util.Arrays.asList;
/**
* JsonPathExpressionValidator performs the same execution as com.jayway.jsonpath.internal.PathCompiler, but does not throw
* exceptions when an invalid path segment is found.
* Limited access to create JsonPath objects requires a separate flow of execution in avoiding exceptions.
*
* @see <a href="https://github.com/jayway/JsonPath">https://github.com/jayway/JsonPath</a>
*/
public class JsonPathExpressionValidator {
private static final String PROPERTY_OPEN = "['";
private static final String PROPERTY_CLOSE = "']";
private static final char DOCUMENT = '$';
private static final char ANY = '*';
private static final char PERIOD = '.';
private static final char BRACKET_OPEN = '[';
private static final char BRACKET_CLOSE = ']';
private static final char SPACE = ' ';
/**
* Performs a validation of a provided JsonPath expression.
* <p/>
* Typically this is used in the context of:
* <code>
* <pre>
* JsonPath compiledJsonPath = null;
* if (JsonPathExpressionValidator.isValidExpression(input)) {
* compiledJsonPath = JsonPath.compile(input);
* ...
* } else {
* // error handling
* }
* </pre>
* </code>
*
* @param path to evaluate for validity
* @param filters applied to path expression; this is typically unused in the context of Processors
* @return true if the specified path is valid; false otherwise
*/
public static boolean isValidExpression(String path, Predicate... filters) {
path = path.trim();
if (StringUtils.isBlank(path)) {
// "Path may not be null empty"
return false;
}
if (path.endsWith("..")) {
// "A path can not end with a scan."
return false;
}
LinkedList<Predicate> filterList = new LinkedList<Predicate>(asList(filters));
if (path.charAt(0) != '$' && path.charAt(0) != '@') {
path = "$." + path;
}
if (path.charAt(0) == '@') {
path = "$" + path.substring(1);
}
if (path.length() > 1 && path.charAt(1) != '.' && path.charAt(1) != '[') {
// "Invalid path " + path
return false;
}
RootPathToken root = null;
int i = 0;
int positions;
String fragment = "";
do {
char current = path.charAt(i);
switch (current) {
case SPACE:
// "Space not allowed in path"
return false;
case DOCUMENT:
fragment = "$";
i++;
break;
case BRACKET_OPEN:
positions = fastForwardUntilClosed(path, i);
fragment = path.substring(i, i + positions);
i += positions;
break;
case PERIOD:
i++;
if (path.charAt(i) == PERIOD) {
//This is a deep scan
fragment = "..";
i++;
} else {
positions = fastForward(path, i);
if (positions == 0) {
continue;
} else if (positions == 1 && path.charAt(i) == '*') {
fragment = new String("[*]");
} else {
fragment = PROPERTY_OPEN + path.substring(i, i + positions) + PROPERTY_CLOSE;
}
i += positions;
}
break;
case ANY:
fragment = new String("[*]");
i++;
break;
default:
positions = fastForward(path, i);
fragment = PROPERTY_OPEN + path.substring(i, i + positions) + PROPERTY_CLOSE;
i += positions;
break;
}
/*
* Analyze each component represented by a fragment. If there is a failure to properly evaluate,
* a null result is returned
*/
PathToken analyzedComponent = PathComponentAnalyzer.analyze(fragment, filterList);
if (analyzedComponent == null) {
return false;
}
if (root == null) {
root = (RootPathToken) analyzedComponent;
} else {
root.append(analyzedComponent);
}
} while (i < path.length());
return true;
}
private static int fastForward(String s, int index) {
int skipCount = 0;
while (index < s.length()) {
char current = s.charAt(index);
if (current == PERIOD || current == BRACKET_OPEN || current == SPACE) {
break;
}
index++;
skipCount++;
}
return skipCount;
}
private static int fastForwardUntilClosed(String s, int index) {
int skipCount = 0;
int nestedBrackets = 0;
//First char is always '[' no need to check it
index++;
skipCount++;
while (index < s.length()) {
char current = s.charAt(index);
index++;
skipCount++;
if (current == BRACKET_CLOSE && nestedBrackets == 0) {
break;
}
if (current == BRACKET_OPEN) {
nestedBrackets++;
}
if (current == BRACKET_CLOSE) {
nestedBrackets--;
}
}
return skipCount;
}
static class PathComponentAnalyzer {
private static final Pattern FILTER_PATTERN = Pattern.compile("^\\[\\s*\\?\\s*[,\\s*\\?]*?\\s*]$"); //[?] or [?, ?, ...]
private int i;
private char current;
private final LinkedList<Predicate> filterList;
private final String pathFragment;
PathComponentAnalyzer(String pathFragment, LinkedList<Predicate> filterList) {
this.pathFragment = pathFragment;
this.filterList = filterList;
}
static PathToken analyze(String pathFragment, LinkedList<Predicate> filterList) {
return new PathComponentAnalyzer(pathFragment, filterList).analyze();
}
public PathToken analyze() {
if ("$".equals(pathFragment)) return new RootPathToken();
else if ("..".equals(pathFragment)) return new ScanPathToken();
else if ("[*]".equals(pathFragment)) return new WildcardPathToken();
else if (".*".equals(pathFragment)) return new WildcardPathToken();
else if ("[?]".equals(pathFragment)) return new PredicatePathToken(filterList.poll());
else if (FILTER_PATTERN.matcher(pathFragment).matches()) {
final int criteriaCount = Utils.countMatches(pathFragment, "?");
List<Predicate> filters = new ArrayList<>(criteriaCount);
for (int i = 0; i < criteriaCount; i++) {
filters.add(filterList.poll());
}
return new PredicatePathToken(filters);
}
this.i = 0;
do {
current = pathFragment.charAt(i);
switch (current) {
case '?':
return analyzeCriteriaSequence4();
case '\'':
return analyzeProperty();
default:
if (Character.isDigit(current) || current == ':' || current == '-' || current == '@') {
return analyzeArraySequence();
}
i++;
break;
}
} while (i < pathFragment.length());
//"Could not analyze path component: " + pathFragment
return null;
}
public PathToken analyzeCriteriaSequence4() {
int[] bounds = findFilterBounds();
if (bounds == null) {
return null;
}
i = bounds[1];
return new PredicatePathToken(Filter.parse(pathFragment.substring(bounds[0], bounds[1])));
}
int[] findFilterBounds() {
int end = 0;
int start = i;
while (pathFragment.charAt(start) != '[') {
start--;
}
int mem = ' ';
int curr = start;
boolean inProp = false;
int openSquareBracket = 0;
int openBrackets = 0;
while (end == 0) {
char c = pathFragment.charAt(curr);
switch (c) {
case '(':
if (!inProp) openBrackets++;
break;
case ')':
if (!inProp) openBrackets--;
break;
case '[':
if (!inProp) openSquareBracket++;
break;
case ']':
if (!inProp) {
openSquareBracket--;
if (openBrackets == 0) {
end = curr + 1;
}
}
break;
case '\'':
if (mem == '\\') {
break;
}
inProp = !inProp;
break;
default:
break;
}
mem = c;
curr++;
}
if (openBrackets != 0 || openSquareBracket != 0) {
// "Filter brackets are not balanced"
return null;
}
return new int[]{start, end};
}
//"['foo']"
private PathToken analyzeProperty() {
List<String> properties = new ArrayList<String>();
StringBuilder buffer = new StringBuilder();
boolean propertyIsOpen = false;
while (current != ']') {
switch (current) {
case '\'':
if (propertyIsOpen) {
properties.add(buffer.toString());
buffer.setLength(0);
propertyIsOpen = false;
} else {
propertyIsOpen = true;
}
break;
default:
if (propertyIsOpen) {
buffer.append(current);
}
break;
}
current = pathFragment.charAt(++i);
}
return new PropertyPathToken(properties);
}
//"[-1:]" sliceFrom
//"[:1]" sliceTo
//"[0:5]" sliceBetween
//"[1]"
//"[1,2,3]"
//"[(@.length - 1)]"
private PathToken analyzeArraySequence() {
StringBuilder buffer = new StringBuilder();
List<Integer> numbers = new ArrayList<Integer>();
boolean contextSize = (current == '@');
boolean sliceTo = false;
boolean sliceFrom = false;
boolean sliceBetween = false;
boolean indexSequence = false;
boolean singleIndex = false;
if (contextSize) {
current = pathFragment.charAt(++i);
current = pathFragment.charAt(++i);
while (current != '-') {
if (current == ' ' || current == '(' || current == ')') {
current = pathFragment.charAt(++i);
continue;
}
buffer.append(current);
current = pathFragment.charAt(++i);
}
String function = buffer.toString();
buffer.setLength(0);
if (!function.equals("size") && !function.equals("length")) {
// "Invalid function: @." + function + ". Supported functions are: [(@.length - n)] and [(@.size() - n)]"
return null;
}
while (current != ')') {
if (current == ' ') {
current = pathFragment.charAt(++i);
continue;
}
buffer.append(current);
current = pathFragment.charAt(++i);
}
} else {
while (Character.isDigit(current) || current == ',' || current == ' ' || current == ':' || current == '-') {
switch (current) {
case ' ':
break;
case ':':
if (buffer.length() == 0) {
//this is a tail slice [:12]
sliceTo = true;
current = pathFragment.charAt(++i);
while (Character.isDigit(current) || current == ' ' || current == '-') {
if (current != ' ') {
buffer.append(current);
}
current = pathFragment.charAt(++i);
}
numbers.add(Integer.parseInt(buffer.toString()));
buffer.setLength(0);
} else {
//we now this starts with [12:???
numbers.add(Integer.parseInt(buffer.toString()));
buffer.setLength(0);
current = pathFragment.charAt(++i);
//this is a tail slice [:12]
while (Character.isDigit(current) || current == ' ' || current == '-') {
if (current != ' ') {
buffer.append(current);
}
current = pathFragment.charAt(++i);
}
if (buffer.length() == 0) {
sliceFrom = true;
} else {
sliceBetween = true;
numbers.add(Integer.parseInt(buffer.toString()));
buffer.setLength(0);
}
}
break;
case ',':
numbers.add(Integer.parseInt(buffer.toString()));
buffer.setLength(0);
indexSequence = true;
break;
default:
buffer.append(current);
break;
}
if (current == ']') {
break;
}
current = pathFragment.charAt(++i);
}
}
if (buffer.length() > 0) {
numbers.add(Integer.parseInt(buffer.toString()));
}
singleIndex = (numbers.size() == 1) && !sliceTo && !sliceFrom && !contextSize;
ArrayPathToken.Operation operation = null;
if (singleIndex) operation = ArrayPathToken.Operation.SINGLE_INDEX;
else if (indexSequence) operation = ArrayPathToken.Operation.INDEX_SEQUENCE;
else if (sliceFrom) operation = ArrayPathToken.Operation.SLICE_FROM;
else if (sliceTo) operation = ArrayPathToken.Operation.SLICE_TO;
else if (sliceBetween) operation = ArrayPathToken.Operation.SLICE_BETWEEN;
else if (contextSize) operation = ArrayPathToken.Operation.CONTEXT_SIZE;
assert operation != null;
return new ArrayPathToken(numbers, operation);
}
}
}

View File

@ -67,6 +67,18 @@ public class TestEvaluateJsonPath {
Assert.fail("Processor incorrectly ran with an invalid configuration of multiple paths specified as attributes for a destination of content.");
}
@Test(expected = AssertionError.class)
public void testInvalidConfiguration_invalidJsonPath_space() throws Exception {
final TestRunner testRunner = TestRunners.newTestRunner(new EvaluateJsonPath());
testRunner.setProperty(EvaluateJsonPath.DESTINATION, EvaluateJsonPath.DESTINATION_CONTENT);
testRunner.setProperty("JsonPath1", "$[0]. _id");
testRunner.enqueue(JSON_SNIPPET);
testRunner.run();
Assert.fail("Processor incorrectly ran with an invalid configuration of multiple paths specified as attributes for a destination of content.");
}
@Test
public void testConfiguration_destinationAttributes_twoPaths() throws Exception {
final TestRunner testRunner = TestRunners.newTestRunner(new EvaluateJsonPath());