NIFI-5169 This closes #2705. Upgrade to JSONPath 2.4

NIFI-5169 Added explicit checking for empty strings in JsonPath expression property.

Signed-off-by: joewitt <joewitt@apache.org>
This commit is contained in:
Mike Thomsen 2018-05-15 05:49:45 -04:00 committed by joewitt
parent 06d45c3a6e
commit 716587d09f
6 changed files with 52 additions and 511 deletions

View File

@ -76,7 +76,7 @@
<dependency>
<groupId>com.jayway.jsonpath</groupId>
<artifactId>json-path</artifactId>
<version>2.0.0</version>
<version>2.4.0</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>

View File

@ -29,8 +29,8 @@ import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.io.InputStreamCallback;
import org.apache.nifi.processors.standard.util.JsonPathExpressionValidator;
import org.apache.nifi.stream.io.BufferedInputStream;
import org.apache.nifi.util.StringUtils;
import java.io.IOException;
import java.io.InputStream;
@ -112,11 +112,15 @@ public abstract class AbstractJsonPathProcessor extends AbstractProcessor {
public ValidationResult validate(final String subject, final String input, final ValidationContext context) {
String error = null;
if (isStale(subject, input)) {
if (JsonPathExpressionValidator.isValidExpression(input)) {
JsonPath compiledJsonPath = JsonPath.compile(input);
cacheComputedValue(subject, input, compiledJsonPath);
if (!StringUtils.isBlank(input)) {
try {
JsonPath compiledJsonPath = JsonPath.compile(input);
cacheComputedValue(subject, input, compiledJsonPath);
} catch (Exception ex) {
error = String.format("specified expression was not valid: %s", input);
}
} else {
error = "specified expression was not valid: " + input;
error = "the expression cannot be empty.";
}
}
return new ValidationResult.Builder().subject(subject).valid(error == null).explanation(error).build();

View File

@ -1,503 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.standard.util;
import com.jayway.jsonpath.Filter;
import com.jayway.jsonpath.Predicate;
import com.jayway.jsonpath.internal.Utils;
import com.jayway.jsonpath.internal.token.ArrayPathToken;
import com.jayway.jsonpath.internal.token.PathToken;
import com.jayway.jsonpath.internal.token.PredicatePathToken;
import com.jayway.jsonpath.internal.token.PropertyPathToken;
import com.jayway.jsonpath.internal.token.RootPathToken;
import com.jayway.jsonpath.internal.token.ScanPathToken;
import com.jayway.jsonpath.internal.token.WildcardPathToken;
import org.apache.nifi.util.StringUtils;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Pattern;
import static java.util.Arrays.asList;
/**
* JsonPathExpressionValidator performs the same execution as com.jayway.jsonpath.internal.PathCompiler, but does not throw exceptions when an invalid path segment is found. Limited access to create
* JsonPath objects requires a separate flow of execution in avoiding exceptions.
*
* @see
* <a href="https://github.com/jayway/JsonPath">https://github.com/jayway/JsonPath</a>
*/
public class JsonPathExpressionValidator {
private static final String PROPERTY_OPEN = "['";
private static final String PROPERTY_CLOSE = "']";
private static final char DOCUMENT = '$';
private static final char ANY = '*';
private static final char PERIOD = '.';
private static final char BRACKET_OPEN = '[';
private static final char BRACKET_CLOSE = ']';
private static final char SPACE = ' ';
/**
* Performs a validation of a provided JsonPath expression.
* <p/>
* Typically this is used in the context of:
* <code>
* <pre>
* JsonPath compiledJsonPath = null;
* if (JsonPathExpressionValidator.isValidExpression(input)) {
* compiledJsonPath = JsonPath.compile(input);
* ...
* } else {
* // error handling
* }
* </pre>
* </code>
*
* @param path to evaluate for validity
* @param filters applied to path expression; this is typically unused in the context of Processors
* @return true if the specified path is valid; false otherwise
*/
public static boolean isValidExpression(String path, Predicate... filters) {
path = path.trim();
if (StringUtils.isBlank(path)) {
// "Path may not be null empty"
return false;
}
if (path.endsWith("..")) {
// "A path can not end with a scan."
return false;
}
LinkedList<Predicate> filterList = new LinkedList<>(asList(filters));
if (path.charAt(0) != '$' && path.charAt(0) != '@') {
path = "$." + path;
}
if (path.charAt(0) == '@') {
path = "$" + path.substring(1);
}
if (path.length() > 1 && path.charAt(1) != '.' && path.charAt(1) != '[') {
// "Invalid path " + path
return false;
}
RootPathToken root = null;
int i = 0;
int positions;
String fragment = "";
do {
char current = path.charAt(i);
switch (current) {
case SPACE:
// "Space not allowed in path"
return false;
case DOCUMENT:
fragment = "$";
i++;
break;
case BRACKET_OPEN:
positions = fastForwardUntilClosed(path, i);
fragment = path.substring(i, i + positions);
i += positions;
break;
case PERIOD:
i++;
if (path.charAt(i) == PERIOD) {
//This is a deep scan
fragment = "..";
i++;
} else {
positions = fastForward(path, i);
if (positions == 0) {
continue;
} else if (positions == 1 && path.charAt(i) == '*') {
fragment = "[*]";
} else {
fragment = PROPERTY_OPEN + path.substring(i, i + positions) + PROPERTY_CLOSE;
}
i += positions;
}
break;
case ANY:
fragment = "[*]";
i++;
break;
default:
positions = fastForward(path, i);
fragment = PROPERTY_OPEN + path.substring(i, i + positions) + PROPERTY_CLOSE;
i += positions;
break;
}
/*
* Analyze each component represented by a fragment. If there is a failure to properly evaluate,
* a null result is returned
*/
PathToken analyzedComponent = PathComponentAnalyzer.analyze(fragment, filterList);
if (analyzedComponent == null) {
return false;
}
if (root == null) {
root = (RootPathToken) analyzedComponent;
} else {
root.append(analyzedComponent);
}
} while (i < path.length());
return true;
}
private static int fastForward(String s, int index) {
int skipCount = 0;
while (index < s.length()) {
char current = s.charAt(index);
if (current == PERIOD || current == BRACKET_OPEN || current == SPACE) {
break;
}
index++;
skipCount++;
}
return skipCount;
}
private static int fastForwardUntilClosed(String s, int index) {
int skipCount = 0;
int nestedBrackets = 0;
//First char is always '[' no need to check it
index++;
skipCount++;
while (index < s.length()) {
char current = s.charAt(index);
index++;
skipCount++;
if (current == BRACKET_CLOSE && nestedBrackets == 0) {
break;
}
if (current == BRACKET_OPEN) {
nestedBrackets++;
}
if (current == BRACKET_CLOSE) {
nestedBrackets--;
}
}
return skipCount;
}
static class PathComponentAnalyzer {
private static final Pattern FILTER_PATTERN = Pattern.compile("^\\[\\s*\\?\\s*[,\\s*\\?]*?\\s*]$"); //[?] or [?, ?, ...]
private int i;
private char current;
private final LinkedList<Predicate> filterList;
private final String pathFragment;
PathComponentAnalyzer(String pathFragment, LinkedList<Predicate> filterList) {
this.pathFragment = pathFragment;
this.filterList = filterList;
}
static PathToken analyze(String pathFragment, LinkedList<Predicate> filterList) {
return new PathComponentAnalyzer(pathFragment, filterList).analyze();
}
public PathToken analyze() {
if ("$".equals(pathFragment)) {
return new RootPathToken();
} else if ("..".equals(pathFragment)) {
return new ScanPathToken();
} else if ("[*]".equals(pathFragment)) {
return new WildcardPathToken();
} else if (".*".equals(pathFragment)) {
return new WildcardPathToken();
} else if ("[?]".equals(pathFragment)) {
return new PredicatePathToken(filterList.poll());
} else if (FILTER_PATTERN.matcher(pathFragment).matches()) {
final int criteriaCount = Utils.countMatches(pathFragment, "?");
List<Predicate> filters = new ArrayList<>(criteriaCount);
for (int i = 0; i < criteriaCount; i++) {
filters.add(filterList.poll());
}
return new PredicatePathToken(filters);
}
this.i = 0;
do {
current = pathFragment.charAt(i);
switch (current) {
case '?':
return analyzeCriteriaSequence4();
case '\'':
return analyzeProperty();
default:
if (Character.isDigit(current) || current == ':' || current == '-' || current == '@') {
return analyzeArraySequence();
}
i++;
break;
}
} while (i < pathFragment.length());
//"Could not analyze path component: " + pathFragment
return null;
}
public PathToken analyzeCriteriaSequence4() {
int[] bounds = findFilterBounds();
if (bounds == null) {
return null;
}
i = bounds[1];
return new PredicatePathToken(Filter.parse(pathFragment.substring(bounds[0], bounds[1])));
}
int[] findFilterBounds() {
int end = 0;
int start = i;
while (pathFragment.charAt(start) != '[') {
start--;
}
int mem = ' ';
int curr = start;
boolean inProp = false;
int openSquareBracket = 0;
int openBrackets = 0;
while (end == 0) {
char c = pathFragment.charAt(curr);
switch (c) {
case '(':
if (!inProp) {
openBrackets++;
}
break;
case ')':
if (!inProp) {
openBrackets--;
}
break;
case '[':
if (!inProp) {
openSquareBracket++;
}
break;
case ']':
if (!inProp) {
openSquareBracket--;
if (openBrackets == 0) {
end = curr + 1;
}
}
break;
case '\'':
if (mem == '\\') {
break;
}
inProp = !inProp;
break;
default:
break;
}
mem = c;
curr++;
}
if (openBrackets != 0 || openSquareBracket != 0) {
// "Filter brackets are not balanced"
return null;
}
return new int[]{start, end};
}
//"['foo']"
private PathToken analyzeProperty() {
List<String> properties = new ArrayList<>();
StringBuilder buffer = new StringBuilder();
boolean propertyIsOpen = false;
while (current != ']') {
switch (current) {
case '\'':
if (propertyIsOpen) {
properties.add(buffer.toString());
buffer.setLength(0);
propertyIsOpen = false;
} else {
propertyIsOpen = true;
}
break;
default:
if (propertyIsOpen) {
buffer.append(current);
}
break;
}
current = pathFragment.charAt(++i);
}
return new PropertyPathToken(properties);
}
//"[-1:]" sliceFrom
//"[:1]" sliceTo
//"[0:5]" sliceBetween
//"[1]"
//"[1,2,3]"
//"[(@.length - 1)]"
private PathToken analyzeArraySequence() {
StringBuilder buffer = new StringBuilder();
List<Integer> numbers = new ArrayList<>();
boolean contextSize = (current == '@');
boolean sliceTo = false;
boolean sliceFrom = false;
boolean sliceBetween = false;
boolean indexSequence = false;
boolean singleIndex = false;
if (contextSize) {
current = pathFragment.charAt(++i);
current = pathFragment.charAt(++i);
while (current != '-') {
if (current == ' ' || current == '(' || current == ')') {
current = pathFragment.charAt(++i);
continue;
}
buffer.append(current);
current = pathFragment.charAt(++i);
}
String function = buffer.toString();
buffer.setLength(0);
if (!function.equals("size") && !function.equals("length")) {
// "Invalid function: @." + function + ". Supported functions are: [(@.length - n)] and [(@.size() - n)]"
return null;
}
while (current != ')') {
if (current == ' ') {
current = pathFragment.charAt(++i);
continue;
}
buffer.append(current);
current = pathFragment.charAt(++i);
}
} else {
while (Character.isDigit(current) || current == ',' || current == ' ' || current == ':' || current == '-') {
switch (current) {
case ' ':
break;
case ':':
if (buffer.length() == 0) {
//this is a tail slice [:12]
sliceTo = true;
current = pathFragment.charAt(++i);
while (Character.isDigit(current) || current == ' ' || current == '-') {
if (current != ' ') {
buffer.append(current);
}
current = pathFragment.charAt(++i);
}
numbers.add(Integer.parseInt(buffer.toString()));
buffer.setLength(0);
} else {
//we now this starts with [12:???
numbers.add(Integer.parseInt(buffer.toString()));
buffer.setLength(0);
current = pathFragment.charAt(++i);
//this is a tail slice [:12]
while (Character.isDigit(current) || current == ' ' || current == '-') {
if (current != ' ') {
buffer.append(current);
}
current = pathFragment.charAt(++i);
}
if (buffer.length() == 0) {
sliceFrom = true;
} else {
sliceBetween = true;
numbers.add(Integer.parseInt(buffer.toString()));
buffer.setLength(0);
}
}
break;
case ',':
numbers.add(Integer.parseInt(buffer.toString()));
buffer.setLength(0);
indexSequence = true;
break;
default:
buffer.append(current);
break;
}
if (current == ']') {
break;
}
current = pathFragment.charAt(++i);
}
}
if (buffer.length() > 0) {
numbers.add(Integer.parseInt(buffer.toString()));
}
singleIndex = (numbers.size() == 1) && !sliceTo && !sliceFrom && !contextSize;
ArrayPathToken.Operation operation = null;
if (singleIndex) {
operation = ArrayPathToken.Operation.SINGLE_INDEX;
} else if (indexSequence) {
operation = ArrayPathToken.Operation.INDEX_SEQUENCE;
} else if (sliceFrom) {
operation = ArrayPathToken.Operation.SLICE_FROM;
} else if (sliceTo) {
operation = ArrayPathToken.Operation.SLICE_TO;
} else if (sliceBetween) {
operation = ArrayPathToken.Operation.SLICE_BETWEEN;
} else if (contextSize) {
operation = ArrayPathToken.Operation.CONTEXT_SIZE;
}
assert operation != null;
return new ArrayPathToken(numbers, operation);
}
}
}

View File

@ -33,6 +33,8 @@ import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.List;
import static org.junit.Assert.assertEquals;
@ -50,6 +52,44 @@ public class TestEvaluateJsonPath {
Assert.fail("An improper JsonPath expression was not detected as being invalid.");
}
@Test
public void testUpgradeToJsonPath24() throws Exception {
final TestRunner testRunner = TestRunners.newTestRunner(new EvaluateJsonPath());
testRunner.setProperty(EvaluateJsonPath.DESTINATION, EvaluateJsonPath.DESTINATION_ATTRIBUTE);
List<String> badInputs = Arrays.asList("LoremIpsum []", "LoremIpsum[]", "$..", "$.xyz.");
for (String bad : badInputs) {
testRunner.setProperty("DefinitelyNotJsonPath", bad);
testRunner.enqueue(JSON_SNIPPET);
testRunner.assertNotValid();
}
/*
* Not sure why these passed with v2, but they do. These are weird strings that oddly worked before.
*/
List<String> testWhatUsedToPass = Arrays.asList("LoremIpsum@$Q#$^Q$%Q#", "TestTest['sdfadsf']#$%#$^#$^.xyz");
for (String old : testWhatUsedToPass) {
testRunner.setProperty("DefinitelyNotJsonPath", old);
testRunner.enqueue(JSON_SNIPPET);
testRunner.assertValid();
}
/*
* Test some obviously good JsonPath strings
*/
List<String> goodStrings = Arrays.asList("$", "$.xyz", "$['xyz']", "$.*['xyz']");
for (String good : goodStrings) {
testRunner.setProperty("DefinitelyNotJsonPath", good);
testRunner.enqueue(JSON_SNIPPET);
testRunner.assertValid();
}
testRunner.setProperty("DefinitelyNotJsonPath", " ");
testRunner.assertNotValid();
}
@Test
public void testInvalidJsonDocument() throws Exception {
final TestRunner testRunner = TestRunners.newTestRunner(new EvaluateJsonPath());

View File

@ -265,7 +265,7 @@
<dependency>
<groupId>com.jayway.jsonpath</groupId>
<artifactId>json-path</artifactId>
<version>2.0.0</version>
<version>2.4.0</version>
</dependency>
<dependency>
<groupId>org.apache.tika</groupId>

View File

@ -49,7 +49,7 @@
<dependency>
<groupId>com.jayway.jsonpath</groupId>
<artifactId>json-path</artifactId>
<version>2.0.0</version>
<version>2.4.0</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>