NIFI-12513 Added UriUtils to nifi-utils and updated InvokeHTTP

The UriUtils class allows the construction of a valid java.net.URI from a single string even though there may be illegal characters in the path, query and/or fragment section(s) of the URI. The create method uses regular expressions from Spring Framework UriComponentsBuilder and provides capabilities closer to the deprecated constructor for java.net.URL.

This closes #8189

Signed-off-by: David Handermann <exceptionfactory@apache.org>
This commit is contained in:
dan-s1 2023-12-25 14:48:35 +00:00 committed by exceptionfactory
parent ccf3f35076
commit 3b8ff2299f
No known key found for this signature in database
GPG Key ID: 29B6A52D2AAE8DBA
6 changed files with 135 additions and 10 deletions

9
NOTICE
View File

@ -127,4 +127,11 @@ This includes derived works from Apache Kafka available under Apache Software Li
The derived work is adapted from
https://github.com/apache/kafka/blob/trunk/clients/src/main/java/org/apache/kafka/common/security/kerberos/KerberosLogin.java
and can be found in
nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-2-6-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/CustomKerberosLogin.java
nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-2-6-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/CustomKerberosLogin.java
This includes derived works from Spring Framework available under Apache Software License V2
Copyright 2002-2023 the original author or authors.
The derived work is adapted from
https://github.com/spring-projects/spring-framework/blob/main/spring-web/src/main/java/org/springframework/web/util/UriComponentsBuilder.java
and can be found in
nifi-commons/nifi-utils/src/main/java/org/apache/nifi/util/UriUtils.java

View File

@ -35,6 +35,7 @@ import org.apache.nifi.expression.AttributeExpression.ResultType;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.DataUnit;
import org.apache.nifi.util.FormatUtils;
import org.apache.nifi.util.UriUtils;
public class StandardValidators {
@ -540,7 +541,8 @@ public class StandardValidators {
try {
final String evaluatedInput = context.newPropertyValue(input).evaluateAttributeExpressions().getValue();
URI.create(evaluatedInput).toURL();
final URI uri = UriUtils.create(evaluatedInput);
uri.toURL();
return new ValidationResult.Builder().subject(subject).input(input).explanation("Valid URL").valid(true).build();
} catch (final Exception e) {
return new ValidationResult.Builder().subject(subject).input(input).explanation("Not a valid URL").valid(false).build();

View File

@ -0,0 +1,78 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.util;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Utility class providing java.net.URI utilities.
 * The regular expressions in this class used to capture the various components of a URI were adapted from
 * <a href="https://github.com/spring-projects/spring-framework/blob/main/spring-web/src/main/java/org/springframework/web/util/UriComponentsBuilder.java">UriComponentsBuilder</a>
 */
public class UriUtils {
    private static final String SCHEME_PATTERN = "([^:/?#]+):";
    private static final String USERINFO_PATTERN = "([^@\\[/?#]*)";
    private static final String HOST_IPV4_PATTERN = "[^\\[/?#:]*";
    private static final String HOST_IPV6_PATTERN = "\\[[\\p{XDigit}:.]*[%\\p{Alnum}]*]";
    private static final String HOST_PATTERN = "(" + HOST_IPV6_PATTERN + "|" + HOST_IPV4_PATTERN + ")";
    private static final String PORT_PATTERN = "(\\{[^}]+\\}?|[^/?#]*)";
    private static final String PATH_PATTERN = "([^?#]*)";
    private static final String QUERY_PATTERN = "([^#]*)";
    private static final String LAST_PATTERN = "(.*)";

    // Regex pattern that matches URIs. See RFC 3986, appendix B
    private static final Pattern URI_PATTERN = Pattern.compile(
            "^(" + SCHEME_PATTERN + ")?" + "(//(" + USERINFO_PATTERN + "@)?" + HOST_PATTERN + "(:" + PORT_PATTERN +
                    ")?" + ")?" + PATH_PATTERN + "(\\?" + QUERY_PATTERN + ")?" + "(#" + LAST_PATTERN + ")?");

    // Indexes of the capturing groups in URI_PATTERN that hold each component without its delimiters.
    // The groups in between are the optional wrappers that include the delimiters (':', '//', '@', '?', '#').
    private static final int SCHEME_GROUP = 2;
    private static final int USERINFO_GROUP = 5;
    private static final int HOST_GROUP = 6;
    private static final int PORT_GROUP = 8;
    private static final int PATH_GROUP = 9;
    private static final int QUERY_GROUP = 11;
    private static final int FRAGMENT_GROUP = 13;

    // Value java.net.URI uses to represent an undefined port
    private static final int UNDEFINED_PORT = -1;

    private UriUtils() {}

    /**
     * This method provides an alternative to the use of java.net.URI's single argument constructor and 'create' method.
     * The drawbacks of the java.net.URI's single argument constructor and 'create' method are:
     * <ul>
     *     <li>They do not provide quoting in the path section for any character not in the unreserved, punct, escaped, or other categories,
     *         and not equal to the slash character ('/') or the commercial-at character ('{@literal @}').</li>
     *     <li>They do not provide quoting for any illegal characters found in the query and fragment sections.</li>
     * </ul>
     * On the other hand, java.net.URI's seven argument constructor provides these quoting capabilities. In order
     * to take advantage of this constructor, this method parses the given string into the arguments needed
     * thereby allowing for instantiating a java.net.URI with the quoting of all illegal characters.
     * <p>
     * A port delimiter that is not followed by any characters (for example {@code http://host:/path}) is treated
     * as an undefined port, since RFC 3986 allows the port component to be empty.
     *
     * @param uri String representing a URI.
     * @return Instance of java.net.URI
     * @throws URISyntaxException Thrown on parsing failures reported by the java.net.URI constructor
     * @throws IllegalArgumentException Thrown when the string does not match the URI pattern, or
     *         (as a NumberFormatException) when a non-empty port cannot be parsed as an integer
     */
    public static URI create(String uri) throws URISyntaxException {
        final Matcher matcher = URI_PATTERN.matcher(uri);
        if (!matcher.matches()) {
            throw new IllegalArgumentException(uri + " is not a valid URI");
        }

        final String scheme = matcher.group(SCHEME_GROUP);
        final String userInfo = matcher.group(USERINFO_GROUP);
        final String host = matcher.group(HOST_GROUP);
        final int port = parsePort(matcher.group(PORT_GROUP));
        final String path = matcher.group(PATH_GROUP);
        final String query = matcher.group(QUERY_GROUP);
        final String fragment = matcher.group(FRAGMENT_GROUP);
        return new URI(scheme, userInfo, host, port, path, query, fragment);
    }

    // Converts the captured port text to the int expected by java.net.URI; null or empty means no port was given.
    private static int parsePort(final String port) {
        if (port == null || port.isEmpty()) {
            return UNDEFINED_PORT;
        }
        return Integer.parseInt(port);
    }
}

View File

@ -0,0 +1,39 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.util;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.function.Executable;
import java.net.URISyntaxException;
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import static org.junit.jupiter.api.Assertions.assertThrows;
/**
 * Tests for UriUtils.create covering one input that must be accepted and one that must be rejected.
 */
public class TestUriUtils {
    // The query contains pipe characters, which are not legal unescaped in a URI query
    private static final String URI_WITH_PIPES_IN_QUERY =
            "https://en.wikipedia.org/w/api.php?action=query&list=recentchanges&format=json&rcprop=user|comment|parsedcomment|timestamp|title|sizes|tags";

    // The authority section consists of a space followed by an underscore
    private static final String URI_WITH_INVALID_AUTHORITY = "http:// _";

    @Test
    void testValidUri() {
        // Typed as Executable so the Executable overload of assertDoesNotThrow is selected
        final Executable creation = () -> UriUtils.create(URI_WITH_PIPES_IN_QUERY);
        assertDoesNotThrow(creation);
    }

    @Test
    void testInvalidUri() {
        final Executable creation = () -> UriUtils.create(URI_WITH_INVALID_AUTHORITY);
        assertThrows(URISyntaxException.class, creation);
    }
}

View File

@ -85,6 +85,7 @@ import org.apache.nifi.security.util.TlsConfiguration;
import org.apache.nifi.security.util.TlsException;
import org.apache.nifi.ssl.SSLContextService;
import org.apache.nifi.stream.io.StreamUtils;
import org.apache.nifi.util.UriUtils;
import javax.annotation.Nullable;
import javax.net.ssl.SSLContext;
@ -839,7 +840,8 @@ public class InvokeHTTP extends AbstractProcessor {
FlowFile responseFlowFile = null;
try {
final String urlProperty = trimToEmpty(context.getProperty(HTTP_URL).evaluateAttributeExpressions(requestFlowFile).getValue());
final URL url = URI.create(urlProperty).toURL();
final URI uri = UriUtils.create(urlProperty);
final URL url = uri.toURL();
Request httpRequest = configureRequest(context, session, requestFlowFile, url);
logRequest(logger, httpRequest);

View File

@ -48,11 +48,8 @@ import org.mockito.Answers;
import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLSocketFactory;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.Proxy;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.security.GeneralSecurityException;
import java.time.ZonedDateTime;
@ -764,11 +761,11 @@ public class InvokeHTTPTest {
@ParameterizedTest(name = "{index} => When {0} http://baseUrl/{1}, filename of the response FlowFile should be {2}")
@MethodSource
public void testResponseFlowFileFilenameExtractedFromRemoteUrl(String httpMethod, String relativePath, String expectedFileName) throws MalformedURLException, URISyntaxException {
URL targetUrl = new URI("http", null, mockWebServer.getHostName(), mockWebServer.getPort(), String.format("/%s", relativePath), null, null).toURL();
public void testResponseFlowFileFilenameExtractedFromRemoteUrl(String httpMethod, String relativePath, String expectedFileName) {
//Build URL as a string to prevent double encoding
final String targetUrl = String.format("http://%s:%d/%s", mockWebServer.getHostName(), mockWebServer.getPort(), relativePath);
runner.setProperty(InvokeHTTP.HTTP_METHOD, httpMethod);
runner.setProperty(InvokeHTTP.HTTP_URL, targetUrl.toString());
runner.setProperty(InvokeHTTP.HTTP_URL, targetUrl);
runner.setProperty(InvokeHTTP.RESPONSE_FLOW_FILE_NAMING_STRATEGY, FlowFileNamingStrategy.URL_PATH.name());
Map<String, String> ffAttributes = new HashMap<>();