NIFI-6255 NIFI-6287: Hash function for expression language and record path.

NIFI-6255 NIFI-6287: Rebased to match the new expression language interface
NIFI-6255 NIFI-6287: Fix wildcard imports and unused imports
NIFI-6255 NIFI-6287: Move to the common codec DigetUtils
Update commons-codec

This closes #3624

Signed-off-by: Mike Thomsen <mthomsen@apache.org>
This commit is contained in:
Phillip Grenier 2019-07-24 15:45:13 -04:00 committed by Mike Thomsen
parent 45e626f6a8
commit 0f4b79b55e
No known key found for this signature in database
GPG Key ID: 88511C3D4CAD246F
11 changed files with 238 additions and 3 deletions

View File

@ -185,6 +185,7 @@ JSON_PATH_DELETE : 'jsonPathDelete';
REPEAT : 'repeat';
UUID3 : 'UUID3';
UUID5 : 'UUID5';
HASH : 'hash';
// 2 arg functions
SUBSTRING : 'substring';

View File

@ -73,10 +73,11 @@ tokens {
}
}
// functions that return Strings
zeroArgString : (TO_UPPER | TO_LOWER | TRIM | TO_STRING | URL_ENCODE | URL_DECODE | BASE64_ENCODE | BASE64_DECODE | ESCAPE_JSON | ESCAPE_XML | ESCAPE_CSV | ESCAPE_HTML3 | ESCAPE_HTML4 | UNESCAPE_JSON | UNESCAPE_XML | UNESCAPE_CSV | UNESCAPE_HTML3 | UNESCAPE_HTML4 | EVALUATE_EL_STRING) LPAREN! RPAREN!;
oneArgString : ((SUBSTRING_BEFORE | SUBSTRING_BEFORE_LAST | SUBSTRING_AFTER | SUBSTRING_AFTER_LAST | REPLACE_NULL | REPLACE_EMPTY |
PREPEND | APPEND | STARTS_WITH | ENDS_WITH | CONTAINS | JOIN | JSON_PATH | JSON_PATH_DELETE | FROM_RADIX | UUID3 | UUID5) LPAREN! anyArg RPAREN!) |
PREPEND | APPEND | STARTS_WITH | ENDS_WITH | CONTAINS | JOIN | JSON_PATH | JSON_PATH_DELETE | FROM_RADIX | UUID3 | UUID5 | HASH) LPAREN! anyArg RPAREN!) |
(TO_RADIX LPAREN! anyArg (COMMA! anyArg)? RPAREN!);
twoArgString : ((REPLACE | REPLACE_FIRST | REPLACE_ALL | IF_ELSE | JSON_PATH_SET | JSON_PATH_ADD) LPAREN! anyArg COMMA! anyArg RPAREN!) |
((SUBSTRING | FORMAT | PAD_LEFT | PAD_RIGHT | REPEAT) LPAREN! anyArg (COMMA! anyArg)? RPAREN!);

View File

@ -57,6 +57,7 @@ import org.apache.nifi.attribute.expression.language.evaluation.functions.GetDel
import org.apache.nifi.attribute.expression.language.evaluation.functions.GetStateVariableEvaluator;
import org.apache.nifi.attribute.expression.language.evaluation.functions.GreaterThanEvaluator;
import org.apache.nifi.attribute.expression.language.evaluation.functions.GreaterThanOrEqualEvaluator;
import org.apache.nifi.attribute.expression.language.evaluation.functions.HashEvaluator;
import org.apache.nifi.attribute.expression.language.evaluation.functions.HostnameEvaluator;
import org.apache.nifi.attribute.expression.language.evaluation.functions.IPEvaluator;
import org.apache.nifi.attribute.expression.language.evaluation.functions.IfElseEvaluator;
@ -177,6 +178,7 @@ import static org.apache.nifi.attribute.expression.language.antlr.AttributeExpre
import static org.apache.nifi.attribute.expression.language.antlr.AttributeExpressionParser.GET_STATE_VALUE;
import static org.apache.nifi.attribute.expression.language.antlr.AttributeExpressionParser.GREATER_THAN;
import static org.apache.nifi.attribute.expression.language.antlr.AttributeExpressionParser.GREATER_THAN_OR_EQUAL;
import static org.apache.nifi.attribute.expression.language.antlr.AttributeExpressionParser.HASH;
import static org.apache.nifi.attribute.expression.language.antlr.AttributeExpressionParser.HOSTNAME;
import static org.apache.nifi.attribute.expression.language.antlr.AttributeExpressionParser.IF_ELSE;
import static org.apache.nifi.attribute.expression.language.antlr.AttributeExpressionParser.IN;
@ -682,6 +684,11 @@ public class ExpressionCompiler {
toStringEvaluator(argEvaluators.get(0), "first argument to replaceAll"),
toStringEvaluator(argEvaluators.get(1), "second argument to replaceAll")), "replaceAll");
}
case HASH: {
verifyArgCount(argEvaluators, 1, "hash");
return addToken(new HashEvaluator(toStringEvaluator(subjectEvaluator),
toStringEvaluator(argEvaluators.get(0), "first argument to hash")), "hash");
}
case PAD_LEFT: {
if (argEvaluators.size() == 1) {
return addToken(new PadLeftEvaluator(toStringEvaluator(subjectEvaluator),

View File

@ -0,0 +1,67 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.attribute.expression.language.evaluation.functions;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.security.Security;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.nifi.attribute.expression.language.EvaluationContext;
import org.apache.nifi.attribute.expression.language.evaluation.Evaluator;
import org.apache.nifi.attribute.expression.language.evaluation.QueryResult;
import org.apache.nifi.attribute.expression.language.evaluation.StringEvaluator;
import org.apache.nifi.attribute.expression.language.evaluation.StringQueryResult;
import org.apache.nifi.attribute.expression.language.exception.AttributeExpressionLanguageException;
public class HashEvaluator extends StringEvaluator {
private static final String SUPPORTED_ALGORITHMS = String.join(", ", Security.getAlgorithms("MessageDigest"));
private final Evaluator<String> algorithm;
private final Evaluator<String> subject;
public HashEvaluator(final Evaluator<String> subject, final Evaluator<String> algorithm) {
this.subject = subject;
this.algorithm = algorithm;
}
@Override
public QueryResult<String> evaluate(EvaluationContext context) {
final String subjectValue = subject.evaluate(context).getValue();
if (subjectValue == null) {
return new StringQueryResult(null);
}
final String algorithmValue = algorithm.evaluate(context).getValue();
final MessageDigest digest = getDigest(algorithmValue);
String encoded = new DigestUtils(digest).digestAsHex(subjectValue);
return new StringQueryResult(encoded);
}
@Override
public Evaluator<?> getSubjectEvaluator() {
return subject;
}
private MessageDigest getDigest(String algorithm){
try {
return MessageDigest.getInstance(algorithm);
} catch (NoSuchAlgorithmException e) {
throw new AttributeExpressionLanguageException("Invalid hash algorithm: " + algorithm + " not in set [" + SUPPORTED_ALGORITHMS + "]", e);
}
}
}

View File

@ -2058,6 +2058,25 @@ public class TestQuery {
verifyEquals("${literal(true):ifElse(false, 'b')}", attributes, "false");
}
@Test
public void testHash() {
final Map<String, String> attributes = new HashMap<>();
attributes.put("str_attr", "string value");
attributes.put("nbr_attr", "10");
verifyEquals("${literal('john'):hash('MD5')}", attributes, "527bd5b5d689e2c32ae974c6229ff785");
verifyEquals("${str_attr:hash('MD5')}", attributes, "64e58419496c7248b4ef25731f88b8c3");
verifyEquals("${str_attr:hash('SHA-1')}", attributes, "34990db823e7bb2b47278a7fbf08c62d9e8e4307");
verifyEquals("${str_attr:hash('SHA-256')}", attributes, "9b6a1a9167a5caf3f5948413faa89e0ec0de89e12bef55327442e60dcc0e8c9b");
verifyEquals("${nbr_attr:toNumber():hash('MD5')}", attributes, "d3d9446802a44259755d38e6d163e820");
verifyEquals("${nbr_attr:hash('MD5')}", attributes, "d3d9446802a44259755d38e6d163e820");
}
@Test(expected = AttributeExpressionLanguageException.class)
public void testHashFailure() {
final Map<String, String> attributes = new HashMap<>();
verifyEquals("${literal('john'):hash('NOT_A_ALGO')}", attributes, "527bd5b5d689e2c32ae974c6229ff785");
}
@Test
public void testThread() {
final Map<String, String> attributes = new HashMap<>();

View File

@ -81,5 +81,10 @@
<artifactId>caffeine</artifactId>
<version>2.8.1</version>
</dependency>
</dependencies>
<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
<version>1.14</version>
</dependency>
</dependencies>
</project>

View File

@ -0,0 +1,72 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.record.path.functions;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.nifi.record.path.FieldValue;
import org.apache.nifi.record.path.RecordPathEvaluationContext;
import org.apache.nifi.record.path.StandardFieldValue;
import org.apache.nifi.record.path.exception.RecordPathException;
import org.apache.nifi.record.path.paths.RecordPathSegment;
import org.apache.nifi.record.path.util.RecordPathUtils;
import org.apache.nifi.serialization.record.util.DataTypeUtils;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.security.Security;
import java.util.stream.Stream;
public class Hash extends RecordPathSegment {
private static final String SUPPORTED_ALGORITHMS = String.join(", ", Security.getAlgorithms("MessageDigest"));
private final RecordPathSegment recordPath;
private final RecordPathSegment algorithmPath;
public Hash(final RecordPathSegment recordPath, final RecordPathSegment algorithmPath, final boolean absolute) {
super("hash", null, absolute);
this.recordPath = recordPath;
this.algorithmPath = algorithmPath;
}
@Override
public Stream<FieldValue> evaluate(final RecordPathEvaluationContext context) {
final Stream<FieldValue> fieldValues = recordPath.evaluate(context);
return fieldValues.filter(fv -> fv.getValue() != null)
.map(fv -> {
final String algorithmValue = RecordPathUtils.getFirstStringValue(algorithmPath, context);
if (algorithmValue == null || algorithmValue.isEmpty()) {
return fv;
}
final MessageDigest digest = getDigest(algorithmValue);
final String value = DataTypeUtils.toString(fv.getValue(), (String) null);
String encoded = new DigestUtils(digest).digestAsHex(value);
return new StandardFieldValue(encoded, fv.getField(), fv.getParent().orElse(null));
});
}
private MessageDigest getDigest(String algorithm){
try {
return MessageDigest.getInstance(algorithm);
} catch (NoSuchAlgorithmException e) {
throw new RecordPathException("Invalid hash algorithm: " + algorithm + "not in set [" + SUPPORTED_ALGORITHMS + "]", e);
}
}
}

View File

@ -69,6 +69,7 @@ import org.apache.nifi.record.path.functions.Base64Encode;
import org.apache.nifi.record.path.functions.Concat;
import org.apache.nifi.record.path.functions.Format;
import org.apache.nifi.record.path.functions.FieldName;
import org.apache.nifi.record.path.functions.Hash;
import org.apache.nifi.record.path.functions.PadLeft;
import org.apache.nifi.record.path.functions.PadRight;
import org.apache.nifi.record.path.functions.Replace;
@ -305,6 +306,10 @@ public class RecordPathCompiler {
final RecordPathSegment[] args = getArgPaths(argumentListTree, 1, functionName, absolute);
return new Base64Decode(args[0], absolute);
}
case "hash":{
final RecordPathSegment[] args = getArgPaths(argumentListTree, 2, functionName, absolute);
return new Hash(args[0], args[1], absolute);
}
case "padLeft": {
final int numArgs = argumentListTree.getChildCount();

View File

@ -1581,6 +1581,19 @@ public class TestRecordPath {
});
}
@Test
public void testHash() {
final Record record = getCaseTestRecord();
assertEquals("61409aa1fd47d4a5332de23cbf59a36f", RecordPath.compile("hash(/firstName, 'MD5')").evaluate(record).getSelectedFields().findFirst().get().getValue());
assertEquals("5753a498f025464d72e088a9d5d6e872592d5f91", RecordPath.compile("hash(/firstName, 'SHA-1')").evaluate(record).getSelectedFields().findFirst().get().getValue());
}
@Test(expected = RecordPathException.class)
public void testHashFailure() {
final Record record = getCaseTestRecord();
assertEquals("61409aa1fd47d4a5332de23cbf59a36f", RecordPath.compile("hash(/firstName, 'NOT_A_ALGO')").evaluate(record).getSelectedFields().findFirst().get().getValue());
}
@Test
public void testPadLeft() {
final List<RecordField> fields = new ArrayList<>();

View File

@ -1431,6 +1431,11 @@ Each of the following functions will encode a string according the rules of the
*Description*: [.description]#Returns a type 3 (MD5 hashed) namespace name-based UUID.#
[.function]
=== hash
*Description*: [.description]#Returns a hex encoded string using the hash algorithm provided. This can be used to generate unique keys.#
*Subject Type*: [.subject]#String#
*Arguments*:
@ -1460,6 +1465,15 @@ Each of the following functions will encode a string according the rules of the
*Examples*: ${attr:UUID5('245b55a8-397d-4480-a41e-16603c8cf9ad')} returns a value similar to 6448f0c0-fd2b-30ad-b05a-deb456f8b060
- [.argName]#_algorithm_# : [.argDesc]#An algorithm to hash value.
Supports one of [SHA-384, SHA-224, SHA-256, MD2, SHA, SHA-512, MD5]#
*Return Type*: [.returnType]#String#
*Examples*: We can hash an attribute named "payload" by using the Expression
`${payload:hash('MD5')}` If the attribute payload had a value of "string value"
then the Expression `${payload:hash('MD5')}` will return "64e58419496c7248b4ef25731f88b8c3".

View File

@ -3,7 +3,7 @@
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except ixn compliance with
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
@ -778,6 +778,37 @@ The following record path expression would decode the String using Base64:
| `base64Decode(/name)` | John
|==========================================================
=== hash
Converts a String using a hash algorithm. For example, given a schema such as:
----
{
"type": "record",
"name": "events",
"fields": [
{ "name": "name", "type": "string" }
]
}
----
and a record such as:
----
{
"name" : "John"
}
----
The following record path expression would hash the String using one of these, [SHA-384, SHA-224, SHA-256, MD2, SHA, SHA-512, MD5] algorithms.
|==========================================================
| RecordPath | Return value
| `hash(/name, 'MD5')` | 527bd5b5d689e2c32ae974c6229ff785
|==========================================================
=== padLeft
Prepends characters to the input String until it reaches the desired length.