mirror of
https://github.com/apache/nifi.git
synced 2025-02-06 01:58:32 +00:00
NIFI-6255 NIFI-6287: Hash function for expression language and record path.
NIFI-6255 NIFI-6287: Rebased to match the new expression language interface NIFI-6255 NIFI-6287: Fix wildcard imports and unused imports NIFI-6255 NIFI-6287: Move to the common codec DigetUtils Update commons-codec This closes #3624 Signed-off-by: Mike Thomsen <mthomsen@apache.org>
This commit is contained in:
parent
45e626f6a8
commit
0f4b79b55e
@ -185,6 +185,7 @@ JSON_PATH_DELETE : 'jsonPathDelete';
|
||||
REPEAT : 'repeat';
|
||||
UUID3 : 'UUID3';
|
||||
UUID5 : 'UUID5';
|
||||
HASH : 'hash';
|
||||
|
||||
// 2 arg functions
|
||||
SUBSTRING : 'substring';
|
||||
|
@ -73,10 +73,11 @@ tokens {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// functions that return Strings
|
||||
zeroArgString : (TO_UPPER | TO_LOWER | TRIM | TO_STRING | URL_ENCODE | URL_DECODE | BASE64_ENCODE | BASE64_DECODE | ESCAPE_JSON | ESCAPE_XML | ESCAPE_CSV | ESCAPE_HTML3 | ESCAPE_HTML4 | UNESCAPE_JSON | UNESCAPE_XML | UNESCAPE_CSV | UNESCAPE_HTML3 | UNESCAPE_HTML4 | EVALUATE_EL_STRING) LPAREN! RPAREN!;
|
||||
oneArgString : ((SUBSTRING_BEFORE | SUBSTRING_BEFORE_LAST | SUBSTRING_AFTER | SUBSTRING_AFTER_LAST | REPLACE_NULL | REPLACE_EMPTY |
|
||||
PREPEND | APPEND | STARTS_WITH | ENDS_WITH | CONTAINS | JOIN | JSON_PATH | JSON_PATH_DELETE | FROM_RADIX | UUID3 | UUID5) LPAREN! anyArg RPAREN!) |
|
||||
PREPEND | APPEND | STARTS_WITH | ENDS_WITH | CONTAINS | JOIN | JSON_PATH | JSON_PATH_DELETE | FROM_RADIX | UUID3 | UUID5 | HASH) LPAREN! anyArg RPAREN!) |
|
||||
(TO_RADIX LPAREN! anyArg (COMMA! anyArg)? RPAREN!);
|
||||
twoArgString : ((REPLACE | REPLACE_FIRST | REPLACE_ALL | IF_ELSE | JSON_PATH_SET | JSON_PATH_ADD) LPAREN! anyArg COMMA! anyArg RPAREN!) |
|
||||
((SUBSTRING | FORMAT | PAD_LEFT | PAD_RIGHT | REPEAT) LPAREN! anyArg (COMMA! anyArg)? RPAREN!);
|
||||
|
@ -57,6 +57,7 @@ import org.apache.nifi.attribute.expression.language.evaluation.functions.GetDel
|
||||
import org.apache.nifi.attribute.expression.language.evaluation.functions.GetStateVariableEvaluator;
|
||||
import org.apache.nifi.attribute.expression.language.evaluation.functions.GreaterThanEvaluator;
|
||||
import org.apache.nifi.attribute.expression.language.evaluation.functions.GreaterThanOrEqualEvaluator;
|
||||
import org.apache.nifi.attribute.expression.language.evaluation.functions.HashEvaluator;
|
||||
import org.apache.nifi.attribute.expression.language.evaluation.functions.HostnameEvaluator;
|
||||
import org.apache.nifi.attribute.expression.language.evaluation.functions.IPEvaluator;
|
||||
import org.apache.nifi.attribute.expression.language.evaluation.functions.IfElseEvaluator;
|
||||
@ -177,6 +178,7 @@ import static org.apache.nifi.attribute.expression.language.antlr.AttributeExpre
|
||||
import static org.apache.nifi.attribute.expression.language.antlr.AttributeExpressionParser.GET_STATE_VALUE;
|
||||
import static org.apache.nifi.attribute.expression.language.antlr.AttributeExpressionParser.GREATER_THAN;
|
||||
import static org.apache.nifi.attribute.expression.language.antlr.AttributeExpressionParser.GREATER_THAN_OR_EQUAL;
|
||||
import static org.apache.nifi.attribute.expression.language.antlr.AttributeExpressionParser.HASH;
|
||||
import static org.apache.nifi.attribute.expression.language.antlr.AttributeExpressionParser.HOSTNAME;
|
||||
import static org.apache.nifi.attribute.expression.language.antlr.AttributeExpressionParser.IF_ELSE;
|
||||
import static org.apache.nifi.attribute.expression.language.antlr.AttributeExpressionParser.IN;
|
||||
@ -682,6 +684,11 @@ public class ExpressionCompiler {
|
||||
toStringEvaluator(argEvaluators.get(0), "first argument to replaceAll"),
|
||||
toStringEvaluator(argEvaluators.get(1), "second argument to replaceAll")), "replaceAll");
|
||||
}
|
||||
case HASH: {
|
||||
verifyArgCount(argEvaluators, 1, "hash");
|
||||
return addToken(new HashEvaluator(toStringEvaluator(subjectEvaluator),
|
||||
toStringEvaluator(argEvaluators.get(0), "first argument to hash")), "hash");
|
||||
}
|
||||
case PAD_LEFT: {
|
||||
if (argEvaluators.size() == 1) {
|
||||
return addToken(new PadLeftEvaluator(toStringEvaluator(subjectEvaluator),
|
||||
|
@ -0,0 +1,67 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi.attribute.expression.language.evaluation.functions;
|
||||
|
||||
|
||||
import java.security.MessageDigest;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.security.Security;
|
||||
import org.apache.commons.codec.digest.DigestUtils;
|
||||
import org.apache.nifi.attribute.expression.language.EvaluationContext;
|
||||
import org.apache.nifi.attribute.expression.language.evaluation.Evaluator;
|
||||
import org.apache.nifi.attribute.expression.language.evaluation.QueryResult;
|
||||
import org.apache.nifi.attribute.expression.language.evaluation.StringEvaluator;
|
||||
import org.apache.nifi.attribute.expression.language.evaluation.StringQueryResult;
|
||||
import org.apache.nifi.attribute.expression.language.exception.AttributeExpressionLanguageException;
|
||||
|
||||
public class HashEvaluator extends StringEvaluator {
|
||||
private static final String SUPPORTED_ALGORITHMS = String.join(", ", Security.getAlgorithms("MessageDigest"));
|
||||
|
||||
private final Evaluator<String> algorithm;
|
||||
private final Evaluator<String> subject;
|
||||
|
||||
public HashEvaluator(final Evaluator<String> subject, final Evaluator<String> algorithm) {
|
||||
this.subject = subject;
|
||||
this.algorithm = algorithm;
|
||||
}
|
||||
|
||||
@Override
|
||||
public QueryResult<String> evaluate(EvaluationContext context) {
|
||||
final String subjectValue = subject.evaluate(context).getValue();
|
||||
if (subjectValue == null) {
|
||||
return new StringQueryResult(null);
|
||||
}
|
||||
|
||||
final String algorithmValue = algorithm.evaluate(context).getValue();
|
||||
final MessageDigest digest = getDigest(algorithmValue);
|
||||
String encoded = new DigestUtils(digest).digestAsHex(subjectValue);
|
||||
return new StringQueryResult(encoded);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Evaluator<?> getSubjectEvaluator() {
|
||||
return subject;
|
||||
}
|
||||
|
||||
private MessageDigest getDigest(String algorithm){
|
||||
try {
|
||||
return MessageDigest.getInstance(algorithm);
|
||||
} catch (NoSuchAlgorithmException e) {
|
||||
throw new AttributeExpressionLanguageException("Invalid hash algorithm: " + algorithm + " not in set [" + SUPPORTED_ALGORITHMS + "]", e);
|
||||
}
|
||||
}
|
||||
}
|
@ -2058,6 +2058,25 @@ public class TestQuery {
|
||||
verifyEquals("${literal(true):ifElse(false, 'b')}", attributes, "false");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHash() {
|
||||
final Map<String, String> attributes = new HashMap<>();
|
||||
attributes.put("str_attr", "string value");
|
||||
attributes.put("nbr_attr", "10");
|
||||
verifyEquals("${literal('john'):hash('MD5')}", attributes, "527bd5b5d689e2c32ae974c6229ff785");
|
||||
verifyEquals("${str_attr:hash('MD5')}", attributes, "64e58419496c7248b4ef25731f88b8c3");
|
||||
verifyEquals("${str_attr:hash('SHA-1')}", attributes, "34990db823e7bb2b47278a7fbf08c62d9e8e4307");
|
||||
verifyEquals("${str_attr:hash('SHA-256')}", attributes, "9b6a1a9167a5caf3f5948413faa89e0ec0de89e12bef55327442e60dcc0e8c9b");
|
||||
verifyEquals("${nbr_attr:toNumber():hash('MD5')}", attributes, "d3d9446802a44259755d38e6d163e820");
|
||||
verifyEquals("${nbr_attr:hash('MD5')}", attributes, "d3d9446802a44259755d38e6d163e820");
|
||||
}
|
||||
|
||||
@Test(expected = AttributeExpressionLanguageException.class)
|
||||
public void testHashFailure() {
|
||||
final Map<String, String> attributes = new HashMap<>();
|
||||
verifyEquals("${literal('john'):hash('NOT_A_ALGO')}", attributes, "527bd5b5d689e2c32ae974c6229ff785");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testThread() {
|
||||
final Map<String, String> attributes = new HashMap<>();
|
||||
|
@ -81,5 +81,10 @@
|
||||
<artifactId>caffeine</artifactId>
|
||||
<version>2.8.1</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
<dependency>
|
||||
<groupId>commons-codec</groupId>
|
||||
<artifactId>commons-codec</artifactId>
|
||||
<version>1.14</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</project>
|
||||
|
@ -0,0 +1,72 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.nifi.record.path.functions;
|
||||
|
||||
import org.apache.commons.codec.digest.DigestUtils;
|
||||
import org.apache.nifi.record.path.FieldValue;
|
||||
import org.apache.nifi.record.path.RecordPathEvaluationContext;
|
||||
import org.apache.nifi.record.path.StandardFieldValue;
|
||||
import org.apache.nifi.record.path.exception.RecordPathException;
|
||||
import org.apache.nifi.record.path.paths.RecordPathSegment;
|
||||
import org.apache.nifi.record.path.util.RecordPathUtils;
|
||||
import org.apache.nifi.serialization.record.util.DataTypeUtils;
|
||||
|
||||
import java.security.MessageDigest;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.security.Security;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
public class Hash extends RecordPathSegment {
|
||||
private static final String SUPPORTED_ALGORITHMS = String.join(", ", Security.getAlgorithms("MessageDigest"));
|
||||
|
||||
private final RecordPathSegment recordPath;
|
||||
private final RecordPathSegment algorithmPath;
|
||||
|
||||
public Hash(final RecordPathSegment recordPath, final RecordPathSegment algorithmPath, final boolean absolute) {
|
||||
super("hash", null, absolute);
|
||||
|
||||
this.recordPath = recordPath;
|
||||
this.algorithmPath = algorithmPath;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<FieldValue> evaluate(final RecordPathEvaluationContext context) {
|
||||
final Stream<FieldValue> fieldValues = recordPath.evaluate(context);
|
||||
return fieldValues.filter(fv -> fv.getValue() != null)
|
||||
.map(fv -> {
|
||||
final String algorithmValue = RecordPathUtils.getFirstStringValue(algorithmPath, context);
|
||||
if (algorithmValue == null || algorithmValue.isEmpty()) {
|
||||
return fv;
|
||||
}
|
||||
|
||||
final MessageDigest digest = getDigest(algorithmValue);
|
||||
final String value = DataTypeUtils.toString(fv.getValue(), (String) null);
|
||||
String encoded = new DigestUtils(digest).digestAsHex(value);
|
||||
return new StandardFieldValue(encoded, fv.getField(), fv.getParent().orElse(null));
|
||||
});
|
||||
}
|
||||
|
||||
private MessageDigest getDigest(String algorithm){
|
||||
try {
|
||||
return MessageDigest.getInstance(algorithm);
|
||||
} catch (NoSuchAlgorithmException e) {
|
||||
throw new RecordPathException("Invalid hash algorithm: " + algorithm + "not in set [" + SUPPORTED_ALGORITHMS + "]", e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -69,6 +69,7 @@ import org.apache.nifi.record.path.functions.Base64Encode;
|
||||
import org.apache.nifi.record.path.functions.Concat;
|
||||
import org.apache.nifi.record.path.functions.Format;
|
||||
import org.apache.nifi.record.path.functions.FieldName;
|
||||
import org.apache.nifi.record.path.functions.Hash;
|
||||
import org.apache.nifi.record.path.functions.PadLeft;
|
||||
import org.apache.nifi.record.path.functions.PadRight;
|
||||
import org.apache.nifi.record.path.functions.Replace;
|
||||
@ -305,6 +306,10 @@ public class RecordPathCompiler {
|
||||
final RecordPathSegment[] args = getArgPaths(argumentListTree, 1, functionName, absolute);
|
||||
return new Base64Decode(args[0], absolute);
|
||||
}
|
||||
case "hash":{
|
||||
final RecordPathSegment[] args = getArgPaths(argumentListTree, 2, functionName, absolute);
|
||||
return new Hash(args[0], args[1], absolute);
|
||||
}
|
||||
case "padLeft": {
|
||||
final int numArgs = argumentListTree.getChildCount();
|
||||
|
||||
|
@ -1581,6 +1581,19 @@ public class TestRecordPath {
|
||||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHash() {
|
||||
final Record record = getCaseTestRecord();
|
||||
assertEquals("61409aa1fd47d4a5332de23cbf59a36f", RecordPath.compile("hash(/firstName, 'MD5')").evaluate(record).getSelectedFields().findFirst().get().getValue());
|
||||
assertEquals("5753a498f025464d72e088a9d5d6e872592d5f91", RecordPath.compile("hash(/firstName, 'SHA-1')").evaluate(record).getSelectedFields().findFirst().get().getValue());
|
||||
}
|
||||
|
||||
@Test(expected = RecordPathException.class)
|
||||
public void testHashFailure() {
|
||||
final Record record = getCaseTestRecord();
|
||||
assertEquals("61409aa1fd47d4a5332de23cbf59a36f", RecordPath.compile("hash(/firstName, 'NOT_A_ALGO')").evaluate(record).getSelectedFields().findFirst().get().getValue());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPadLeft() {
|
||||
final List<RecordField> fields = new ArrayList<>();
|
||||
|
@ -1431,6 +1431,11 @@ Each of the following functions will encode a string according the rules of the
|
||||
|
||||
*Description*: [.description]#Returns a type 3 (MD5 hashed) namespace name-based UUID.#
|
||||
|
||||
[.function]
|
||||
=== hash
|
||||
|
||||
*Description*: [.description]#Returns a hex encoded string using the hash algorithm provided. This can be used to generate unique keys.#
|
||||
|
||||
*Subject Type*: [.subject]#String#
|
||||
|
||||
*Arguments*:
|
||||
@ -1460,6 +1465,15 @@ Each of the following functions will encode a string according the rules of the
|
||||
|
||||
*Examples*: ${attr:UUID5('245b55a8-397d-4480-a41e-16603c8cf9ad')} returns a value similar to 6448f0c0-fd2b-30ad-b05a-deb456f8b060
|
||||
|
||||
- [.argName]#_algorithm_# : [.argDesc]#An algorithm to hash value.
|
||||
Supports one of [SHA-384, SHA-224, SHA-256, MD2, SHA, SHA-512, MD5]#
|
||||
|
||||
|
||||
*Return Type*: [.returnType]#String#
|
||||
|
||||
*Examples*: We can hash an attribute named "payload" by using the Expression
|
||||
`${payload:hash('MD5')}` If the attribute payload had a value of "string value"
|
||||
then the Expression `${payload:hash('MD5')}` will return "64e58419496c7248b4ef25731f88b8c3".
|
||||
|
||||
|
||||
|
||||
|
@ -3,7 +3,7 @@
|
||||
// contributor license agreements. See the NOTICE file distributed with
|
||||
// this work for additional information regarding copyright ownership.
|
||||
// The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
// (the "License"); you may not use this file except ixn compliance with
|
||||
// (the "License"); you may not use this file except in compliance with
|
||||
// the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
@ -778,6 +778,37 @@ The following record path expression would decode the String using Base64:
|
||||
| `base64Decode(/name)` | John
|
||||
|==========================================================
|
||||
|
||||
=== hash
|
||||
|
||||
Converts a String using a hash algorithm. For example, given a schema such as:
|
||||
|
||||
----
|
||||
{
|
||||
"type": "record",
|
||||
"name": "events",
|
||||
"fields": [
|
||||
{ "name": "name", "type": "string" }
|
||||
]
|
||||
}
|
||||
----
|
||||
|
||||
and a record such as:
|
||||
|
||||
----
|
||||
{
|
||||
"name" : "John"
|
||||
}
|
||||
----
|
||||
|
||||
The following record path expression would hash the String using one of these, [SHA-384, SHA-224, SHA-256, MD2, SHA, SHA-512, MD5] algorithms.
|
||||
|
||||
|==========================================================
|
||||
| RecordPath | Return value
|
||||
| `hash(/name, 'MD5')` | 527bd5b5d689e2c32ae974c6229ff785
|
||||
|==========================================================
|
||||
|
||||
|
||||
|
||||
=== padLeft
|
||||
|
||||
Prepends characters to the input String until it reaches the desired length.
|
||||
|
Loading…
x
Reference in New Issue
Block a user