diff --git a/nifi-assembly/pom.xml b/nifi-assembly/pom.xml
index a83fc52cf3..cd8c0d1a1f 100755
--- a/nifi-assembly/pom.xml
+++ b/nifi-assembly/pom.xml
@@ -359,6 +359,11 @@
nifi-beats-narnar
+
+ org.apache.nifi
+ nifi-cybersecurity-nar
+ nar
+ org.apache.nifinifi-email-nar
diff --git a/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/pom.xml b/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/pom.xml
index 092f75ef36..81ad8b5f3f 100644
--- a/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/pom.xml
+++ b/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/pom.xml
@@ -44,12 +44,6 @@
tlsh1.0.0
-
- org.apache.nifi
- nifi-standard-processors
- 1.2.0-SNAPSHOT
- provided
- org.apache.nifinifi-mock
@@ -65,5 +59,26 @@
junittest
+
+ org.apache.nifi
+ nifi-properties
+ compile
+
+
+
+
+ org.apache.rat
+ apache-rat-plugin
+
+
+ src/test/resources/blank_ssdeep.list
+ src/test/resources/empty.list
+ src/test/resources/ssdeep.list
+ src/test/resources/tlsh.list
+
+
+
+
+
diff --git a/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/main/java/org/apache/nifi/processors/cybersecurity/AbstractFuzzyHashProcessor.java b/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/main/java/org/apache/nifi/processors/cybersecurity/AbstractFuzzyHashProcessor.java
new file mode 100644
index 0000000000..c5b560d7d2
--- /dev/null
+++ b/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/main/java/org/apache/nifi/processors/cybersecurity/AbstractFuzzyHashProcessor.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.cybersecurity;
+
+
+import com.idealista.tlsh.TLSH;
+import info.debatty.java.spamsum.SpamSum;
+import org.apache.nifi.components.AllowableValue;
+import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.flowfile.FlowFile;
+import org.apache.nifi.processor.AbstractProcessor;
+import org.apache.nifi.processor.Relationship;
+import org.apache.nifi.processor.util.StandardValidators;
+
+import java.util.List;
+import java.util.Set;
+
+abstract class AbstractFuzzyHashProcessor extends AbstractProcessor {
+ final protected static String ssdeep = "ssdeep";
+ final protected static String tlsh = "tlsh";
+
+ public static final AllowableValue allowableValueSSDEEP = new AllowableValue(
+ ssdeep,
+ ssdeep,
+ "Uses ssdeep / SpamSum 'context triggered piecewise hash'.");
+ public static final AllowableValue allowableValueTLSH = new AllowableValue(
+ tlsh,
+ tlsh,
+ "Uses TLSH (Trend 'Locality Sensitive Hash'). Note: FlowFile Content must be at least 512 characters long");
+
+ public static final PropertyDescriptor ATTRIBUTE_NAME = new PropertyDescriptor.Builder()
+ .name("ATTRIBUTE_NAME")
+ .displayName("Hash Attribute Name")
+ .description("The name of the FlowFile Attribute that should hold the Fuzzy Hash Value")
+ .required(true)
+ .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
+ .defaultValue("fuzzyhash.value")
+ .build();
+
+ public static final PropertyDescriptor HASH_ALGORITHM = new PropertyDescriptor.Builder()
+ .name("HASH_ALGORITHM")
+ .displayName("Hashing Algorithm")
+ .description("The hashing algorithm utilised")
+ .allowableValues(allowableValueSSDEEP, allowableValueTLSH)
+ .required(true)
+ .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
+ .build();
+
+
+ protected List descriptors;
+
+ protected Set relationships;
+
+ protected boolean checkMinimumAlgorithmRequirements(String algorithm, FlowFile flowFile) {
+ // Check if content matches minimum length requirement
+ if (algorithm.equals(tlsh) && flowFile.getSize() < 512 ) {
+ return false;
+ } else {
+ return true;
+ }
+ }
+
+
+ protected String generateHash(String algorithm, String content) {
+ switch (algorithm) {
+ case tlsh:
+ return new TLSH(content).hash();
+ case ssdeep:
+ return new SpamSum().HashString(content);
+ default:
+ return null;
+ }
+ }
+}
diff --git a/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/main/java/org/apache/nifi/processors/cybersecurity/CompareFuzzyHash.java b/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/main/java/org/apache/nifi/processors/cybersecurity/CompareFuzzyHash.java
new file mode 100644
index 0000000000..f08ac79beb
--- /dev/null
+++ b/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/main/java/org/apache/nifi/processors/cybersecurity/CompareFuzzyHash.java
@@ -0,0 +1,277 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nifi.processors.cybersecurity;
+
+import org.apache.nifi.annotation.behavior.EventDriven;
+import org.apache.nifi.annotation.behavior.InputRequirement;
+import org.apache.nifi.annotation.behavior.SideEffectFree;
+import org.apache.nifi.annotation.behavior.SupportsBatching;
+import org.apache.nifi.annotation.behavior.WritesAttribute;
+import org.apache.nifi.annotation.behavior.WritesAttributes;
+import org.apache.nifi.annotation.documentation.CapabilityDescription;
+import org.apache.nifi.annotation.documentation.SeeAlso;
+import org.apache.nifi.annotation.documentation.Tags;
+import org.apache.nifi.annotation.lifecycle.OnScheduled;
+import org.apache.nifi.components.AllowableValue;
+import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.flowfile.FlowFile;
+import org.apache.nifi.logging.ComponentLog;
+import org.apache.nifi.processor.ProcessContext;
+import org.apache.nifi.processor.ProcessSession;
+import org.apache.nifi.processor.ProcessorInitializationContext;
+import org.apache.nifi.processor.Relationship;
+import org.apache.nifi.processor.exception.ProcessException;
+import org.apache.nifi.processor.util.StandardValidators;
+import org.apache.nifi.processors.cybersecurity.matchers.FuzzyHashMatcher;
+import org.apache.nifi.processors.cybersecurity.matchers.SSDeepHashMatcher;
+import org.apache.nifi.processors.cybersecurity.matchers.TLSHHashMatcher;
+import org.apache.nifi.util.StringUtils;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+
+
+@EventDriven
+@SideEffectFree
+@SupportsBatching
+@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
+@SeeAlso({FuzzyHashContent.class})
+@Tags({"hashing", "fuzzy-hashing", "cyber-security"})
+@CapabilityDescription("Compares an attribute containing a Fuzzy Hash against a file containing a list of fuzzy hashes, " +
+ "appending an attribute to the FlowFile in case of a successful match.")
+
+@WritesAttributes({
+ @WritesAttribute(attribute = "XXXX.N.match", description = "The match that resembles the attribute specified " +
+ "by the property. Note that: 'XXX' gets replaced with the "),
+ @WritesAttribute(attribute = "XXXX.N.similarity", description = "The similarity score between this flowfile" +
+ "and its match of the same number N. Note that: 'XXX' gets replaced with the ")})
+
+public class CompareFuzzyHash extends AbstractFuzzyHashProcessor {
+ public static final AllowableValue singleMatch = new AllowableValue(
+ "single",
+ "single",
+ "Send FlowFile to matched after the first match above threshold");
+ public static final AllowableValue multiMatch = new AllowableValue(
+ "multi-match",
+ "multi-match",
+ "Iterate full list of hashes before deciding to send FlowFile to matched or unmatched");
+
+ public static final PropertyDescriptor HASH_LIST_FILE = new PropertyDescriptor.Builder()
+ .name("HASH_LIST_FILE")
+ .displayName("Hash List source file")
+ .description("Path to the file containing hashes to be validated against")
+ .required(true)
+ .addValidator(StandardValidators.FILE_EXISTS_VALIDATOR)
+ .build();
+
+ // Note we add a PropertyDescriptor HASH_ALGORITHM and ATTRIBUTE_NAME from parent class
+
+ public static final PropertyDescriptor MATCH_THRESHOLD = new PropertyDescriptor.Builder()
+ // Note that while both TLSH and SSDeep seems to return int, we treat them as double in code.
+ // The rationale behind being the expectation that other algorithms that may return double values
+ // may be added to the processor later on.
+ .name("MATCH_THRESHOLD")
+ .displayName("Match threshold")
+ .description("The similarity score must exceed or be equal to in order for" +
+ "match to be considered true. Refer to Additional Information for differences between TLSH " +
+ "and SSDEEP scores and how they relate to this property.")
+ .required(true)
+ .addValidator(StandardValidators.NUMBER_VALIDATOR)
+ .build();
+
+ public static final PropertyDescriptor MATCHING_MODE = new PropertyDescriptor.Builder()
+ .name("MATCHING_MODE")
+ .displayName("Matching mode")
+ .description("Defines if the Processor should try to match as many entries as possible (" + multiMatch.getDisplayName() +
+ ") or if it should stop after the first match (" + singleMatch.getDisplayName() + ")")
+ .required(true)
+ .allowableValues(singleMatch,multiMatch)
+ .defaultValue(singleMatch.getValue())
+ .build();
+
+ public static final Relationship REL_FOUND = new Relationship.Builder()
+ .name("found")
+ .description("Any FlowFile that is successfully matched to an existing hash will be sent to this Relationship.")
+ .build();
+
+ public static final Relationship REL_NOT_FOUND = new Relationship.Builder()
+ .name("not-found")
+ .description("Any FlowFile that cannot be matched to an existing hash will be sent to this Relationship.")
+ .build();
+
+ public static final Relationship REL_FAILURE = new Relationship.Builder()
+ .name("failure")
+ .description("Any FlowFile that cannot be matched, e.g. (lacks the attribute) will be sent to this Relationship.")
+ .build();
+
+ @Override
+ protected void init(final ProcessorInitializationContext context) {
+ final List descriptors = new ArrayList();
+ descriptors.add(HASH_LIST_FILE);
+ // As mentioned above, add the PropertyDescriptor HASH_ALGORITHM and ATTRIBUTE_NAME from parent class
+ descriptors.add(HASH_ALGORITHM);
+ descriptors.add(ATTRIBUTE_NAME);
+ descriptors.add(MATCH_THRESHOLD);
+ descriptors.add(MATCHING_MODE);
+ this.descriptors = Collections.unmodifiableList(descriptors);
+
+ final Set relationships = new HashSet();
+ relationships.add(REL_FOUND);
+ relationships.add(REL_NOT_FOUND);
+ relationships.add(REL_FAILURE);
+ this.relationships = Collections.unmodifiableSet(relationships);
+ }
+
+ @Override
+ public Set getRelationships() {
+ return this.relationships;
+ }
+
+ @Override
+ public final List getSupportedPropertyDescriptors() {
+ return descriptors;
+ }
+
+ @OnScheduled
+ public void onScheduled(final ProcessContext context) {
+ }
+
+ @Override
+ public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
+
+ FlowFile flowFile = session.get();
+ if (flowFile == null) {
+ return;
+ }
+
+ final ComponentLog logger = getLogger();
+ String algorithm = context.getProperty(HASH_ALGORITHM).getValue();
+
+ final String attributeName = context.getProperty(ATTRIBUTE_NAME).getValue();
+ String inputHash = flowFile.getAttribute(attributeName);
+
+ if (inputHash == null) {
+ getLogger().info("FlowFile {} lacks the required '{}' attribute, routing to failure.",
+ new Object[]{flowFile, attributeName});
+ session.transfer(flowFile, REL_FAILURE);
+ return;
+ }
+
+ FuzzyHashMatcher fuzzyHashMatcher = null;
+
+ switch (algorithm) {
+ case tlsh:
+ fuzzyHashMatcher = new TLSHHashMatcher(getLogger());
+ break;
+ case ssdeep:
+ fuzzyHashMatcher = new SSDeepHashMatcher(getLogger());
+ break;
+ default:
+ getLogger().error("Seems like the processor is configured to use unsupported algorithm '{}' ? Yielding.",
+ new Object[]{algorithm});
+ context.yield();
+ return;
+ }
+
+ if (fuzzyHashMatcher.isValidHash(inputHash) == false) {
+ // and if that is the case we log
+ logger.error("Invalid hash provided. Sending to failure");
+ // and send to failure
+ session.transfer(flowFile, REL_FAILURE);
+ session.commit();
+ return;
+ }
+
+ double similarity = 0;
+ double matchThreshold = context.getProperty(MATCH_THRESHOLD).asDouble();
+
+ try {
+ Map matched = new ConcurrentHashMap();
+
+ BufferedReader reader = fuzzyHashMatcher.getReader(context.getProperty(HASH_LIST_FILE).getValue());
+
+ String line = null;
+
+ iterateFile: while ((line = reader.readLine()) != null) {
+ if (line != null) {
+ similarity = fuzzyHashMatcher.getSimilarity(inputHash, line);
+
+ if (fuzzyHashMatcher.matchExceedsThreshold(similarity, matchThreshold)) {
+ String match = fuzzyHashMatcher.getMatch(line);
+ // A malformed file may cause a match with no filename
+ // Because this would simply look odd, we ignore such entry and log
+ if (!StringUtils.isEmpty(match)) {
+ matched.put(match, similarity);
+ } else {
+ logger.error("Found a match against a malformed entry '{}'. Please inspect the contents of" +
+ "the {} file and ensure they are properly formatted",
+ new Object[]{line, HASH_LIST_FILE.getDisplayName()});
+ }
+ }
+ }
+
+ // Check if single match is desired and if a match has been made
+ if (context.getProperty(MATCHING_MODE).getValue() == singleMatch.getValue() && (matched.size() > 0)) {
+ // and save time by breaking the outer loop
+ break iterateFile;
+ }
+ }
+ // no matter if the break was called or not, Continue processing
+ // First by creating a new map to hold attributes
+ Map attributes = new ConcurrentHashMap();
+
+ // Then by iterating over the hashmap of matches
+ if (matched.size() > 0) {
+ int x = 0;
+ for (Map.Entry entry : matched.entrySet()) {
+ // defining attributes accordingly
+ attributes.put(
+ attributeName + "." + x + ".match",
+ entry.getKey());
+ attributes.put(
+ attributeName + "." + x + ".similarity",
+ String.valueOf(entry.getValue()));
+ x++;
+ }
+ // Finally, append the attributes to the flowfile and sent to match
+ flowFile = session.putAllAttributes(flowFile, attributes);
+ session.transfer(flowFile, REL_FOUND);
+ session.commit();
+ return;
+ } else {
+ // Otherwise send it to non-match
+ session.transfer(flowFile, REL_NOT_FOUND);
+ session.commit();
+ return;
+ }
+ } catch (IOException e) {
+ logger.error("Error while reading the hash input source" );
+ context.yield();
+ }
+ }
+
+
+
+}
diff --git a/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/main/java/org/apache/nifi/processors/cybersecurity/FuzzyHashContent.java b/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/main/java/org/apache/nifi/processors/cybersecurity/FuzzyHashContent.java
index e3f8a63d44..78ed94fde1 100644
--- a/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/main/java/org/apache/nifi/processors/cybersecurity/FuzzyHashContent.java
+++ b/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/main/java/org/apache/nifi/processors/cybersecurity/FuzzyHashContent.java
@@ -16,16 +16,12 @@
*/
package org.apache.nifi.processors.cybersecurity;
-import com.idealista.tlsh.TLSH;
import com.idealista.tlsh.exceptions.InsufficientComplexityException;
-import info.debatty.java.spamsum.SpamSum;
import org.apache.nifi.annotation.behavior.EventDriven;
import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.behavior.SideEffectFree;
import org.apache.nifi.annotation.behavior.SupportsBatching;
-import org.apache.nifi.annotation.behavior.ReadsAttribute;
-import org.apache.nifi.annotation.behavior.ReadsAttributes;
import org.apache.nifi.annotation.behavior.WritesAttribute;
import org.apache.nifi.annotation.behavior.WritesAttributes;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
@@ -33,19 +29,17 @@ import org.apache.nifi.annotation.documentation.SeeAlso;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.annotation.lifecycle.OnScheduled;
-import org.apache.nifi.components.AllowableValue;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.processor.exception.ProcessException;
-import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.ProcessorInitializationContext;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.io.InputStreamCallback;
import org.apache.nifi.processor.util.StandardValidators;
-import org.apache.nifi.processors.standard.HashContent;
+import org.apache.nifi.util.StringUtils;
import org.apache.nifi.stream.io.StreamUtils;
@@ -73,31 +67,13 @@ import java.util.concurrent.atomic.AtomicReference;
"evaluations in memory. Accordingly, it is important to consider the anticipated profile of content being " +
"evaluated by this processor and the hardware supporting it especially when working against large files.")
-@SeeAlso({HashContent.class})
-@ReadsAttributes({@ReadsAttribute(attribute="", description="")})
+@SeeAlso(classNames = {"org.apache.nifi.processors.standard.HashContent"}, value = {CompareFuzzyHash.class})
@WritesAttributes({@WritesAttribute(attribute = "", description = "This Processor adds an attribute whose value is the result of Hashing the "
+ "existing FlowFile content. The name of this attribute is specified by the property")})
-public class FuzzyHashContent extends AbstractProcessor {
+public class FuzzyHashContent extends AbstractFuzzyHashProcessor {
- public static final AllowableValue allowableValueSSDEEP = new AllowableValue(
- "ssdeep",
- "ssdeep",
- "Uses ssdeep / SpamSum 'context triggered piecewise hash'.");
- public static final AllowableValue allowableValueTLSH = new AllowableValue(
- "tlsh",
- "tlsh",
- "Uses TLSH (Trend 'Locality Sensitive Hash'). Note: FlowFile Content must be at least 512 characters long");
- public static final PropertyDescriptor ATTRIBUTE_NAME = new PropertyDescriptor.Builder()
- .name("ATTRIBUTE_NAME")
- .displayName("Hash Attribute Name")
- .description("The name of the FlowFile Attribute into which the Hash Value should be written. " +
- "If the value already exists, it will be overwritten")
- .required(true)
- .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
- .defaultValue("fuzzyhash.value")
- .build();
public static final PropertyDescriptor HASH_ALGORITHM = new PropertyDescriptor.Builder()
.name("HASH_ALGORITHM")
@@ -109,12 +85,12 @@ public class FuzzyHashContent extends AbstractProcessor {
.build();
public static final Relationship REL_SUCCESS = new Relationship.Builder()
- .name("Success")
+ .name("success")
.description("Any FlowFile that is successfully hashed will be sent to this Relationship.")
.build();
public static final Relationship REL_FAILURE = new Relationship.Builder()
- .name("Failure")
+ .name("failure")
.description("Any FlowFile that is successfully hashed will be sent to this Relationship.")
.build();
@@ -157,18 +133,17 @@ public class FuzzyHashContent extends AbstractProcessor {
}
final ComponentLog logger = getLogger();
+ String algorithm = context.getProperty(HASH_ALGORITHM).getValue();
// Check if content matches minimum length requirement
- if (context.getProperty(HASH_ALGORITHM).equals(allowableValueTLSH) && flowFile.getSize() < 512 ) {
- logger.info("The content of {} is smaller than the minimum required by TLSH, routing to failure", new Object[]{flowFile});
+
+ if (checkMinimumAlgorithmRequirements(algorithm, flowFile) == false) {
+ logger.error("The content of '{}' is smaller than the minimum required by {}, routing to failure",
+ new Object[]{flowFile, algorithm});
session.transfer(flowFile, REL_FAILURE);
return;
}
-
-
-
-
final AtomicReference hashValueHolder = new AtomicReference<>(null);
try {
@@ -178,13 +153,12 @@ public class FuzzyHashContent extends AbstractProcessor {
try (ByteArrayOutputStream holder = new ByteArrayOutputStream()) {
StreamUtils.copy(in,holder);
- if (context.getProperty(HASH_ALGORITHM).getValue().equals(allowableValueSSDEEP.getValue())) {
- hashValueHolder.set(new SpamSum().HashString(holder.toString()));
+ String hashValue = generateHash(algorithm, holder.toString());
+ if (StringUtils.isBlank(hashValue) == false) {
+ hashValueHolder.set(hashValue);
}
- if (context.getProperty(HASH_ALGORITHM).getValue().equals(allowableValueTLSH.getValue())) {
- hashValueHolder.set(new TLSH(holder.toString()).hash());
- }
+
}
}
});
@@ -199,4 +173,5 @@ public class FuzzyHashContent extends AbstractProcessor {
session.transfer(flowFile, REL_FAILURE);
}
}
+
}
diff --git a/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/main/java/org/apache/nifi/processors/cybersecurity/matchers/FuzzyHashMatcher.java b/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/main/java/org/apache/nifi/processors/cybersecurity/matchers/FuzzyHashMatcher.java
new file mode 100644
index 0000000000..91dbd1237c
--- /dev/null
+++ b/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/main/java/org/apache/nifi/processors/cybersecurity/matchers/FuzzyHashMatcher.java
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.cybersecurity.matchers;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+
+public interface FuzzyHashMatcher {
+
+ BufferedReader getReader(String source) throws IOException;
+
+ boolean matchExceedsThreshold(double similarity, double matchThreshold) ;
+
+ double getSimilarity(String inputHash, String existingHash);
+
+ boolean isValidHash(String inputHash);
+
+ String getHash(String line);
+
+ String getMatch(String line);
+}
diff --git a/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/main/java/org/apache/nifi/processors/cybersecurity/matchers/SSDeepHashMatcher.java b/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/main/java/org/apache/nifi/processors/cybersecurity/matchers/SSDeepHashMatcher.java
new file mode 100644
index 0000000000..9371bfd527
--- /dev/null
+++ b/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/main/java/org/apache/nifi/processors/cybersecurity/matchers/SSDeepHashMatcher.java
@@ -0,0 +1,126 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.cybersecurity.matchers;
+
+import info.debatty.java.spamsum.SpamSum;
+import org.apache.nifi.logging.ComponentLog;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.Scanner;
+
+public class SSDeepHashMatcher implements FuzzyHashMatcher {
+
+ ComponentLog logger;
+
+ public SSDeepHashMatcher() {
+
+ }
+
+ public SSDeepHashMatcher(ComponentLog logger) {
+ this.logger = logger;
+ }
+
+ @Override
+ public BufferedReader getReader(String source) throws IOException {
+
+ File file = new File(source);
+
+ FileInputStream fileInputStream = new FileInputStream(file);
+ BufferedReader reader = new BufferedReader(new InputStreamReader(fileInputStream));
+
+ // If SSDeep, skip the first line (as the usual format used by other tools adds a header line
+ // to a file list
+ reader.readLine();
+
+ return reader;
+ }
+
+ @Override
+ public boolean matchExceedsThreshold(double similarity, double matchThreshold) {
+ if (similarity >= matchThreshold) {
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ @Override
+ public double getSimilarity(String inputHash, String existingHash) {
+ String[] hashToCompare = existingHash.split(",", 2);
+ if (hashToCompare.length > 0) {
+ return new SpamSum().match(inputHash, hashToCompare[0]);
+ } else {
+ return Double.NaN;
+ }
+ }
+
+ @Override
+ public boolean isValidHash(String inputHash) {
+ // format looks like
+ // blocksize:hash:hash
+
+ String [] fields = inputHash.split(":", 3);
+
+ if (fields.length == 3) {
+ Scanner sc = new Scanner(fields[0]);
+
+ boolean isNumber = sc.hasNextInt();
+ if (isNumber == false && logger != null) {
+ if (logger.isDebugEnabled()) {
+ logger.debug("Field should be numeric but got '{}'. Will tell processor to ignore.",
+ new Object[] {fields[0]});
+ }
+ }
+
+ boolean hashOneIsNotEmpty = !fields[1].isEmpty();
+ boolean hashTwoIsNotEmpty = !fields[2].isEmpty();
+
+ if (isNumber && hashOneIsNotEmpty && hashTwoIsNotEmpty) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ @Override
+ public String getHash(String line) {
+ if (isValidHash(line)) {
+ return line.split(",", 2)[0];
+ } else {
+ return null;
+ }
+ }
+
+ @Override
+ public String getMatch(String line) {
+ if (isValidHash(line)) {
+ String[] match = line.split(",", 2);
+ // Because the file can be malformed and contain an unnamed match,
+ // if match has a filename...
+ if (match.length == 2) {
+ // Return it.
+ return match[1];
+ }
+ }
+ // Or return null
+ return null;
+ }
+}
diff --git a/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/main/java/org/apache/nifi/processors/cybersecurity/matchers/TLSHHashMatcher.java b/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/main/java/org/apache/nifi/processors/cybersecurity/matchers/TLSHHashMatcher.java
new file mode 100644
index 0000000000..73a140a5bb
--- /dev/null
+++ b/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/main/java/org/apache/nifi/processors/cybersecurity/matchers/TLSHHashMatcher.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.cybersecurity.matchers;
+
+
+import com.idealista.tlsh.digests.Digest;
+import com.idealista.tlsh.digests.DigestBuilder;
+import org.apache.nifi.logging.ComponentLog;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+
+import static org.apache.nifi.processors.cybersecurity.CompareFuzzyHash.HASH_LIST_FILE;
+
+public class TLSHHashMatcher implements FuzzyHashMatcher {
+
+ ComponentLog logger;
+
+ public TLSHHashMatcher(ComponentLog logger) {
+ this.logger = logger;
+ }
+
+ @Override
+ public BufferedReader getReader(String source) throws IOException {
+
+ File file = new File(source);
+
+ FileInputStream fileInputStream = new FileInputStream(file);
+ BufferedReader reader = new BufferedReader(new InputStreamReader(fileInputStream));
+
+ return reader;
+ }
+
+ @Override
+ public boolean matchExceedsThreshold(double similarity, double matchThreshold) {
+ if (similarity <= matchThreshold) {
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ @Override
+ public double getSimilarity(String inputHash, String existingHash) {
+ String[] hashToCompare = existingHash.split("\t", 2);
+ // This will return NaN in case either hash fails validation
+ if (isValidHash(inputHash) && isValidHash(hashToCompare[0])) {
+ Digest inputDigest = new DigestBuilder().withHash(inputHash).build();
+ Digest existingHashDigest = new DigestBuilder().withHash(hashToCompare[0]).build();
+
+ return inputDigest.calculateDifference(existingHashDigest, true);
+ } else {
+ return Double.NaN;
+ }
+ }
+
+ @Override
+ public boolean isValidHash(String stringFromHashList) {
+ String[] hashToCompare = stringFromHashList.split("\t", 2);
+ // This will return false in case the string fails validation
+ if (hashToCompare.length > 0) {
+ // Because DigestBuilder raises all sort of exceptions, so in order to keep the onTrigger loop a
+ // bit cleaner, we capture them here and return NaN to the loop above, otherwise simply return the
+ // similarity score.
+ try {
+ Digest digest = new DigestBuilder().withHash(hashToCompare[0]).build();
+ return true;
+ } catch (ArrayIndexOutOfBoundsException | StringIndexOutOfBoundsException | NumberFormatException e) {
+ logger.error("Got {} while processing the string '{}'. This usually means the file " +
+ "defined by '{}' property contains invalid entries.",
+ new Object[]{e.getCause(), hashToCompare[0], HASH_LIST_FILE.getDisplayName()});
+ }
+ }
+ return false;
+ }
+
+ @Override
+ public String getHash(String line) {
+ if (isValidHash(line)) {
+ return line.split("\t", 2)[0];
+ } else {
+ return null;
+ }
+ }
+
+ @Override
+ public String getMatch(String line) {
+ if (isValidHash(line)) {
+ String[] match = line.split("\t", 2);
+ // Because the file can be malformed and contain an unnamed match,
+ // if match has a filename...
+ if (match.length == 2) {
+ // Return it.
+ return match[1];
+ }
+ }
+ // Or return null
+ return null;
+ }
+}
diff --git a/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor b/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor
index b46c4940c5..e7cb5f7fca 100644
--- a/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor
+++ b/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor
@@ -12,4 +12,5 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-org.apache.nifi.processors.cybersecurity.FuzzyHashContent
\ No newline at end of file
+org.apache.nifi.processors.cybersecurity.FuzzyHashContent
+org.apache.nifi.processors.cybersecurity.CompareFuzzyHash
\ No newline at end of file
diff --git a/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/main/resources/docs/org/apache/nifi/processors/cybersecurity/CompareFuzzyHash/additionalDetails.html b/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/main/resources/docs/org/apache/nifi/processors/cybersecurity/CompareFuzzyHash/additionalDetails.html
new file mode 100644
index 0000000000..fe0f6bd5b3
--- /dev/null
+++ b/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/main/resources/docs/org/apache/nifi/processors/cybersecurity/CompareFuzzyHash/additionalDetails.html
@@ -0,0 +1,45 @@
+
+
+
+
+
+ CompareFuzzyHash
+
+
+
+
+
+
Description:
+
This Processor compares a an attribute containing a Fuzzy Hash (TLSH or SSDeep) value and compares it against a list
+ of hashes of the same family (i.e. TLSH is compared with a list of TLSH hashes), routing them to match or non-match
+ depending on a user configured threshold for similarity.
+
+
+
It is important to note that:
+
+
+
TLSH similarity increases as product of its comparison function decreases (i.e. 0 indicates nearly identical files)
+
SSDeep similarity directly relates to the product of its comparison function (e.g. 99 indicates nearly identical files
+
+
Based on the above, this processor when referring to "exceed the score" may be referring to:
+
+
+
a value equal or lower than the configured threshold (in case of TLSH)
+
a value equal or higher than the configured threshold (in case of SSDeep)
+
+
+
\ No newline at end of file
diff --git a/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/test/java/org/apache/nifi/processors/cybersecurity/TestCompareFuzzyHash.java b/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/test/java/org/apache/nifi/processors/cybersecurity/TestCompareFuzzyHash.java
new file mode 100644
index 0000000000..17c42ad299
--- /dev/null
+++ b/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/test/java/org/apache/nifi/processors/cybersecurity/TestCompareFuzzyHash.java
@@ -0,0 +1,380 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.cybersecurity;
+
+
+import org.apache.nifi.processors.cybersecurity.matchers.FuzzyHashMatcher;
+import org.apache.nifi.processors.cybersecurity.matchers.SSDeepHashMatcher;
+import org.apache.nifi.util.MockFlowFile;
+import org.apache.nifi.util.TestRunner;
+import org.apache.nifi.util.TestRunners;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+
+public class TestCompareFuzzyHash {
+ String ssdeepInput = "48:c1xs8Z/m6H0eRH31S8p8bHENANkPrNy4tkPytwPyh2jTytxPythPytNdPytDgYyF:OuO/mg3HFSRHEb44RNMi6uHU2hcq3";
+ String tlshInput = "EB519EA4A8F95171A2A409C1DEEB9872AF55C137E00A5289F1CCD0CE4F6CCD784BB4B7";
+
+ final CompareFuzzyHash proc = new CompareFuzzyHash();
+ final private TestRunner runner = TestRunners.newTestRunner(proc);
+
+ @After
+ public void stop() {
+ runner.shutdown();
+ }
+
+ @Test
+ public void testSsdeepCompareFuzzyHash() {
+ double matchingSimilarity = 80;
+ runner.setProperty(CompareFuzzyHash.HASH_ALGORITHM, CompareFuzzyHash.allowableValueSSDEEP.getValue());
+ runner.setProperty(CompareFuzzyHash.ATTRIBUTE_NAME, "fuzzyhash.value");
+ runner.setProperty(CompareFuzzyHash.HASH_LIST_FILE, "src/test/resources/ssdeep.list");
+ runner.setProperty(CompareFuzzyHash.MATCH_THRESHOLD, String.valueOf(matchingSimilarity));
+ runner.setProperty(CompareFuzzyHash.MATCHING_MODE, CompareFuzzyHash.singleMatch.getValue());
+
+ Map attributes = new HashMap<>();
+ attributes.put("fuzzyhash.value", ssdeepInput);
+
+ runner.enqueue("bogus".getBytes(), attributes);
+ runner.run();
+
+ runner.assertQueueEmpty();
+ runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_FOUND, 1);
+
+ final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_FOUND).get(0);
+
+
+ outFile.assertAttributeEquals(
+ "fuzzyhash.value.0.match",
+ "\"nifi/nifi-nar-bundles/nifi-beats-bundle/nifi-beats-processors/pom.xml\""
+ );
+ double similarity = Double.valueOf(outFile.getAttribute("fuzzyhash.value.0.similarity"));
+ Assert.assertTrue(similarity >= matchingSimilarity);
+
+ outFile.assertAttributeNotExists("fuzzyhash.value.1.match");
+ }
+
+ @Test
+ public void testSsdeepCompareFuzzyHashMultipleMatches() {
+ double matchingSimilarity = 80;
+ runner.setProperty(CompareFuzzyHash.HASH_ALGORITHM, CompareFuzzyHash.allowableValueSSDEEP.getValue());
+ runner.setProperty(CompareFuzzyHash.ATTRIBUTE_NAME, "fuzzyhash.value");
+ runner.setProperty(CompareFuzzyHash.HASH_LIST_FILE, "src/test/resources/ssdeep.list");
+ runner.setProperty(CompareFuzzyHash.MATCH_THRESHOLD, String.valueOf(matchingSimilarity));
+ runner.setProperty(CompareFuzzyHash.MATCHING_MODE, CompareFuzzyHash.multiMatch.getValue());
+
+ Map attributes = new HashMap<>();
+ attributes.put("fuzzyhash.value", ssdeepInput );
+
+ runner.enqueue("bogus".getBytes(), attributes);
+ runner.run();
+
+ runner.assertQueueEmpty();
+ runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_FOUND, 1);
+
+ final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_FOUND).get(0);
+
+
+ outFile.assertAttributeEquals("fuzzyhash.value.0.match",
+ "\"nifi/nifi-nar-bundles/nifi-lumberjack-bundle/nifi-lumberjack-processors/pom.xml\""
+ );
+
+ double similarity = Double.valueOf(outFile.getAttribute("fuzzyhash.value.0.similarity"));
+ Assert.assertTrue(similarity >= matchingSimilarity);
+
+ outFile.assertAttributeEquals("fuzzyhash.value.1.match",
+ "\"nifi/nifi-nar-bundles/nifi-beats-bundle/nifi-beats-processors/pom.xml\""
+ );
+ similarity = Double.valueOf(outFile.getAttribute("fuzzyhash.value.1.similarity"));
+ Assert.assertTrue(similarity >= matchingSimilarity);
+ }
+
+ @Test
+ public void testSsdeepCompareFuzzyHashWithBlankHashList() {
+ double matchingSimilarity = 80;
+ runner.setProperty(CompareFuzzyHash.HASH_ALGORITHM, CompareFuzzyHash.allowableValueSSDEEP.getValue());
+ runner.setProperty(CompareFuzzyHash.ATTRIBUTE_NAME, "fuzzyhash.value");
+ runner.setProperty(CompareFuzzyHash.HASH_LIST_FILE, "src/test/resources/blank_ssdeep.list");
+ runner.setProperty(CompareFuzzyHash.MATCH_THRESHOLD, String.valueOf(matchingSimilarity));
+
+ Map attributes = new HashMap<>();
+ attributes.put("fuzzyhash.value", "6:hERjIfhRrlB63J0FDw1NBQmEH68xwMSELN:hZrlB62IwMS");
+
+ runner.enqueue("bogus".getBytes(), attributes);
+ runner.run();
+
+ runner.assertQueueEmpty();
+ runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_NOT_FOUND, 1);
+
+ final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_NOT_FOUND).get(0);
+ }
+
+ @Test
+ public void testSsdeepCompareFuzzyHashWithInvalidHashList() {
+ // This is different from the "BlankHashList" series of tests in that the file lacks headers and as such is totally
+ // invalid
+ double matchingSimilarity = 80;
+ runner.setProperty(CompareFuzzyHash.HASH_ALGORITHM, CompareFuzzyHash.allowableValueSSDEEP.getValue());
+ runner.setProperty(CompareFuzzyHash.ATTRIBUTE_NAME, "fuzzyhash.value");
+ runner.setProperty(CompareFuzzyHash.HASH_LIST_FILE, "src/test/resources/empty.list");
+ runner.setProperty(CompareFuzzyHash.MATCH_THRESHOLD, String.valueOf(matchingSimilarity));
+
+ Map attributes = new HashMap<>();
+ attributes.put("fuzzyhash.value", "6:hERjIfhRrlB63J0FDw1NBQmEH68xwMSELN:hZrlB62IwMS");
+
+ runner.enqueue("bogus".getBytes(), attributes);
+ runner.run();
+
+ runner.assertQueueEmpty();
+ runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_NOT_FOUND, 1);
+
+ final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_NOT_FOUND).get(0);
+
+ outFile.assertAttributeNotExists("fuzzyhash.value.0.match");
+ }
+
+ @Test
+ public void testSsdeepCompareFuzzyHashWithInvalidHash() {
+ double matchingSimilarity = 80;
+ runner.setProperty(CompareFuzzyHash.HASH_ALGORITHM, CompareFuzzyHash.allowableValueSSDEEP.getValue());
+ runner.setProperty(CompareFuzzyHash.ATTRIBUTE_NAME, "fuzzyhash.value");
+ runner.setProperty(CompareFuzzyHash.HASH_LIST_FILE, "src/test/resources/ssdeep.list");
+ runner.setProperty(CompareFuzzyHash.MATCH_THRESHOLD, String.valueOf(matchingSimilarity));
+ runner.setProperty(CompareFuzzyHash.MATCHING_MODE, CompareFuzzyHash.singleMatch.getValue());
+
+ Map attributes = new HashMap<>();
+ attributes.put("fuzzyhash.value", "Test test test chocolate!");
+
+ runner.enqueue("bogus".getBytes(), attributes);
+ runner.run();
+
+ runner.assertQueueEmpty();
+ runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_FAILURE, 1);
+
+ final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_FAILURE).get(0);
+
+ outFile.assertAttributeNotExists("fuzzyhash.value.0.match");
+ }
+
+
+ @Test
+ public void testTLSHCompareFuzzyHash() {
+ double matchingSimilarity = 200;
+ runner.setProperty(CompareFuzzyHash.HASH_ALGORITHM, CompareFuzzyHash.allowableValueTLSH.getValue());
+ runner.setProperty(CompareFuzzyHash.ATTRIBUTE_NAME, "fuzzyhash.value");
+ runner.setProperty(CompareFuzzyHash.HASH_LIST_FILE, "src/test/resources/tlsh.list");
+ runner.setProperty(CompareFuzzyHash.MATCH_THRESHOLD, String.valueOf(matchingSimilarity));
+ runner.setProperty(CompareFuzzyHash.MATCHING_MODE, CompareFuzzyHash.singleMatch.getValue());
+
+ Map attributes = new HashMap<>();
+ attributes.put("fuzzyhash.value", tlshInput);
+
+ runner.enqueue("bogus".getBytes(), attributes);
+ runner.run();
+
+ runner.assertQueueEmpty();
+ runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_FOUND, 1);
+
+ final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_FOUND).get(0);
+
+ outFile.assertAttributeEquals(
+ "fuzzyhash.value.0.match",
+ "nifi-nar-bundles/nifi-lumberjack-bundle/nifi-lumberjack-processors/pom.xml"
+ );
+ double similarity = Double.valueOf(outFile.getAttribute("fuzzyhash.value.0.similarity"));
+ Assert.assertTrue(similarity <= matchingSimilarity);
+
+ outFile.assertAttributeNotExists("fuzzyhash.value.1.match");
+ }
+
+ @Test
+ public void testTLSHCompareFuzzyHashMultipleMatches() {
+ double matchingSimilarity = 200;
+ runner.setProperty(CompareFuzzyHash.HASH_ALGORITHM, CompareFuzzyHash.allowableValueTLSH.getValue());
+ runner.setProperty(CompareFuzzyHash.ATTRIBUTE_NAME, "fuzzyhash.value");
+ runner.setProperty(CompareFuzzyHash.HASH_LIST_FILE, "src/test/resources/tlsh.list");
+ runner.setProperty(CompareFuzzyHash.MATCH_THRESHOLD, String.valueOf(matchingSimilarity));
+ runner.setProperty(CompareFuzzyHash.MATCHING_MODE, CompareFuzzyHash.multiMatch.getValue());
+
+ Map attributes = new HashMap<>();
+ attributes.put("fuzzyhash.value", tlshInput);
+
+ runner.enqueue("bogus".getBytes(), attributes);
+ runner.run();
+
+ runner.assertQueueEmpty();
+ runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_FOUND, 1);
+
+ final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_FOUND).get(0);
+
+ outFile.assertAttributeEquals(
+ "fuzzyhash.value.0.match",
+ "nifi-nar-bundles/nifi-lumberjack-bundle/nifi-lumberjack-processors/pom.xml"
+ );
+ double similarity = Double.valueOf(outFile.getAttribute("fuzzyhash.value.0.similarity"));
+ Assert.assertTrue(similarity <= matchingSimilarity);
+
+ outFile.assertAttributeEquals(
+ "fuzzyhash.value.1.match",
+ "nifi-nar-bundles/nifi-beats-bundle/nifi-beats-processors/pom.xml"
+ );
+ similarity = Double.valueOf(outFile.getAttribute("fuzzyhash.value.1.similarity"));
+ Assert.assertTrue(similarity <= matchingSimilarity);
+ }
+
+
+ @Test
+ public void testTLSHCompareFuzzyHashWithBlankFile() {
+ // This is different from the "BlankHashList" series of tests in that the file lacks headers and as such is totally
+ // invalid
+ double matchingSimilarity = 200;
+ runner.setProperty(CompareFuzzyHash.HASH_ALGORITHM, CompareFuzzyHash.allowableValueTLSH.getValue());
+ runner.setProperty(CompareFuzzyHash.ATTRIBUTE_NAME, "fuzzyhash.value");
+ runner.setProperty(CompareFuzzyHash.HASH_LIST_FILE, "src/test/resources/empty.list");
+ runner.setProperty(CompareFuzzyHash.MATCH_THRESHOLD, String.valueOf(matchingSimilarity));
+
+ Map attributes = new HashMap<>();
+ attributes.put("fuzzyhash.value", "E2F0818B7AE7173906A72221570E30979B11C0FC47B518A1E89D257E2343CEC02381ED");
+
+ runner.enqueue("bogus".getBytes(), attributes);
+ runner.run();
+
+ runner.assertQueueEmpty();
+ runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_NOT_FOUND, 1);
+
+ final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_NOT_FOUND).get(0);
+
+ outFile.assertAttributeNotExists("fuzzyhash.value.0.match");
+ }
+
+ @Test
+ public void testTLSHCompareFuzzyHashWithEmptyHashList() {
+ double matchingSimilarity = 200;
+ runner.setProperty(CompareFuzzyHash.HASH_ALGORITHM, CompareFuzzyHash.allowableValueTLSH.getValue());
+ runner.setProperty(CompareFuzzyHash.ATTRIBUTE_NAME, "fuzzyhash.value");
+ runner.setProperty(CompareFuzzyHash.HASH_LIST_FILE, "src/test/resources/empty.list");
+ runner.setProperty(CompareFuzzyHash.MATCH_THRESHOLD, String.valueOf(matchingSimilarity));
+
+ Map attributes = new HashMap<>();
+ attributes.put("fuzzyhash.value", "E2F0818B7AE7173906A72221570E30979B11C0FC47B518A1E89D257E2343CEC02381ED");
+
+ runner.enqueue("bogus".getBytes(), attributes);
+ runner.run();
+
+ runner.assertQueueEmpty();
+ runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_NOT_FOUND, 1);
+
+ final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_NOT_FOUND).get(0);
+
+ outFile.assertAttributeNotExists("fuzzyhash.value.0.match");
+ }
+
+ @Test
+ public void testTLSHCompareFuzzyHashWithInvalidHash() {
+ double matchingSimilarity = 200;
+ runner.setProperty(CompareFuzzyHash.HASH_ALGORITHM, CompareFuzzyHash.allowableValueTLSH.getValue());
+ runner.setProperty(CompareFuzzyHash.ATTRIBUTE_NAME, "fuzzyhash.value");
+ runner.setProperty(CompareFuzzyHash.HASH_LIST_FILE, "src/test/resources/empty.list");
+ runner.setProperty(CompareFuzzyHash.MATCH_THRESHOLD, String.valueOf(matchingSimilarity));
+
+ Map attributes = new HashMap<>();
+ attributes.put("fuzzyhash.value", "Test test test chocolate");
+
+ runner.enqueue("bogus".getBytes(), attributes);
+ runner.run();
+
+ runner.assertQueueEmpty();
+ runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_FAILURE, 1);
+
+ final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_FAILURE).get(0);
+
+ outFile.assertAttributeNotExists("fuzzyhash.value.0.match");
+
+ }
+
+ @Test
+ public void testMissingAttribute() {
+ double matchingSimilarity = 200;
+ runner.setProperty(CompareFuzzyHash.HASH_ALGORITHM, CompareFuzzyHash.allowableValueTLSH.getValue());
+ runner.setProperty(CompareFuzzyHash.ATTRIBUTE_NAME, "fuzzyhash.value");
+ runner.setProperty(CompareFuzzyHash.HASH_LIST_FILE, "src/test/resources/empty.list");
+ runner.setProperty(CompareFuzzyHash.MATCH_THRESHOLD, String.valueOf(matchingSimilarity));
+ runner.setProperty(CompareFuzzyHash.MATCHING_MODE, CompareFuzzyHash.multiMatch.getValue());
+
+ runner.enqueue("bogus".getBytes());
+ runner.run();
+
+ runner.assertQueueEmpty();
+ runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_FAILURE, 1);
+
+ final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_FAILURE).get(0);
+
+ outFile.assertAttributeNotExists("fuzzyhash.value.0.match");
+ }
+
+ @Test
+ public void testAttributeIsEmptyString() {
+ double matchingSimilarity = 200;
+ runner.setProperty(CompareFuzzyHash.HASH_ALGORITHM, CompareFuzzyHash.allowableValueTLSH.getValue());
+ runner.setProperty(CompareFuzzyHash.ATTRIBUTE_NAME, "fuzzyhash.value");
+ runner.setProperty(CompareFuzzyHash.HASH_LIST_FILE, "src/test/resources/empty.list");
+ runner.setProperty(CompareFuzzyHash.MATCH_THRESHOLD, String.valueOf(matchingSimilarity));
+ runner.setProperty(CompareFuzzyHash.MATCHING_MODE, CompareFuzzyHash.multiMatch.getValue());
+
+ Map attributes = new HashMap<>();
+ attributes.put("fuzzyhash.value", "");
+ runner.enqueue("bogus".getBytes(), attributes);
+
+ runner.run();
+
+ runner.assertQueueEmpty();
+ runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_FAILURE, 1);
+
+ final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_FAILURE).get(0);
+
+ outFile.assertAttributeNotExists("fuzzyhash.value.0.match");
+ }
+
+ @Test
+ public void testlooksLikeSpamSum() {
+ FuzzyHashMatcher matcher = new SSDeepHashMatcher();
+
+ List invalidPayloads = Arrays.asList(
+ "4AD:c1xs8Z/m6H0eRH31S8p8bHENANkPrNy4tkPytwPyh2jTytxPythPytNdPytDgYyF:OuO/mg3HFSRHEb44RNMi6uHU2hcq3", // invalidFirstField
+ ":c1xs8Z/m6H0eRH31S8p8bHENANkPrNy4tkPytwPyh2jTytxPythPytNdPytDgYyF:OuO/mg3HFSRHEb44RNMi6uHU2hcq3", // emptyFirstField
+ "48::OuO/mg3HFSRHEb44RNMi6uHU2hcq3", // emptySecondField
+ "48:c1xs8Z/m6H0eRH31S8p8bHENANkPrNy4tkPytwPyh2jTytxPythPytNdPytDgYyF:", // emptyThirdField
+ "48:c1xs8Z/m6H0eRH31S8p8bHENANkPrNy4tkPytwPyh2jTytxPythPytNdPytDgYyF", // withoutThirdField
+ "c1xs8Z/m6H0eRH31S8p8bHENANkPrNy4tkPytwPyh2jTytxPythPytNdPytDgYyF" // Just a simple string
+ );
+
+ for (String item : invalidPayloads) {
+ Assert.assertTrue("item '" + item + "' should have failed validation", !matcher.isValidHash(item));
+ }
+
+ // Now test with a valid string
+ Assert.assertTrue(matcher.isValidHash(ssdeepInput));
+
+ }
+}
diff --git a/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/test/resources/blank_ssdeep.list b/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/test/resources/blank_ssdeep.list
new file mode 100644
index 0000000000..0393719323
--- /dev/null
+++ b/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/test/resources/blank_ssdeep.list
@@ -0,0 +1 @@
+ssdeep,1.0--blocksize:hash:hash,filename
\ No newline at end of file
diff --git a/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/test/resources/empty.list b/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/test/resources/empty.list
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/test/resources/ssdeep.list b/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/test/resources/ssdeep.list
new file mode 100644
index 0000000000..05b505e22f
--- /dev/null
+++ b/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/test/resources/ssdeep.list
@@ -0,0 +1,11 @@
+ssdeep,1.0--blocksize:hash:hash,filename
+96:KQhaGCVZGhr83h3bc0ok3892m12wzgnH5w2pw+sxNEI58:FIVkH4x73h39LH+2w+sxaD,"config.h"
+96:EQOJvOl4ab3hhiNFXc4wwcweomr0cNJDBoqXjmAHKX8dEt001nfEhVIuX0dDcs:3mzpAsZpprbshfu3oujjdENdp21,"doc\README"
+48:c1xs8Z/m6H0eRH31S8p8bHENRNkPSNy4tkPytwPytyYytxPythPytNdPytDgYyse:OuO/mg3HFSRHE+H4RNc6uHU2hqoMkh
+96,MD9fHjsEuddrg31904l8bgx5ROg2MQZHZqpAlycowOsexbHDbk:MJwz/l2PqGqqbr2yk6pVgrwPV,"Whatever.txt-INVALID-DUE-TO-COMMA-AFTER-96"
+48:c1xs8Z/m6H0eRH31S8p8bHENANkPrNy4tkPytwPyh2jTytxPythPytNdPytDgYyF:OuO/mg3HFSRHEb44RNMi6uHU2hcq3,"nifi/nifi-nar-bundles/nifi-beats-bundle/nifi-beats-processors/pom.xml"
+6:hERjIfhRrlB63J0FDw1NBQmEH68xwMSELN:hZrlB62IwMS,"c:\this_is_valid_but_should_not_match"
+96:MD9fHjsEuddrg31904l8bgx5ROg2MQZHZqpAlycowOsexbHDbk:MJwz/l2PqGqqbr2yk6pVgrwPV,"INSTALL"
+48:c1xs8Z/m6H0eRH31S8p8bHENRNkPSNy4tkPytwPytyYytxPythPytNdPytDgYyse:OuO/mg3HFSRHE+H4RNc6uHU2hqoMkh,"nifi/nifi-nar-bundles/nifi-lumberjack-bundle/nifi-lumberjack-processors/pom.xml"
+48:c1xs8Z/m6H0eRH31S8p8bHENRNkPSNy4tkPytwPytyYytxPythPytNdPytDgYyse:OuO/mg3HFSRHE+H4RNc6uHU2hqoMkh,
+
diff --git a/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/test/resources/tlsh.list b/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/test/resources/tlsh.list
new file mode 100644
index 0000000000..0f31c8ff63
--- /dev/null
+++ b/nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/test/resources/tlsh.list
@@ -0,0 +1,9 @@
+A4518DA4A8F9517162A409C1DEEA9872AF55C137E00A62C9F0CDD0CE4F6CCD784BB4B7 nifi-nar-bundles/nifi-lumberjack-bundle/nifi-lumberjack-processors/pom.xml
+ THERE SEEMS TO BE SOMETHING MISSING
+6FF02BEF718027B0160B4391212923ED7F1A463D563B1549B86CF62973B197AD2731F Synthetic shorter-INVALID
+EB519EA4A8F95171A2A409C1DEEB9872AF55C137E00A5289F1CCD0CE4F6CCD784BB4B7
+E2F0818B7AE7173906A72221570E30979B11C0FC47B518A1E89D257E2343CEC02381ED /this/is/also/valid/but/should/not/match
+EB519EA4A8F95171A2A409C1DEEB9872AF55C137E00A5289F1CCD0CE4F6CCD784BB4B7 nifi-nar-bundles/nifi-beats-bundle/nifi-beats-processors/pom.xml
+EB519EA4A8F95171A2A409C1DEEB9872AF55C137E00A5289F1CCD0CE4F6CCD784BB4B7
+
+6FF02BEF718027B0160B4391212923ED7F1A463D563B1549B86CF62973B197AD2731F8 /this/is/valid/but/should/not/match
diff --git a/pom.xml b/pom.xml
index 53b46a17e3..9ddaf53cf0 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1293,6 +1293,12 @@
1.2.0-SNAPSHOTnar
+
+ org.apache.nifi
+ nifi-cybersecurity-nar
+ 1.2.0-SNAPSHOT
+ nar
+ org.apache.nifinifi-email-nar