mirror of https://github.com/apache/nifi.git
NIFI-3726 - Introduces CompareFuzzyHash processor
- Abstract FuzzyHashContent to reduce a bit of code duplication
NIFI-3726 - Attempt to address peer review comments
NIFI-3726 - Addresses additional PR comments
NIFI-3726 - Fix checkstyle violations
NIFI-3726 - Fix issues with POMs
NIFI-3726 - Fixes the logging level to error in case of failure due to message too short
NIFI-3726 - Capture ArrayIndexOutOfBoundsException when consuming a hash source that matches but lacks a separator AND filename, or matches but lacks a filename (i.e. ends with a trailing separator)

Signed-off-by: Matt Burgess <mattyb149@apache.org>

This closes #1692
parent 960ef9142d
commit 54d47c7f74
pom.xml
@@ -359,6 +359,11 @@
             <artifactId>nifi-beats-nar</artifactId>
             <type>nar</type>
         </dependency>
+        <dependency>
+            <groupId>org.apache.nifi</groupId>
+            <artifactId>nifi-cybersecurity-nar</artifactId>
+            <type>nar</type>
+        </dependency>
         <dependency>
             <groupId>org.apache.nifi</groupId>
             <artifactId>nifi-email-nar</artifactId>
nifi-cybersecurity-processors pom.xml
@@ -44,12 +44,6 @@
         <artifactId>tlsh</artifactId>
         <version>1.0.0</version>
     </dependency>
-    <dependency>
-        <groupId>org.apache.nifi</groupId>
-        <artifactId>nifi-standard-processors</artifactId>
-        <version>1.2.0-SNAPSHOT</version>
-        <scope>provided</scope>
-    </dependency>
     <dependency>
         <groupId>org.apache.nifi</groupId>
         <artifactId>nifi-mock</artifactId>
@@ -65,5 +59,26 @@
         <artifactId>junit</artifactId>
         <scope>test</scope>
     </dependency>
+    <dependency>
+        <groupId>org.apache.nifi</groupId>
+        <artifactId>nifi-properties</artifactId>
+        <scope>compile</scope>
+    </dependency>
 </dependencies>
+<build>
+    <plugins>
+        <plugin>
+            <groupId>org.apache.rat</groupId>
+            <artifactId>apache-rat-plugin</artifactId>
+            <configuration>
+                <excludes combine.children="append">
+                    <exclude>src/test/resources/blank_ssdeep.list</exclude>
+                    <exclude>src/test/resources/empty.list</exclude>
+                    <exclude>src/test/resources/ssdeep.list</exclude>
+                    <exclude>src/test/resources/tlsh.list</exclude>
+                </excludes>
+            </configuration>
+        </plugin>
+    </plugins>
+</build>
 </project>
AbstractFuzzyHashProcessor.java
@@ -0,0 +1,88 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.nifi.processors.cybersecurity;

import com.idealista.tlsh.TLSH;
import info.debatty.java.spamsum.SpamSum;
import org.apache.nifi.components.AllowableValue;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.util.StandardValidators;

import java.util.List;
import java.util.Set;

abstract class AbstractFuzzyHashProcessor extends AbstractProcessor {
    protected static final String ssdeep = "ssdeep";
    protected static final String tlsh = "tlsh";

    public static final AllowableValue allowableValueSSDEEP = new AllowableValue(
            ssdeep,
            ssdeep,
            "Uses ssdeep / SpamSum 'context triggered piecewise hash'.");
    public static final AllowableValue allowableValueTLSH = new AllowableValue(
            tlsh,
            tlsh,
            "Uses TLSH (Trend 'Locality Sensitive Hash'). Note: FlowFile Content must be at least 512 characters long");

    public static final PropertyDescriptor ATTRIBUTE_NAME = new PropertyDescriptor.Builder()
            .name("ATTRIBUTE_NAME")
            .displayName("Hash Attribute Name")
            .description("The name of the FlowFile Attribute that should hold the Fuzzy Hash Value")
            .required(true)
            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
            .defaultValue("fuzzyhash.value")
            .build();

    public static final PropertyDescriptor HASH_ALGORITHM = new PropertyDescriptor.Builder()
            .name("HASH_ALGORITHM")
            .displayName("Hashing Algorithm")
            .description("The hashing algorithm utilised")
            .allowableValues(allowableValueSSDEEP, allowableValueTLSH)
            .required(true)
            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
            .build();

    protected List<PropertyDescriptor> descriptors;

    protected Set<Relationship> relationships;

    protected boolean checkMinimumAlgorithmRequirements(String algorithm, FlowFile flowFile) {
        // Check if content matches the minimum length requirement (TLSH needs at least 512 characters)
        if (algorithm.equals(tlsh) && flowFile.getSize() < 512) {
            return false;
        } else {
            return true;
        }
    }

    protected String generateHash(String algorithm, String content) {
        switch (algorithm) {
            case tlsh:
                return new TLSH(content).hash();
            case ssdeep:
                return new SpamSum().HashString(content);
            default:
                return null;
        }
    }
}
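For reference, the two library calls that generateHash() dispatches to can be exercised directly. A minimal, hedged sketch (not part of the commit), reusing the imports the class already declares; the sample content is hypothetical, and TLSH may reject input that is too short or not complex enough:

import com.idealista.tlsh.TLSH;
import com.idealista.tlsh.exceptions.InsufficientComplexityException;
import info.debatty.java.spamsum.SpamSum;

class FuzzyHashSketch {
    public static void main(String[] args) {
        // Hypothetical sample content; TLSH requires at least 512 characters
        StringBuilder content = new StringBuilder();
        for (int i = 0; i < 40; i++) {
            content.append("Sample line ").append(i).append(" of some FlowFile content.\n");
        }
        // ssdeep-style context triggered piecewise hash
        System.out.println(new SpamSum().HashString(content.toString()));
        try {
            // TLSH locality sensitive hash
            System.out.println(new TLSH(content.toString()).hash());
        } catch (InsufficientComplexityException e) {
            // Low-variety content cannot be TLSH-hashed; FuzzyHashContent routes such FlowFiles to failure
            System.out.println("Content not complex enough for TLSH");
        }
    }
}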
CompareFuzzyHash.java
@@ -0,0 +1,277 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.nifi.processors.cybersecurity;

import org.apache.nifi.annotation.behavior.EventDriven;
import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.behavior.SideEffectFree;
import org.apache.nifi.annotation.behavior.SupportsBatching;
import org.apache.nifi.annotation.behavior.WritesAttribute;
import org.apache.nifi.annotation.behavior.WritesAttributes;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.SeeAlso;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.annotation.lifecycle.OnScheduled;
import org.apache.nifi.components.AllowableValue;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.ProcessorInitializationContext;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.util.StandardValidators;
import org.apache.nifi.processors.cybersecurity.matchers.FuzzyHashMatcher;
import org.apache.nifi.processors.cybersecurity.matchers.SSDeepHashMatcher;
import org.apache.nifi.processors.cybersecurity.matchers.TLSHHashMatcher;
import org.apache.nifi.util.StringUtils;

import java.io.BufferedReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

@EventDriven
@SideEffectFree
@SupportsBatching
@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
@SeeAlso({FuzzyHashContent.class})
@Tags({"hashing", "fuzzy-hashing", "cyber-security"})
@CapabilityDescription("Compares an attribute containing a Fuzzy Hash against a file containing a list of fuzzy hashes, " +
        "appending an attribute to the FlowFile in case of a successful match.")

@WritesAttributes({
        @WritesAttribute(attribute = "XXXX.N.match", description = "The match that resembles the attribute specified " +
                "by the <Hash Attribute Name> property. Note that 'XXXX' gets replaced with the <Hash Attribute Name>"),
        @WritesAttribute(attribute = "XXXX.N.similarity", description = "The similarity score between this FlowFile " +
                "and its match of the same number N. Note that 'XXXX' gets replaced with the <Hash Attribute Name>")})

public class CompareFuzzyHash extends AbstractFuzzyHashProcessor {
    public static final AllowableValue singleMatch = new AllowableValue(
            "single",
            "single",
            "Send FlowFile to matched after the first match above threshold");
    public static final AllowableValue multiMatch = new AllowableValue(
            "multi-match",
            "multi-match",
            "Iterate full list of hashes before deciding to send FlowFile to matched or unmatched");

    public static final PropertyDescriptor HASH_LIST_FILE = new PropertyDescriptor.Builder()
            .name("HASH_LIST_FILE")
            .displayName("Hash List source file")
            .description("Path to the file containing hashes to be validated against")
            .required(true)
            .addValidator(StandardValidators.FILE_EXISTS_VALIDATOR)
            .build();

    // Note: the PropertyDescriptors HASH_ALGORITHM and ATTRIBUTE_NAME come from the parent class

    public static final PropertyDescriptor MATCH_THRESHOLD = new PropertyDescriptor.Builder()
            // Note that while both TLSH and SSDeep seem to return an int, we treat them as double in code.
            // The rationale is the expectation that other algorithms that may return double values
            // could be added to the processor later on.
            .name("MATCH_THRESHOLD")
            .displayName("Match threshold")
            .description("The similarity score that must be exceeded or met in order for a " +
                    "match to be considered true. Refer to Additional Information for differences between TLSH " +
                    "and SSDEEP scores and how they relate to this property.")
            .required(true)
            .addValidator(StandardValidators.NUMBER_VALIDATOR)
            .build();

    public static final PropertyDescriptor MATCHING_MODE = new PropertyDescriptor.Builder()
            .name("MATCHING_MODE")
            .displayName("Matching mode")
            .description("Defines if the Processor should try to match as many entries as possible (" + multiMatch.getDisplayName() +
                    ") or if it should stop after the first match (" + singleMatch.getDisplayName() + ")")
            .required(true)
            .allowableValues(singleMatch, multiMatch)
            .defaultValue(singleMatch.getValue())
            .build();

    public static final Relationship REL_FOUND = new Relationship.Builder()
            .name("found")
            .description("Any FlowFile that is successfully matched to an existing hash will be sent to this Relationship.")
            .build();

    public static final Relationship REL_NOT_FOUND = new Relationship.Builder()
            .name("not-found")
            .description("Any FlowFile that cannot be matched to an existing hash will be sent to this Relationship.")
            .build();

    public static final Relationship REL_FAILURE = new Relationship.Builder()
            .name("failure")
            .description("Any FlowFile that cannot be processed (e.g. lacks the attribute) will be sent to this Relationship.")
            .build();

    @Override
    protected void init(final ProcessorInitializationContext context) {
        final List<PropertyDescriptor> descriptors = new ArrayList<PropertyDescriptor>();
        descriptors.add(HASH_LIST_FILE);
        // As mentioned above, the PropertyDescriptors HASH_ALGORITHM and ATTRIBUTE_NAME come from the parent class
        descriptors.add(HASH_ALGORITHM);
        descriptors.add(ATTRIBUTE_NAME);
        descriptors.add(MATCH_THRESHOLD);
        descriptors.add(MATCHING_MODE);
        this.descriptors = Collections.unmodifiableList(descriptors);

        final Set<Relationship> relationships = new HashSet<Relationship>();
        relationships.add(REL_FOUND);
        relationships.add(REL_NOT_FOUND);
        relationships.add(REL_FAILURE);
        this.relationships = Collections.unmodifiableSet(relationships);
    }

    @Override
    public Set<Relationship> getRelationships() {
        return this.relationships;
    }

    @Override
    public final List<PropertyDescriptor> getSupportedPropertyDescriptors() {
        return descriptors;
    }

    @OnScheduled
    public void onScheduled(final ProcessContext context) {
    }

    @Override
    public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {

        FlowFile flowFile = session.get();
        if (flowFile == null) {
            return;
        }

        final ComponentLog logger = getLogger();
        String algorithm = context.getProperty(HASH_ALGORITHM).getValue();

        final String attributeName = context.getProperty(ATTRIBUTE_NAME).getValue();
        String inputHash = flowFile.getAttribute(attributeName);

        if (inputHash == null) {
            logger.info("FlowFile {} lacks the required '{}' attribute, routing to failure.",
                    new Object[]{flowFile, attributeName});
            session.transfer(flowFile, REL_FAILURE);
            return;
        }

        FuzzyHashMatcher fuzzyHashMatcher = null;

        switch (algorithm) {
            case tlsh:
                fuzzyHashMatcher = new TLSHHashMatcher(getLogger());
                break;
            case ssdeep:
                fuzzyHashMatcher = new SSDeepHashMatcher(getLogger());
                break;
            default:
                logger.error("The processor is configured to use the unsupported algorithm '{}'. Yielding.",
                        new Object[]{algorithm});
                context.yield();
                return;
        }

        if (!fuzzyHashMatcher.isValidHash(inputHash)) {
            // If the hash is invalid, log the fact
            logger.error("Invalid hash provided. Sending to failure");
            // and route the FlowFile to failure
            session.transfer(flowFile, REL_FAILURE);
            session.commit();
            return;
        }

        double similarity = 0;
        double matchThreshold = context.getProperty(MATCH_THRESHOLD).asDouble();

        Map<String, Double> matched = new ConcurrentHashMap<String, Double>();

        // try-with-resources ensures the hash list reader gets closed
        try (BufferedReader reader = fuzzyHashMatcher.getReader(context.getProperty(HASH_LIST_FILE).getValue())) {

            String line = null;

            while ((line = reader.readLine()) != null) {
                similarity = fuzzyHashMatcher.getSimilarity(inputHash, line);

                if (fuzzyHashMatcher.matchExceedsThreshold(similarity, matchThreshold)) {
                    String match = fuzzyHashMatcher.getMatch(line);
                    // A malformed file may cause a match with no filename.
                    // Because this would simply look odd, we ignore such an entry and log it.
                    if (!StringUtils.isEmpty(match)) {
                        matched.put(match, similarity);
                    } else {
                        logger.error("Found a match against a malformed entry '{}'. Please inspect the contents of " +
                                "the {} file and ensure they are properly formatted",
                                new Object[]{line, HASH_LIST_FILE.getDisplayName()});
                    }
                }

                // Check if single match is desired and a match has been made
                if (context.getProperty(MATCHING_MODE).getValue().equals(singleMatch.getValue()) && (matched.size() > 0)) {
                    // and save time by breaking out of the loop
                    break;
                }
            }
            // Whether or not the break was triggered, continue processing.
            // First create a new map to hold the attributes
            Map<String, String> attributes = new ConcurrentHashMap<String, String>();

            // Then iterate over the map of matches
            if (matched.size() > 0) {
                int x = 0;
                for (Map.Entry<String, Double> entry : matched.entrySet()) {
                    // defining attributes accordingly
                    attributes.put(
                            attributeName + "." + x + ".match",
                            entry.getKey());
                    attributes.put(
                            attributeName + "." + x + ".similarity",
                            String.valueOf(entry.getValue()));
                    x++;
                }
                // Finally, append the attributes to the FlowFile and send it to matched
                flowFile = session.putAllAttributes(flowFile, attributes);
                session.transfer(flowFile, REL_FOUND);
                session.commit();
                return;
            } else {
                // Otherwise send it to non-matched
                session.transfer(flowFile, REL_NOT_FOUND);
                session.commit();
                return;
            }
        } catch (IOException e) {
            logger.error("Error while reading the hash input source", e);
            context.yield();
        }
    }
}
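To make the attribute naming scheme built in onTrigger() concrete, here is a hedged sketch (not part of the commit) of the map a single hypothetical match would produce, assuming the default Hash Attribute Name:

import java.util.HashMap;
import java.util.Map;

class MatchAttributeSketch {
    public static void main(String[] args) {
        String attributeName = "fuzzyhash.value"; // default value of ATTRIBUTE_NAME
        Map<String, String> attributes = new HashMap<>();
        // First match (N = 0); the filename and score are hypothetical
        attributes.put(attributeName + ".0.match", "\"some/list/entry.txt\"");
        attributes.put(attributeName + ".0.similarity", "92.0");
        // The processor applies these with session.putAllAttributes(flowFile, attributes)
        attributes.forEach((key, value) -> System.out.println(key + " = " + value));
    }
}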
FuzzyHashContent.java
@@ -16,16 +16,12 @@
 */
 package org.apache.nifi.processors.cybersecurity;

-import com.idealista.tlsh.TLSH;
 import com.idealista.tlsh.exceptions.InsufficientComplexityException;
-import info.debatty.java.spamsum.SpamSum;

 import org.apache.nifi.annotation.behavior.EventDriven;
 import org.apache.nifi.annotation.behavior.InputRequirement;
 import org.apache.nifi.annotation.behavior.SideEffectFree;
 import org.apache.nifi.annotation.behavior.SupportsBatching;
-import org.apache.nifi.annotation.behavior.ReadsAttribute;
-import org.apache.nifi.annotation.behavior.ReadsAttributes;
 import org.apache.nifi.annotation.behavior.WritesAttribute;
 import org.apache.nifi.annotation.behavior.WritesAttributes;
 import org.apache.nifi.annotation.documentation.CapabilityDescription;
@@ -33,19 +29,17 @@ import org.apache.nifi.annotation.documentation.SeeAlso;
 import org.apache.nifi.annotation.documentation.Tags;
 import org.apache.nifi.annotation.lifecycle.OnScheduled;

-import org.apache.nifi.components.AllowableValue;
 import org.apache.nifi.components.PropertyDescriptor;
 import org.apache.nifi.flowfile.FlowFile;
 import org.apache.nifi.logging.ComponentLog;
 import org.apache.nifi.processor.exception.ProcessException;
-import org.apache.nifi.processor.AbstractProcessor;
 import org.apache.nifi.processor.ProcessContext;
 import org.apache.nifi.processor.ProcessSession;
 import org.apache.nifi.processor.ProcessorInitializationContext;
 import org.apache.nifi.processor.Relationship;
 import org.apache.nifi.processor.io.InputStreamCallback;
 import org.apache.nifi.processor.util.StandardValidators;
-import org.apache.nifi.processors.standard.HashContent;
+import org.apache.nifi.util.StringUtils;

 import org.apache.nifi.stream.io.StreamUtils;

@@ -73,31 +67,13 @@ import java.util.concurrent.atomic.AtomicReference;
         "evaluations in memory. Accordingly, it is important to consider the anticipated profile of content being " +
         "evaluated by this processor and the hardware supporting it especially when working against large files.")

-@SeeAlso({HashContent.class})
-@ReadsAttributes({@ReadsAttribute(attribute="", description="")})
+@SeeAlso(classNames = {"org.apache.nifi.processors.standard.HashContent"}, value = {CompareFuzzyHash.class})
 @WritesAttributes({@WritesAttribute(attribute = "<Hash Attribute Name>", description = "This Processor adds an attribute whose value is the result of Hashing the "
         + "existing FlowFile content. The name of this attribute is specified by the <Hash Attribute Name> property")})

-public class FuzzyHashContent extends AbstractProcessor {
+public class FuzzyHashContent extends AbstractFuzzyHashProcessor {

-    public static final AllowableValue allowableValueSSDEEP = new AllowableValue(
-            "ssdeep",
-            "ssdeep",
-            "Uses ssdeep / SpamSum 'context triggered piecewise hash'.");
-    public static final AllowableValue allowableValueTLSH = new AllowableValue(
-            "tlsh",
-            "tlsh",
-            "Uses TLSH (Trend 'Locality Sensitive Hash'). Note: FlowFile Content must be at least 512 characters long");
-
-    public static final PropertyDescriptor ATTRIBUTE_NAME = new PropertyDescriptor.Builder()
-            .name("ATTRIBUTE_NAME")
-            .displayName("Hash Attribute Name")
-            .description("The name of the FlowFile Attribute into which the Hash Value should be written. " +
-                    "If the value already exists, it will be overwritten")
-            .required(true)
-            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
-            .defaultValue("fuzzyhash.value")
-            .build();
-
     public static final PropertyDescriptor HASH_ALGORITHM = new PropertyDescriptor.Builder()
             .name("HASH_ALGORITHM")
@@ -109,12 +85,12 @@ public class FuzzyHashContent extends AbstractProcessor {
             .build();

     public static final Relationship REL_SUCCESS = new Relationship.Builder()
-            .name("Success")
+            .name("success")
             .description("Any FlowFile that is successfully hashed will be sent to this Relationship.")
             .build();

     public static final Relationship REL_FAILURE = new Relationship.Builder()
-            .name("Failure")
+            .name("failure")
             .description("Any FlowFile that fails to be hashed will be sent to this Relationship.")
             .build();

@@ -157,18 +133,17 @@ public class FuzzyHashContent extends AbstractProcessor {
         }

         final ComponentLog logger = getLogger();
+        String algorithm = context.getProperty(HASH_ALGORITHM).getValue();

         // Check if content matches minimum length requirement
-        if (context.getProperty(HASH_ALGORITHM).equals(allowableValueTLSH) && flowFile.getSize() < 512) {
-            logger.info("The content of {} is smaller than the minimum required by TLSH, routing to failure", new Object[]{flowFile});
+        if (!checkMinimumAlgorithmRequirements(algorithm, flowFile)) {
+            logger.error("The content of '{}' is smaller than the minimum required by {}, routing to failure",
+                    new Object[]{flowFile, algorithm});
             session.transfer(flowFile, REL_FAILURE);
             return;
         }

         final AtomicReference<String> hashValueHolder = new AtomicReference<>(null);

         try {
@@ -178,13 +153,12 @@ public class FuzzyHashContent extends AbstractProcessor {
                 try (ByteArrayOutputStream holder = new ByteArrayOutputStream()) {
                     StreamUtils.copy(in, holder);

-                    if (context.getProperty(HASH_ALGORITHM).getValue().equals(allowableValueSSDEEP.getValue())) {
-                        hashValueHolder.set(new SpamSum().HashString(holder.toString()));
-                    }
-
-                    if (context.getProperty(HASH_ALGORITHM).getValue().equals(allowableValueTLSH.getValue())) {
-                        hashValueHolder.set(new TLSH(holder.toString()).hash());
-                    }
+                    String hashValue = generateHash(algorithm, holder.toString());
+                    if (!StringUtils.isBlank(hashValue)) {
+                        hashValueHolder.set(hashValue);
+                    }
                 }
             }
         });
@@ -199,4 +173,5 @@ public class FuzzyHashContent extends AbstractProcessor {
             session.transfer(flowFile, REL_FAILURE);
         }
     }
+
 }
FuzzyHashMatcher.java
@@ -0,0 +1,35 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.nifi.processors.cybersecurity.matchers;

import java.io.BufferedReader;
import java.io.IOException;

public interface FuzzyHashMatcher {

    // Opens the hash list source and returns a reader positioned at the first hash entry
    BufferedReader getReader(String source) throws IOException;

    // True if the similarity score counts as a match under this algorithm's threshold semantics
    boolean matchExceedsThreshold(double similarity, double matchThreshold);

    // Similarity score between the input hash and an entry from the hash list
    double getSimilarity(String inputHash, String existingHash);

    // Whether the given string is a structurally valid hash for this algorithm
    boolean isValidHash(String inputHash);

    // Extracts the hash portion of a hash list entry
    String getHash(String line);

    // Extracts the filename portion of a hash list entry, if present
    String getMatch(String line);
}
SSDeepHashMatcher.java
@@ -0,0 +1,126 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.nifi.processors.cybersecurity.matchers;

import info.debatty.java.spamsum.SpamSum;
import org.apache.nifi.logging.ComponentLog;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Scanner;

public class SSDeepHashMatcher implements FuzzyHashMatcher {

    ComponentLog logger;

    public SSDeepHashMatcher() {

    }

    public SSDeepHashMatcher(ComponentLog logger) {
        this.logger = logger;
    }

    @Override
    public BufferedReader getReader(String source) throws IOException {

        File file = new File(source);

        FileInputStream fileInputStream = new FileInputStream(file);
        BufferedReader reader = new BufferedReader(new InputStreamReader(fileInputStream));

        // For ssdeep, skip the first line, as the usual format used by other tools
        // adds a header line to the file list
        reader.readLine();

        return reader;
    }

    @Override
    public boolean matchExceedsThreshold(double similarity, double matchThreshold) {
        // For ssdeep, higher scores mean more similar, so a match must reach the threshold
        return similarity >= matchThreshold;
    }

    @Override
    public double getSimilarity(String inputHash, String existingHash) {
        String[] hashToCompare = existingHash.split(",", 2);
        if (hashToCompare.length > 0) {
            return new SpamSum().match(inputHash, hashToCompare[0]);
        } else {
            return Double.NaN;
        }
    }

    @Override
    public boolean isValidHash(String inputHash) {
        // The format looks like:
        // blocksize:hash:hash

        String[] fields = inputHash.split(":", 3);

        if (fields.length == 3) {
            Scanner sc = new Scanner(fields[0]);

            boolean isNumber = sc.hasNextInt();
            if (!isNumber && logger != null) {
                if (logger.isDebugEnabled()) {
                    logger.debug("Field should be numeric but got '{}'. Will tell processor to ignore.",
                            new Object[]{fields[0]});
                }
            }

            boolean hashOneIsNotEmpty = !fields[1].isEmpty();
            boolean hashTwoIsNotEmpty = !fields[2].isEmpty();

            if (isNumber && hashOneIsNotEmpty && hashTwoIsNotEmpty) {
                return true;
            }
        }
        return false;
    }

    @Override
    public String getHash(String line) {
        if (isValidHash(line)) {
            return line.split(",", 2)[0];
        } else {
            return null;
        }
    }

    @Override
    public String getMatch(String line) {
        if (isValidHash(line)) {
            String[] match = line.split(",", 2);
            // Because the file can be malformed and contain an unnamed match,
            // only return the filename if the entry actually has one
            if (match.length == 2) {
                return match[1];
            }
        }
        // Otherwise return null
        return null;
    }
}
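A hedged sketch (not part of the commit) of how getMatch() treats well-formed and malformed entries. The list lines below are hypothetical, but the results follow from the split(",", 2) logic above; the two malformed outcomes are exactly what CompareFuzzyHash guards against with StringUtils.isEmpty(match):

import org.apache.nifi.processors.cybersecurity.matchers.SSDeepHashMatcher;

class SSDeepListEntrySketch {
    public static void main(String[] args) {
        SSDeepHashMatcher matcher = new SSDeepHashMatcher();
        // Well-formed entry: hash, separator, quoted filename
        System.out.println(matcher.getMatch("3:abc:def,\"some/file.txt\"")); // "some/file.txt" (quoted)
        // Valid hash but no separator or filename
        System.out.println(matcher.getMatch("3:abc:def"));                   // null
        // Valid hash with a trailing separator and no filename
        System.out.println(matcher.getMatch("3:abc:def,"));                  // empty string
    }
}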
TLSHHashMatcher.java
@@ -0,0 +1,117 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.nifi.processors.cybersecurity.matchers;


import com.idealista.tlsh.digests.Digest;
import com.idealista.tlsh.digests.DigestBuilder;
import org.apache.nifi.logging.ComponentLog;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;

import static org.apache.nifi.processors.cybersecurity.CompareFuzzyHash.HASH_LIST_FILE;

public class TLSHHashMatcher implements FuzzyHashMatcher {

    ComponentLog logger;

    public TLSHHashMatcher(ComponentLog logger) {
        this.logger = logger;
    }

    @Override
    public BufferedReader getReader(String source) throws IOException {

        File file = new File(source);

        FileInputStream fileInputStream = new FileInputStream(file);
        BufferedReader reader = new BufferedReader(new InputStreamReader(fileInputStream));

        return reader;
    }

    @Override
    public boolean matchExceedsThreshold(double similarity, double matchThreshold) {
        // For TLSH, lower scores mean more similar, so a match must stay at or below the threshold
        return similarity <= matchThreshold;
    }

    @Override
    public double getSimilarity(String inputHash, String existingHash) {
        String[] hashToCompare = existingHash.split("\t", 2);
        // Returns NaN in case either hash fails validation
        if (isValidHash(inputHash) && isValidHash(hashToCompare[0])) {
            Digest inputDigest = new DigestBuilder().withHash(inputHash).build();
            Digest existingHashDigest = new DigestBuilder().withHash(hashToCompare[0]).build();

            return inputDigest.calculateDifference(existingHashDigest, true);
        } else {
            return Double.NaN;
        }
    }

    @Override
    public boolean isValidHash(String stringFromHashList) {
        String[] hashToCompare = stringFromHashList.split("\t", 2);

        if (hashToCompare.length > 0) {
            // DigestBuilder raises all sorts of exceptions. To keep the onTrigger loop a
            // bit cleaner, we capture them here and report the hash as invalid; if building
            // the Digest succeeds, the hash is considered valid.
            try {
                Digest digest = new DigestBuilder().withHash(hashToCompare[0]).build();
                return true;
            } catch (ArrayIndexOutOfBoundsException | StringIndexOutOfBoundsException | NumberFormatException e) {
                logger.error("Got {} while processing the string '{}'. This usually means the file " +
                        "defined by the '{}' property contains invalid entries.",
                        new Object[]{e.getCause(), hashToCompare[0], HASH_LIST_FILE.getDisplayName()});
            }
        }
        return false;
    }

    @Override
    public String getHash(String line) {
        if (isValidHash(line)) {
            return line.split("\t", 2)[0];
        } else {
            return null;
        }
    }

    @Override
    public String getMatch(String line) {
        if (isValidHash(line)) {
            String[] match = line.split("\t", 2);
            // Because the file can be malformed and contain an unnamed match,
            // only return the filename if the entry actually has one
            if (match.length == 2) {
                return match[1];
            }
        }
        // Otherwise return null
        return null;
    }
}
META-INF/services/org.apache.nifi.processor.Processor
@@ -13,3 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 org.apache.nifi.processors.cybersecurity.FuzzyHashContent
+org.apache.nifi.processors.cybersecurity.CompareFuzzyHash
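A hedged sketch of why this registration matters, assuming NiFi's standard java.util.ServiceLoader-based discovery that these META-INF/services entries feed:

import java.util.ServiceLoader;
import org.apache.nifi.processor.Processor;

class DiscoverySketch {
    public static void main(String[] args) {
        // Iterates every Processor named in META-INF/services files on the classpath;
        // after this commit the list includes CompareFuzzyHash
        for (Processor processor : ServiceLoader.load(Processor.class)) {
            System.out.println(processor.getClass().getName());
        }
    }
}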
additionalDetails.html
@@ -0,0 +1,45 @@
<!DOCTYPE html>
<html lang="en">
<!--
  Licensed to the Apache Software Foundation (ASF) under one or more
  contributor license agreements. See the NOTICE file distributed with
  this work for additional information regarding copyright ownership.
  The ASF licenses this file to You under the Apache License, Version 2.0
  (the "License"); you may not use this file except in compliance with
  the License. You may obtain a copy of the License at
      http://www.apache.org/licenses/LICENSE-2.0
  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->
<head>
    <meta charset="utf-8" />
    <title>CompareFuzzyHash</title>
    <link rel="stylesheet" href="/nifi-docs/css/component-usage.css" type="text/css" />
</head>

<body>
<!-- Processor Documentation ================================================== -->
<h2>Description:</h2>
<p>This Processor compares an attribute containing a Fuzzy Hash (TLSH or SSDeep) value against a list
    of hashes of the same family (i.e. a TLSH hash is compared with a list of TLSH hashes), routing the FlowFile to match or non-match
    depending on a user-configured threshold for similarity.
</p>

<p>It is important to note that:</p>

<ul>
    <li>TLSH similarity increases as the output of its comparison function decreases (i.e. 0 indicates nearly identical files)</li>
    <li>SSDeep similarity directly relates to the output of its comparison function (e.g. 99 indicates nearly identical files)</li>
</ul>
<p>Based on the above, when this processor refers to "exceeding the score", it may mean:</p>

<ul>
    <li>a value equal to or lower than the configured threshold (in the case of TLSH)</li>
    <li>a value equal to or higher than the configured threshold (in the case of SSDeep)</li>
</ul>
</body>
</html>
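A hedged sketch (not part of the commit) that ties the inverted semantics described above to the two matchers introduced by this commit; the scores and thresholds below are hypothetical:

import org.apache.nifi.processors.cybersecurity.matchers.SSDeepHashMatcher;
import org.apache.nifi.processors.cybersecurity.matchers.TLSHHashMatcher;

class ThresholdSemanticsSketch {
    public static void main(String[] args) {
        // SSDeep: higher scores are more similar, so matches must reach the threshold
        System.out.println(new SSDeepHashMatcher().matchExceedsThreshold(95, 80));    // true
        // TLSH: lower scores are more similar, so matches must stay at or below it
        // (null logger is fine here; matchExceedsThreshold never logs)
        System.out.println(new TLSHHashMatcher(null).matchExceedsThreshold(30, 200)); // true
    }
}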
@ -0,0 +1,380 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.nifi.processors.cybersecurity;
|
||||||
|
|
||||||
|
|
||||||
|
import org.apache.nifi.processors.cybersecurity.matchers.FuzzyHashMatcher;
|
||||||
|
import org.apache.nifi.processors.cybersecurity.matchers.SSDeepHashMatcher;
|
||||||
|
import org.apache.nifi.util.MockFlowFile;
|
||||||
|
import org.apache.nifi.util.TestRunner;
|
||||||
|
import org.apache.nifi.util.TestRunners;
|
||||||
|
import org.junit.After;
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
||||||
|
public class TestCompareFuzzyHash {
|
||||||
|
String ssdeepInput = "48:c1xs8Z/m6H0eRH31S8p8bHENANkPrNy4tkPytwPyh2jTytxPythPytNdPytDgYyF:OuO/mg3HFSRHEb44RNMi6uHU2hcq3";
|
||||||
|
String tlshInput = "EB519EA4A8F95171A2A409C1DEEB9872AF55C137E00A5289F1CCD0CE4F6CCD784BB4B7";
|
||||||
|
|
||||||
|
final CompareFuzzyHash proc = new CompareFuzzyHash();
|
||||||
|
final private TestRunner runner = TestRunners.newTestRunner(proc);
|
||||||
|
|
||||||
|
@After
|
||||||
|
public void stop() {
|
||||||
|
runner.shutdown();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSsdeepCompareFuzzyHash() {
|
||||||
|
double matchingSimilarity = 80;
|
||||||
|
runner.setProperty(CompareFuzzyHash.HASH_ALGORITHM, CompareFuzzyHash.allowableValueSSDEEP.getValue());
|
||||||
|
runner.setProperty(CompareFuzzyHash.ATTRIBUTE_NAME, "fuzzyhash.value");
|
||||||
|
runner.setProperty(CompareFuzzyHash.HASH_LIST_FILE, "src/test/resources/ssdeep.list");
|
||||||
|
runner.setProperty(CompareFuzzyHash.MATCH_THRESHOLD, String.valueOf(matchingSimilarity));
|
||||||
|
runner.setProperty(CompareFuzzyHash.MATCHING_MODE, CompareFuzzyHash.singleMatch.getValue());
|
||||||
|
|
||||||
|
Map<String, String> attributes = new HashMap<>();
|
||||||
|
attributes.put("fuzzyhash.value", ssdeepInput);
|
||||||
|
|
||||||
|
runner.enqueue("bogus".getBytes(), attributes);
|
||||||
|
runner.run();
|
||||||
|
|
||||||
|
runner.assertQueueEmpty();
|
||||||
|
runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_FOUND, 1);
|
||||||
|
|
||||||
|
final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_FOUND).get(0);
|
||||||
|
|
||||||
|
|
||||||
|
outFile.assertAttributeEquals(
|
||||||
|
"fuzzyhash.value.0.match",
|
||||||
|
"\"nifi/nifi-nar-bundles/nifi-beats-bundle/nifi-beats-processors/pom.xml\""
|
||||||
|
);
|
||||||
|
double similarity = Double.valueOf(outFile.getAttribute("fuzzyhash.value.0.similarity"));
|
||||||
|
Assert.assertTrue(similarity >= matchingSimilarity);
|
||||||
|
|
||||||
|
outFile.assertAttributeNotExists("fuzzyhash.value.1.match");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSsdeepCompareFuzzyHashMultipleMatches() {
|
||||||
|
double matchingSimilarity = 80;
|
||||||
|
runner.setProperty(CompareFuzzyHash.HASH_ALGORITHM, CompareFuzzyHash.allowableValueSSDEEP.getValue());
|
||||||
|
runner.setProperty(CompareFuzzyHash.ATTRIBUTE_NAME, "fuzzyhash.value");
|
||||||
|
runner.setProperty(CompareFuzzyHash.HASH_LIST_FILE, "src/test/resources/ssdeep.list");
|
||||||
|
runner.setProperty(CompareFuzzyHash.MATCH_THRESHOLD, String.valueOf(matchingSimilarity));
|
||||||
|
runner.setProperty(CompareFuzzyHash.MATCHING_MODE, CompareFuzzyHash.multiMatch.getValue());
|
||||||
|
|
||||||
|
Map<String, String> attributes = new HashMap<>();
|
||||||
|
attributes.put("fuzzyhash.value", ssdeepInput );
|
||||||
|
|
||||||
|
runner.enqueue("bogus".getBytes(), attributes);
|
||||||
|
runner.run();
|
||||||
|
|
||||||
|
runner.assertQueueEmpty();
|
||||||
|
runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_FOUND, 1);
|
||||||
|
|
||||||
|
final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_FOUND).get(0);
|
||||||
|
|
||||||
|
|
||||||
|
outFile.assertAttributeEquals("fuzzyhash.value.0.match",
|
||||||
|
"\"nifi/nifi-nar-bundles/nifi-lumberjack-bundle/nifi-lumberjack-processors/pom.xml\""
|
||||||
|
);
|
||||||
|
|
||||||
|
double similarity = Double.valueOf(outFile.getAttribute("fuzzyhash.value.0.similarity"));
|
||||||
|
Assert.assertTrue(similarity >= matchingSimilarity);
|
||||||
|
|
||||||
|
outFile.assertAttributeEquals("fuzzyhash.value.1.match",
|
||||||
|
"\"nifi/nifi-nar-bundles/nifi-beats-bundle/nifi-beats-processors/pom.xml\""
|
||||||
|
);
|
||||||
|
similarity = Double.valueOf(outFile.getAttribute("fuzzyhash.value.1.similarity"));
|
||||||
|
Assert.assertTrue(similarity >= matchingSimilarity);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSsdeepCompareFuzzyHashWithBlankHashList() {
|
||||||
|
double matchingSimilarity = 80;
|
||||||
|
runner.setProperty(CompareFuzzyHash.HASH_ALGORITHM, CompareFuzzyHash.allowableValueSSDEEP.getValue());
|
||||||
|
runner.setProperty(CompareFuzzyHash.ATTRIBUTE_NAME, "fuzzyhash.value");
|
||||||
|
runner.setProperty(CompareFuzzyHash.HASH_LIST_FILE, "src/test/resources/blank_ssdeep.list");
|
||||||
|
runner.setProperty(CompareFuzzyHash.MATCH_THRESHOLD, String.valueOf(matchingSimilarity));
|
||||||
|
|
||||||
|
Map<String, String> attributes = new HashMap<>();
|
||||||
|
attributes.put("fuzzyhash.value", "6:hERjIfhRrlB63J0FDw1NBQmEH68xwMSELN:hZrlB62IwMS");
|
||||||
|
|
||||||
|
runner.enqueue("bogus".getBytes(), attributes);
|
||||||
|
runner.run();
|
||||||
|
|
||||||
|
runner.assertQueueEmpty();
|
||||||
|
runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_NOT_FOUND, 1);
|
||||||
|
|
||||||
|
final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_NOT_FOUND).get(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSsdeepCompareFuzzyHashWithInvalidHashList() {
|
||||||
|
// This is different from "BlankHashList series of tests in that the file lacks headers and as such is totally
|
||||||
|
// invalid
|
||||||
|
double matchingSimilarity = 80;
|
||||||
|
runner.setProperty(CompareFuzzyHash.HASH_ALGORITHM, CompareFuzzyHash.allowableValueSSDEEP.getValue());
|
||||||
|
runner.setProperty(CompareFuzzyHash.ATTRIBUTE_NAME, "fuzzyhash.value");
|
||||||
|
runner.setProperty(CompareFuzzyHash.HASH_LIST_FILE, "src/test/resources/empty.list");
|
||||||
|
runner.setProperty(CompareFuzzyHash.MATCH_THRESHOLD, String.valueOf(matchingSimilarity));
|
||||||
|
|
||||||
|
Map<String, String> attributes = new HashMap<>();
|
||||||
|
attributes.put("fuzzyhash.value", "6:hERjIfhRrlB63J0FDw1NBQmEH68xwMSELN:hZrlB62IwMS");
|
||||||
|
|
||||||
|
runner.enqueue("bogus".getBytes(), attributes);
|
||||||
|
runner.run();
|
||||||
|
|
||||||
|
runner.assertQueueEmpty();
|
||||||
|
runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_NOT_FOUND, 1);
|
||||||
|
|
||||||
|
final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_NOT_FOUND).get(0);
|
||||||
|
|
||||||
|
outFile.assertAttributeNotExists("fuzzyhash.value.0.match");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSsdeepCompareFuzzyHashWithInvalidHash() {
|
||||||
|
double matchingSimilarity = 80;
|
||||||
|
runner.setProperty(CompareFuzzyHash.HASH_ALGORITHM, CompareFuzzyHash.allowableValueSSDEEP.getValue());
|
||||||
|
runner.setProperty(CompareFuzzyHash.ATTRIBUTE_NAME, "fuzzyhash.value");
|
||||||
|
runner.setProperty(CompareFuzzyHash.HASH_LIST_FILE, "src/test/resources/ssdeep.list");
|
||||||
|
runner.setProperty(CompareFuzzyHash.MATCH_THRESHOLD, String.valueOf(matchingSimilarity));
|
||||||
|
runner.setProperty(CompareFuzzyHash.MATCHING_MODE, CompareFuzzyHash.singleMatch.getValue());
|
||||||
|
|
||||||
|
Map<String, String> attributes = new HashMap<>();
|
||||||
|
attributes.put("fuzzyhash.value", "Test test test chocolate!");
|
||||||
|
|
||||||
|
runner.enqueue("bogus".getBytes(), attributes);
|
||||||
|
runner.run();
|
||||||
|
|
||||||
|
runner.assertQueueEmpty();
|
||||||
|
runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_FAILURE, 1);
|
||||||
|
|
||||||
|
final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_FAILURE).get(0);
|
||||||
|
|
||||||
|
outFile.assertAttributeNotExists("fuzzyhash.value.0.match");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTLSHCompareFuzzyHash() {
|
||||||
|
double matchingSimilarity = 200;
|
||||||
|
runner.setProperty(CompareFuzzyHash.HASH_ALGORITHM, CompareFuzzyHash.allowableValueTLSH.getValue());
|
||||||
|
runner.setProperty(CompareFuzzyHash.ATTRIBUTE_NAME, "fuzzyhash.value");
|
||||||
|
runner.setProperty(CompareFuzzyHash.HASH_LIST_FILE, "src/test/resources/tlsh.list");
|
||||||
|
runner.setProperty(CompareFuzzyHash.MATCH_THRESHOLD, String.valueOf(matchingSimilarity));
|
||||||
|
runner.setProperty(CompareFuzzyHash.MATCHING_MODE, CompareFuzzyHash.singleMatch.getValue());
|
||||||
|
|
||||||
|
Map<String, String> attributes = new HashMap<>();
|
||||||
|
attributes.put("fuzzyhash.value", tlshInput);
|
||||||
|
|
||||||
|
runner.enqueue("bogus".getBytes(), attributes);
|
||||||
|
runner.run();
|
||||||
|
|
||||||
|
runner.assertQueueEmpty();
|
||||||
|
runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_FOUND, 1);
|
||||||
|
|
||||||
|
final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_FOUND).get(0);
|
||||||
|
|
||||||
|
outFile.assertAttributeEquals(
|
||||||
|
"fuzzyhash.value.0.match",
|
||||||
|
"nifi-nar-bundles/nifi-lumberjack-bundle/nifi-lumberjack-processors/pom.xml"
|
||||||
|
);
|
||||||
|
double similarity = Double.valueOf(outFile.getAttribute("fuzzyhash.value.0.similarity"));
|
||||||
|
Assert.assertTrue(similarity <= matchingSimilarity);
|
||||||
|
|
||||||
|
outFile.assertAttributeNotExists("fuzzyhash.value.1.match");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTLSHCompareFuzzyHashMultipleMatches() {
|
||||||
|
double matchingSimilarity = 200;
|
||||||
|
runner.setProperty(CompareFuzzyHash.HASH_ALGORITHM, CompareFuzzyHash.allowableValueTLSH.getValue());
|
||||||
|
runner.setProperty(CompareFuzzyHash.ATTRIBUTE_NAME, "fuzzyhash.value");
|
||||||
|
runner.setProperty(CompareFuzzyHash.HASH_LIST_FILE, "src/test/resources/tlsh.list");
|
||||||
|
runner.setProperty(CompareFuzzyHash.MATCH_THRESHOLD, String.valueOf(matchingSimilarity));
|
||||||
|
runner.setProperty(CompareFuzzyHash.MATCHING_MODE, CompareFuzzyHash.multiMatch.getValue());
|
||||||
|
|
||||||
|
Map<String, String> attributes = new HashMap<>();
|
||||||
|
attributes.put("fuzzyhash.value", tlshInput);
|
||||||
|
|
||||||
|
runner.enqueue("bogus".getBytes(), attributes);
|
||||||
|
runner.run();
|
||||||
|
|
||||||
|
runner.assertQueueEmpty();
|
||||||
|
runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_FOUND, 1);
|
||||||
|
|
||||||
|
final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_FOUND).get(0);
|
||||||
|
|
||||||
|
outFile.assertAttributeEquals(
|
||||||
|
"fuzzyhash.value.0.match",
|
||||||
|
"nifi-nar-bundles/nifi-lumberjack-bundle/nifi-lumberjack-processors/pom.xml"
|
||||||
|
);
|
||||||
|
double similarity = Double.valueOf(outFile.getAttribute("fuzzyhash.value.0.similarity"));
|
||||||
|
Assert.assertTrue(similarity <= matchingSimilarity);
|
||||||
|
|
||||||
|
outFile.assertAttributeEquals(
|
||||||
|
"fuzzyhash.value.1.match",
|
||||||
|
"nifi-nar-bundles/nifi-beats-bundle/nifi-beats-processors/pom.xml"
|
||||||
|
);
|
||||||
|
similarity = Double.valueOf(outFile.getAttribute("fuzzyhash.value.1.similarity"));
|
||||||
|
Assert.assertTrue(similarity <= matchingSimilarity);
|
||||||
|
}

    @Test
    public void testTLSHCompareFuzzyHashWithBlankFile() {
        // This differs from the "BlankHashList" series of tests in that the file lacks headers and,
        // as such, is totally invalid
        double matchingSimilarity = 200;
        runner.setProperty(CompareFuzzyHash.HASH_ALGORITHM, CompareFuzzyHash.allowableValueTLSH.getValue());
        runner.setProperty(CompareFuzzyHash.ATTRIBUTE_NAME, "fuzzyhash.value");
        runner.setProperty(CompareFuzzyHash.HASH_LIST_FILE, "src/test/resources/empty.list");
        runner.setProperty(CompareFuzzyHash.MATCH_THRESHOLD, String.valueOf(matchingSimilarity));

        Map<String, String> attributes = new HashMap<>();
        attributes.put("fuzzyhash.value", "E2F0818B7AE7173906A72221570E30979B11C0FC47B518A1E89D257E2343CEC02381ED");

        runner.enqueue("bogus".getBytes(), attributes);
        runner.run();

        runner.assertQueueEmpty();
        runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_NOT_FOUND, 1);

        final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_NOT_FOUND).get(0);

        outFile.assertAttributeNotExists("fuzzyhash.value.0.match");
    }

    @Test
    public void testTLSHCompareFuzzyHashWithEmptyHashList() {
        double matchingSimilarity = 200;
        runner.setProperty(CompareFuzzyHash.HASH_ALGORITHM, CompareFuzzyHash.allowableValueTLSH.getValue());
        runner.setProperty(CompareFuzzyHash.ATTRIBUTE_NAME, "fuzzyhash.value");
        runner.setProperty(CompareFuzzyHash.HASH_LIST_FILE, "src/test/resources/empty.list");
        runner.setProperty(CompareFuzzyHash.MATCH_THRESHOLD, String.valueOf(matchingSimilarity));

        Map<String, String> attributes = new HashMap<>();
        attributes.put("fuzzyhash.value", "E2F0818B7AE7173906A72221570E30979B11C0FC47B518A1E89D257E2343CEC02381ED");

        runner.enqueue("bogus".getBytes(), attributes);
        runner.run();

        runner.assertQueueEmpty();
        runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_NOT_FOUND, 1);

        final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_NOT_FOUND).get(0);

        outFile.assertAttributeNotExists("fuzzyhash.value.0.match");
    }

    @Test
    public void testTLSHCompareFuzzyHashWithInvalidHash() {
        double matchingSimilarity = 200;
        runner.setProperty(CompareFuzzyHash.HASH_ALGORITHM, CompareFuzzyHash.allowableValueTLSH.getValue());
        runner.setProperty(CompareFuzzyHash.ATTRIBUTE_NAME, "fuzzyhash.value");
        runner.setProperty(CompareFuzzyHash.HASH_LIST_FILE, "src/test/resources/empty.list");
        runner.setProperty(CompareFuzzyHash.MATCH_THRESHOLD, String.valueOf(matchingSimilarity));

        Map<String, String> attributes = new HashMap<>();
        // Not a valid TLSH hash, so the flowfile should be routed to failure
        attributes.put("fuzzyhash.value", "Test test test chocolate");

        runner.enqueue("bogus".getBytes(), attributes);
        runner.run();

        runner.assertQueueEmpty();
        runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_FAILURE, 1);

        final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_FAILURE).get(0);

        outFile.assertAttributeNotExists("fuzzyhash.value.0.match");
    }

    @Test
    public void testMissingAttribute() {
        double matchingSimilarity = 200;
        runner.setProperty(CompareFuzzyHash.HASH_ALGORITHM, CompareFuzzyHash.allowableValueTLSH.getValue());
        runner.setProperty(CompareFuzzyHash.ATTRIBUTE_NAME, "fuzzyhash.value");
        runner.setProperty(CompareFuzzyHash.HASH_LIST_FILE, "src/test/resources/empty.list");
        runner.setProperty(CompareFuzzyHash.MATCH_THRESHOLD, String.valueOf(matchingSimilarity));
        runner.setProperty(CompareFuzzyHash.MATCHING_MODE, CompareFuzzyHash.multiMatch.getValue());

        // No fuzzyhash.value attribute at all
        runner.enqueue("bogus".getBytes());
        runner.run();

        runner.assertQueueEmpty();
        runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_FAILURE, 1);

        final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_FAILURE).get(0);

        outFile.assertAttributeNotExists("fuzzyhash.value.0.match");
    }

    @Test
    public void testAttributeIsEmptyString() {
        double matchingSimilarity = 200;
        runner.setProperty(CompareFuzzyHash.HASH_ALGORITHM, CompareFuzzyHash.allowableValueTLSH.getValue());
        runner.setProperty(CompareFuzzyHash.ATTRIBUTE_NAME, "fuzzyhash.value");
        runner.setProperty(CompareFuzzyHash.HASH_LIST_FILE, "src/test/resources/empty.list");
        runner.setProperty(CompareFuzzyHash.MATCH_THRESHOLD, String.valueOf(matchingSimilarity));
        runner.setProperty(CompareFuzzyHash.MATCHING_MODE, CompareFuzzyHash.multiMatch.getValue());

        Map<String, String> attributes = new HashMap<>();
        attributes.put("fuzzyhash.value", "");
        runner.enqueue("bogus".getBytes(), attributes);

        runner.run();

        runner.assertQueueEmpty();
        runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_FAILURE, 1);

        final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_FAILURE).get(0);

        outFile.assertAttributeNotExists("fuzzyhash.value.0.match");
    }

    @Test
    public void testlooksLikeSpamSum() {
        FuzzyHashMatcher matcher = new SSDeepHashMatcher();

        List<String> invalidPayloads = Arrays.asList(
                "4AD:c1xs8Z/m6H0eRH31S8p8bHENANkPrNy4tkPytwPyh2jTytxPythPytNdPytDgYyF:OuO/mg3HFSRHEb44RNMi6uHU2hcq3", // invalidFirstField
                ":c1xs8Z/m6H0eRH31S8p8bHENANkPrNy4tkPytwPyh2jTytxPythPytNdPytDgYyF:OuO/mg3HFSRHEb44RNMi6uHU2hcq3", // emptyFirstField
                "48::OuO/mg3HFSRHEb44RNMi6uHU2hcq3", // emptySecondField
                "48:c1xs8Z/m6H0eRH31S8p8bHENANkPrNy4tkPytwPyh2jTytxPythPytNdPytDgYyF:", // emptyThirdField
                "48:c1xs8Z/m6H0eRH31S8p8bHENANkPrNy4tkPytwPyh2jTytxPythPytNdPytDgYyF", // withoutThirdField
                "c1xs8Z/m6H0eRH31S8p8bHENANkPrNy4tkPytwPyh2jTytxPythPytNdPytDgYyF" // Just a simple string
        );

        for (String item : invalidPayloads) {
            Assert.assertFalse("item '" + item + "' should have failed validation", matcher.isValidHash(item));
        }

        // Now test with a valid string
        Assert.assertTrue(matcher.isValidHash(ssdeepInput));
    }
}
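The invalid payloads in testlooksLikeSpamSum each break the spamsum layout in
exactly one way: an ssdeep/spamsum hash has the shape "blocksize:hash:hash",
where the first field must parse as a positive integer and both hash fields
must be non-empty. A minimal sketch of such a format check follows; the class
and method names are illustrative assumptions, and this is not the
SSDeepHashMatcher implementation.

// Hypothetical sketch of a spamsum ("blocksize:hash:hash") format check.
public final class SpamSumFormat {

    private SpamSumFormat() {
    }

    public static boolean looksLikeSpamSum(String value) {
        if (value == null) {
            return false;
        }
        // A limit of -1 keeps trailing empty fields, so "48:hash:" yields three fields.
        final String[] fields = value.split(":", -1);
        if (fields.length != 3) {
            return false; // missing separator or extra fields
        }
        try {
            if (Integer.parseInt(fields[0]) <= 0) {
                return false; // block size must be a positive integer
            }
        } catch (NumberFormatException e) {
            return false; // first field is not numeric (e.g. "4AD")
        }
        return !fields[1].isEmpty() && !fields[2].isEmpty();
    }
}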

src/test/resources/blank_ssdeep.list
@@ -0,0 +1 @@
ssdeep,1.0--blocksize:hash:hash,filename

src/test/resources/ssdeep.list
@@ -0,0 +1,11 @@
ssdeep,1.0--blocksize:hash:hash,filename
96:KQhaGCVZGhr83h3bc0ok3892m12wzgnH5w2pw+sxNEI58:FIVkH4x73h39LH+2w+sxaD,"config.h"
96:EQOJvOl4ab3hhiNFXc4wwcweomr0cNJDBoqXjmAHKX8dEt001nfEhVIuX0dDcs:3mzpAsZpprbshfu3oujjdENdp21,"doc\README"
48:c1xs8Z/m6H0eRH31S8p8bHENRNkPSNy4tkPytwPytyYytxPythPytNdPytDgYyse:OuO/mg3HFSRHE+H4RNc6uHU2hqoMkh
96,MD9fHjsEuddrg31904l8bgx5ROg2MQZHZqpAlycowOsexbHDbk:MJwz/l2PqGqqbr2yk6pVgrwPV,"Whatever.txt-INVALID-DUE-TO-COMMA-AFTER-96"
48:c1xs8Z/m6H0eRH31S8p8bHENANkPrNy4tkPytwPyh2jTytxPythPytNdPytDgYyF:OuO/mg3HFSRHEb44RNMi6uHU2hcq3,"nifi/nifi-nar-bundles/nifi-beats-bundle/nifi-beats-processors/pom.xml"
6:hERjIfhRrlB63J0FDw1NBQmEH68xwMSELN:hZrlB62IwMS,"c:\this_is_valid_but_should_not_match"
96:MD9fHjsEuddrg31904l8bgx5ROg2MQZHZqpAlycowOsexbHDbk:MJwz/l2PqGqqbr2yk6pVgrwPV,"INSTALL"
48:c1xs8Z/m6H0eRH31S8p8bHENRNkPSNy4tkPytwPytyYytxPythPytNdPytDgYyse:OuO/mg3HFSRHE+H4RNc6uHU2hqoMkh,"nifi/nifi-nar-bundles/nifi-lumberjack-bundle/nifi-lumberjack-processors/pom.xml"
48:c1xs8Z/m6H0eRH31S8p8bHENRNkPSNy4tkPytwPytyYytxPythPytNdPytDgYyse:OuO/mg3HFSRHE+H4RNc6uHU2hqoMkh,

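Each well-formed entry above pairs a spamsum hash with a quoted filename,
separated by the first comma; the fixture also deliberately includes a
malformed line (a comma where a colon belongs) and a final line that ends at
the separator with no filename at all. A reader of this list has to tolerate
that trailing separator: a naive line.split(",")[1] would throw
ArrayIndexOutOfBoundsException on the last entry. A hypothetical sketch of a
tolerant split follows (illustrative names, not the processor's code):

// Hypothetical sketch of defensively splitting a "hash,filename" list entry.
public final class HashListEntry {

    private HashListEntry() {
    }

    /** Returns {hash, filename}; filename is null when absent. */
    public static String[] split(String line) {
        final int sep = line.indexOf(',');
        if (sep < 0) {
            return new String[] {line, null}; // no separator: hash only
        }
        final String hash = line.substring(0, sep);
        // Guard against a trailing separator with nothing after it.
        final String filename = sep + 1 < line.length() ? line.substring(sep + 1) : null;
        return new String[] {hash, filename};
    }
}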

src/test/resources/tlsh.list
@@ -0,0 +1,9 @@
A4518DA4A8F9517162A409C1DEEA9872AF55C137E00A62C9F0CDD0CE4F6CCD784BB4B7 nifi-nar-bundles/nifi-lumberjack-bundle/nifi-lumberjack-processors/pom.xml
THERE SEEMS TO BE SOMETHING MISSING
6FF02BEF718027B0160B4391212923ED7F1A463D563B1549B86CF62973B197AD2731F Synthetic shorter-INVALID
EB519EA4A8F95171A2A409C1DEEB9872AF55C137E00A5289F1CCD0CE4F6CCD784BB4B7
E2F0818B7AE7173906A72221570E30979B11C0FC47B518A1E89D257E2343CEC02381ED /this/is/also/valid/but/should/not/match
EB519EA4A8F95171A2A409C1DEEB9872AF55C137E00A5289F1CCD0CE4F6CCD784BB4B7 nifi-nar-bundles/nifi-beats-bundle/nifi-beats-processors/pom.xml
EB519EA4A8F95171A2A409C1DEEB9872AF55C137E00A5289F1CCD0CE4F6CCD784BB4B7

6FF02BEF718027B0160B4391212923ED7F1A463D563B1549B86CF62973B197AD2731F8 /this/is/valid/but/should/not/match
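The fixture above mixes well-formed TLSH digests with deliberately broken
entries: a line of plain words, a digest one character short (69 instead of 70
hexadecimal characters), and bare digests with no filename. A hypothetical
pre-check before attempting to parse a line as TLSH (illustrative only, not
the TLSH library's or NiFi's validation):

// Hypothetical sketch of a TLSH digest shape check.
public final class TlshFormat {

    private TlshFormat() {
    }

    // A classic TLSH digest is 70 hexadecimal characters (35 bytes).
    public static boolean looksLikeTlsh(String value) {
        return value != null && value.length() == 70 && value.matches("[0-9A-Fa-f]+");
    }
}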

pom.xml
@@ -1293,6 +1293,12 @@
 			<version>1.2.0-SNAPSHOT</version>
 			<type>nar</type>
 		</dependency>
+		<dependency>
+			<groupId>org.apache.nifi</groupId>
+			<artifactId>nifi-cybersecurity-nar</artifactId>
+			<version>1.2.0-SNAPSHOT</version>
+			<type>nar</type>
+		</dependency>
 		<dependency>
 			<groupId>org.apache.nifi</groupId>
 			<artifactId>nifi-email-nar</artifactId>