mirror of https://github.com/apache/nifi.git
NIFI-3726 - Introduces CompareFuzzyHash processor
- Abstract FuzzyHashContent to reduce a bit of code duplication
NIFI-3726 - Attempt to address peer review comments
NIFI-3726 - Addresses additional PR comments
NIFI-3726 - Fix checkstyle violations
NIFI-3726 - Fix issues with POMs
NIFI-3726 - Fixes the logging level to error in case of failure due to message too short
NIFI-3726 - Capture ArrayIndexOutOfBoundsException when consuming a hash source that matches but lacks a separator AND filename, or matches but lacks a filename (i.e. ends with a trailing separator)

Signed-off-by: Matt Burgess <mattyb149@apache.org>

This closes #1692
This commit is contained in:
parent
960ef9142d
commit
54d47c7f74
|
@ -359,6 +359,11 @@
|
|||
<artifactId>nifi-beats-nar</artifactId>
|
||||
<type>nar</type>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.nifi</groupId>
|
||||
<artifactId>nifi-cybersecurity-nar</artifactId>
|
||||
<type>nar</type>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.nifi</groupId>
|
||||
<artifactId>nifi-email-nar</artifactId>
|
||||
|
|
|
@ -44,12 +44,6 @@
|
|||
<artifactId>tlsh</artifactId>
|
||||
<version>1.0.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.nifi</groupId>
|
||||
<artifactId>nifi-standard-processors</artifactId>
|
||||
<version>1.2.0-SNAPSHOT</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.nifi</groupId>
|
||||
<artifactId>nifi-mock</artifactId>
|
||||
|
@ -65,5 +59,26 @@
|
|||
<artifactId>junit</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.nifi</groupId>
|
||||
<artifactId>nifi-properties</artifactId>
|
||||
<scope>compile</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.rat</groupId>
|
||||
<artifactId>apache-rat-plugin</artifactId>
|
||||
<configuration>
|
||||
<excludes combine.children="append">
|
||||
<exclude>src/test/resources/blank_ssdeep.list</exclude>
|
||||
<exclude>src/test/resources/empty.list</exclude>
|
||||
<exclude>src/test/resources/ssdeep.list</exclude>
|
||||
<exclude>src/test/resources/tlsh.list</exclude>
|
||||
</excludes>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</project>
|
||||
|
|
|
@ -0,0 +1,88 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi.processors.cybersecurity;
|
||||
|
||||
|
||||
import com.idealista.tlsh.TLSH;
|
||||
import info.debatty.java.spamsum.SpamSum;
|
||||
import org.apache.nifi.components.AllowableValue;
|
||||
import org.apache.nifi.components.PropertyDescriptor;
|
||||
import org.apache.nifi.flowfile.FlowFile;
|
||||
import org.apache.nifi.processor.AbstractProcessor;
|
||||
import org.apache.nifi.processor.Relationship;
|
||||
import org.apache.nifi.processor.util.StandardValidators;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
abstract class AbstractFuzzyHashProcessor extends AbstractProcessor {
|
||||
final protected static String ssdeep = "ssdeep";
|
||||
final protected static String tlsh = "tlsh";
|
||||
|
||||
public static final AllowableValue allowableValueSSDEEP = new AllowableValue(
|
||||
ssdeep,
|
||||
ssdeep,
|
||||
"Uses ssdeep / SpamSum 'context triggered piecewise hash'.");
|
||||
public static final AllowableValue allowableValueTLSH = new AllowableValue(
|
||||
tlsh,
|
||||
tlsh,
|
||||
"Uses TLSH (Trend 'Locality Sensitive Hash'). Note: FlowFile Content must be at least 512 characters long");
|
||||
|
||||
public static final PropertyDescriptor ATTRIBUTE_NAME = new PropertyDescriptor.Builder()
|
||||
.name("ATTRIBUTE_NAME")
|
||||
.displayName("Hash Attribute Name")
|
||||
.description("The name of the FlowFile Attribute that should hold the Fuzzy Hash Value")
|
||||
.required(true)
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
.defaultValue("fuzzyhash.value")
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor HASH_ALGORITHM = new PropertyDescriptor.Builder()
|
||||
.name("HASH_ALGORITHM")
|
||||
.displayName("Hashing Algorithm")
|
||||
.description("The hashing algorithm utilised")
|
||||
.allowableValues(allowableValueSSDEEP, allowableValueTLSH)
|
||||
.required(true)
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
.build();
|
||||
|
||||
|
||||
protected List<PropertyDescriptor> descriptors;
|
||||
|
||||
protected Set<Relationship> relationships;
|
||||
|
||||
protected boolean checkMinimumAlgorithmRequirements(String algorithm, FlowFile flowFile) {
|
||||
// Check if content matches minimum length requirement
|
||||
if (algorithm.equals(tlsh) && flowFile.getSize() < 512 ) {
|
||||
return false;
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
protected String generateHash(String algorithm, String content) {
|
||||
switch (algorithm) {
|
||||
case tlsh:
|
||||
return new TLSH(content).hash();
|
||||
case ssdeep:
|
||||
return new SpamSum().HashString(content);
|
||||
default:
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,277 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.nifi.processors.cybersecurity;
|
||||
|
||||
import org.apache.nifi.annotation.behavior.EventDriven;
|
||||
import org.apache.nifi.annotation.behavior.InputRequirement;
|
||||
import org.apache.nifi.annotation.behavior.SideEffectFree;
|
||||
import org.apache.nifi.annotation.behavior.SupportsBatching;
|
||||
import org.apache.nifi.annotation.behavior.WritesAttribute;
|
||||
import org.apache.nifi.annotation.behavior.WritesAttributes;
|
||||
import org.apache.nifi.annotation.documentation.CapabilityDescription;
|
||||
import org.apache.nifi.annotation.documentation.SeeAlso;
|
||||
import org.apache.nifi.annotation.documentation.Tags;
|
||||
import org.apache.nifi.annotation.lifecycle.OnScheduled;
|
||||
import org.apache.nifi.components.AllowableValue;
|
||||
import org.apache.nifi.components.PropertyDescriptor;
|
||||
import org.apache.nifi.flowfile.FlowFile;
|
||||
import org.apache.nifi.logging.ComponentLog;
|
||||
import org.apache.nifi.processor.ProcessContext;
|
||||
import org.apache.nifi.processor.ProcessSession;
|
||||
import org.apache.nifi.processor.ProcessorInitializationContext;
|
||||
import org.apache.nifi.processor.Relationship;
|
||||
import org.apache.nifi.processor.exception.ProcessException;
|
||||
import org.apache.nifi.processor.util.StandardValidators;
|
||||
import org.apache.nifi.processors.cybersecurity.matchers.FuzzyHashMatcher;
|
||||
import org.apache.nifi.processors.cybersecurity.matchers.SSDeepHashMatcher;
|
||||
import org.apache.nifi.processors.cybersecurity.matchers.TLSHHashMatcher;
|
||||
import org.apache.nifi.util.StringUtils;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
|
||||
|
||||
@EventDriven
|
||||
@SideEffectFree
|
||||
@SupportsBatching
|
||||
@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
|
||||
@SeeAlso({FuzzyHashContent.class})
|
||||
@Tags({"hashing", "fuzzy-hashing", "cyber-security"})
|
||||
@CapabilityDescription("Compares an attribute containing a Fuzzy Hash against a file containing a list of fuzzy hashes, " +
|
||||
"appending an attribute to the FlowFile in case of a successful match.")
|
||||
|
||||
@WritesAttributes({
|
||||
@WritesAttribute(attribute = "XXXX.N.match", description = "The match that resembles the attribute specified " +
|
||||
"by the <Hash Attribute Name> property. Note that: 'XXX' gets replaced with the <Hash Attribute Name>"),
|
||||
@WritesAttribute(attribute = "XXXX.N.similarity", description = "The similarity score between this flowfile" +
|
||||
"and its match of the same number N. Note that: 'XXX' gets replaced with the <Hash Attribute Name>")})
|
||||
|
||||
public class CompareFuzzyHash extends AbstractFuzzyHashProcessor {
|
||||
public static final AllowableValue singleMatch = new AllowableValue(
|
||||
"single",
|
||||
"single",
|
||||
"Send FlowFile to matched after the first match above threshold");
|
||||
public static final AllowableValue multiMatch = new AllowableValue(
|
||||
"multi-match",
|
||||
"multi-match",
|
||||
"Iterate full list of hashes before deciding to send FlowFile to matched or unmatched");
|
||||
|
||||
public static final PropertyDescriptor HASH_LIST_FILE = new PropertyDescriptor.Builder()
|
||||
.name("HASH_LIST_FILE")
|
||||
.displayName("Hash List source file")
|
||||
.description("Path to the file containing hashes to be validated against")
|
||||
.required(true)
|
||||
.addValidator(StandardValidators.FILE_EXISTS_VALIDATOR)
|
||||
.build();
|
||||
|
||||
// Note we add a PropertyDescriptor HASH_ALGORITHM and ATTRIBUTE_NAME from parent class
|
||||
|
||||
public static final PropertyDescriptor MATCH_THRESHOLD = new PropertyDescriptor.Builder()
|
||||
// Note that while both TLSH and SSDeep seems to return int, we treat them as double in code.
|
||||
// The rationale behind being the expectation that other algorithms thatmay return double values
|
||||
// may be added to the processor later on.
|
||||
.name("MATCH_THRESHOLD")
|
||||
.displayName("Match threshold")
|
||||
.description("The similarity score must exceed or be equal to in order for" +
|
||||
"match to be considered true. Refer to Additional Information for differences between TLSH " +
|
||||
"and SSDEEP scores and how they relate to this property.")
|
||||
.required(true)
|
||||
.addValidator(StandardValidators.NUMBER_VALIDATOR)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor MATCHING_MODE = new PropertyDescriptor.Builder()
|
||||
.name("MATCHING_MODE")
|
||||
.displayName("Matching mode")
|
||||
.description("Defines if the Processor should try to match as many entries as possible (" + multiMatch.getDisplayName() +
|
||||
") or if it should stop after the first match (" + singleMatch.getDisplayName() + ")")
|
||||
.required(true)
|
||||
.allowableValues(singleMatch,multiMatch)
|
||||
.defaultValue(singleMatch.getValue())
|
||||
.build();
|
||||
|
||||
public static final Relationship REL_FOUND = new Relationship.Builder()
|
||||
.name("found")
|
||||
.description("Any FlowFile that is successfully matched to an existing hash will be sent to this Relationship.")
|
||||
.build();
|
||||
|
||||
public static final Relationship REL_NOT_FOUND = new Relationship.Builder()
|
||||
.name("not-found")
|
||||
.description("Any FlowFile that cannot be matched to an existing hash will be sent to this Relationship.")
|
||||
.build();
|
||||
|
||||
public static final Relationship REL_FAILURE = new Relationship.Builder()
|
||||
.name("failure")
|
||||
.description("Any FlowFile that cannot be matched, e.g. (lacks the attribute) will be sent to this Relationship.")
|
||||
.build();
|
||||
|
||||
@Override
|
||||
protected void init(final ProcessorInitializationContext context) {
|
||||
final List<PropertyDescriptor> descriptors = new ArrayList<PropertyDescriptor>();
|
||||
descriptors.add(HASH_LIST_FILE);
|
||||
// As mentioned above, add the PropertyDescriptor HASH_ALGORITHM and ATTRIBUTE_NAME from parent class
|
||||
descriptors.add(HASH_ALGORITHM);
|
||||
descriptors.add(ATTRIBUTE_NAME);
|
||||
descriptors.add(MATCH_THRESHOLD);
|
||||
descriptors.add(MATCHING_MODE);
|
||||
this.descriptors = Collections.unmodifiableList(descriptors);
|
||||
|
||||
final Set<Relationship> relationships = new HashSet<Relationship>();
|
||||
relationships.add(REL_FOUND);
|
||||
relationships.add(REL_NOT_FOUND);
|
||||
relationships.add(REL_FAILURE);
|
||||
this.relationships = Collections.unmodifiableSet(relationships);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<Relationship> getRelationships() {
|
||||
return this.relationships;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final List<PropertyDescriptor> getSupportedPropertyDescriptors() {
|
||||
return descriptors;
|
||||
}
|
||||
|
||||
@OnScheduled
|
||||
public void onScheduled(final ProcessContext context) {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
|
||||
|
||||
FlowFile flowFile = session.get();
|
||||
if (flowFile == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
final ComponentLog logger = getLogger();
|
||||
String algorithm = context.getProperty(HASH_ALGORITHM).getValue();
|
||||
|
||||
final String attributeName = context.getProperty(ATTRIBUTE_NAME).getValue();
|
||||
String inputHash = flowFile.getAttribute(attributeName);
|
||||
|
||||
if (inputHash == null) {
|
||||
getLogger().info("FlowFile {} lacks the required '{}' attribute, routing to failure.",
|
||||
new Object[]{flowFile, attributeName});
|
||||
session.transfer(flowFile, REL_FAILURE);
|
||||
return;
|
||||
}
|
||||
|
||||
FuzzyHashMatcher fuzzyHashMatcher = null;
|
||||
|
||||
switch (algorithm) {
|
||||
case tlsh:
|
||||
fuzzyHashMatcher = new TLSHHashMatcher(getLogger());
|
||||
break;
|
||||
case ssdeep:
|
||||
fuzzyHashMatcher = new SSDeepHashMatcher(getLogger());
|
||||
break;
|
||||
default:
|
||||
getLogger().error("Seems like the processor is configured to use unsupported algorithm '{}' ? Yielding.",
|
||||
new Object[]{algorithm});
|
||||
context.yield();
|
||||
return;
|
||||
}
|
||||
|
||||
if (fuzzyHashMatcher.isValidHash(inputHash) == false) {
|
||||
// and if that is the case we log
|
||||
logger.error("Invalid hash provided. Sending to failure");
|
||||
// and send to failure
|
||||
session.transfer(flowFile, REL_FAILURE);
|
||||
session.commit();
|
||||
return;
|
||||
}
|
||||
|
||||
double similarity = 0;
|
||||
double matchThreshold = context.getProperty(MATCH_THRESHOLD).asDouble();
|
||||
|
||||
try {
|
||||
Map<String, Double> matched = new ConcurrentHashMap<String, Double>();
|
||||
|
||||
BufferedReader reader = fuzzyHashMatcher.getReader(context.getProperty(HASH_LIST_FILE).getValue());
|
||||
|
||||
String line = null;
|
||||
|
||||
iterateFile: while ((line = reader.readLine()) != null) {
|
||||
if (line != null) {
|
||||
similarity = fuzzyHashMatcher.getSimilarity(inputHash, line);
|
||||
|
||||
if (fuzzyHashMatcher.matchExceedsThreshold(similarity, matchThreshold)) {
|
||||
String match = fuzzyHashMatcher.getMatch(line);
|
||||
// A malformed file may cause a match with no filename
|
||||
// Because this would simply look odd, we ignore such entry and log
|
||||
if (!StringUtils.isEmpty(match)) {
|
||||
matched.put(match, similarity);
|
||||
} else {
|
||||
logger.error("Found a match against a malformed entry '{}'. Please inspect the contents of" +
|
||||
"the {} file and ensure they are properly formatted",
|
||||
new Object[]{line, HASH_LIST_FILE.getDisplayName()});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check if single match is desired and if a match has been made
|
||||
if (context.getProperty(MATCHING_MODE).getValue() == singleMatch.getValue() && (matched.size() > 0)) {
|
||||
// and save time by breaking the outer loop
|
||||
break iterateFile;
|
||||
}
|
||||
}
|
||||
// no matter if the break was called or not, Continue processing
|
||||
// First by creating a new map to hold attributes
|
||||
Map<String, String> attributes = new ConcurrentHashMap<String, String>();
|
||||
|
||||
// Then by iterating over the hashmap of matches
|
||||
if (matched.size() > 0) {
|
||||
int x = 0;
|
||||
for (Map.Entry<String, Double> entry : matched.entrySet()) {
|
||||
// defining attributes accordingly
|
||||
attributes.put(
|
||||
attributeName + "." + x + ".match",
|
||||
entry.getKey());
|
||||
attributes.put(
|
||||
attributeName + "." + x + ".similarity",
|
||||
String.valueOf(entry.getValue()));
|
||||
x++;
|
||||
}
|
||||
// Finally, append the attributes to the flowfile and sent to match
|
||||
flowFile = session.putAllAttributes(flowFile, attributes);
|
||||
session.transfer(flowFile, REL_FOUND);
|
||||
session.commit();
|
||||
return;
|
||||
} else {
|
||||
// Otherwise send it to non-match
|
||||
session.transfer(flowFile, REL_NOT_FOUND);
|
||||
session.commit();
|
||||
return;
|
||||
}
|
||||
} catch (IOException e) {
|
||||
logger.error("Error while reading the hash input source" );
|
||||
context.yield();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
|
@ -16,16 +16,12 @@
|
|||
*/
|
||||
package org.apache.nifi.processors.cybersecurity;
|
||||
|
||||
import com.idealista.tlsh.TLSH;
|
||||
import com.idealista.tlsh.exceptions.InsufficientComplexityException;
|
||||
import info.debatty.java.spamsum.SpamSum;
|
||||
|
||||
import org.apache.nifi.annotation.behavior.EventDriven;
|
||||
import org.apache.nifi.annotation.behavior.InputRequirement;
|
||||
import org.apache.nifi.annotation.behavior.SideEffectFree;
|
||||
import org.apache.nifi.annotation.behavior.SupportsBatching;
|
||||
import org.apache.nifi.annotation.behavior.ReadsAttribute;
|
||||
import org.apache.nifi.annotation.behavior.ReadsAttributes;
|
||||
import org.apache.nifi.annotation.behavior.WritesAttribute;
|
||||
import org.apache.nifi.annotation.behavior.WritesAttributes;
|
||||
import org.apache.nifi.annotation.documentation.CapabilityDescription;
|
||||
|
@ -33,19 +29,17 @@ import org.apache.nifi.annotation.documentation.SeeAlso;
|
|||
import org.apache.nifi.annotation.documentation.Tags;
|
||||
import org.apache.nifi.annotation.lifecycle.OnScheduled;
|
||||
|
||||
import org.apache.nifi.components.AllowableValue;
|
||||
import org.apache.nifi.components.PropertyDescriptor;
|
||||
import org.apache.nifi.flowfile.FlowFile;
|
||||
import org.apache.nifi.logging.ComponentLog;
|
||||
import org.apache.nifi.processor.exception.ProcessException;
|
||||
import org.apache.nifi.processor.AbstractProcessor;
|
||||
import org.apache.nifi.processor.ProcessContext;
|
||||
import org.apache.nifi.processor.ProcessSession;
|
||||
import org.apache.nifi.processor.ProcessorInitializationContext;
|
||||
import org.apache.nifi.processor.Relationship;
|
||||
import org.apache.nifi.processor.io.InputStreamCallback;
|
||||
import org.apache.nifi.processor.util.StandardValidators;
|
||||
import org.apache.nifi.processors.standard.HashContent;
|
||||
import org.apache.nifi.util.StringUtils;
|
||||
|
||||
import org.apache.nifi.stream.io.StreamUtils;
|
||||
|
||||
|
@ -73,31 +67,13 @@ import java.util.concurrent.atomic.AtomicReference;
|
|||
"evaluations in memory. Accordingly, it is important to consider the anticipated profile of content being " +
|
||||
"evaluated by this processor and the hardware supporting it especially when working against large files.")
|
||||
|
||||
@SeeAlso({HashContent.class})
|
||||
@ReadsAttributes({@ReadsAttribute(attribute="", description="")})
|
||||
@SeeAlso(classNames = {"org.apache.nifi.processors.standard.HashContent"}, value = {CompareFuzzyHash.class})
|
||||
@WritesAttributes({@WritesAttribute(attribute = "<Hash Attribute Name>", description = "This Processor adds an attribute whose value is the result of Hashing the "
|
||||
+ "existing FlowFile content. The name of this attribute is specified by the <Hash Attribute Name> property")})
|
||||
|
||||
public class FuzzyHashContent extends AbstractProcessor {
|
||||
public class FuzzyHashContent extends AbstractFuzzyHashProcessor {
|
||||
|
||||
public static final AllowableValue allowableValueSSDEEP = new AllowableValue(
|
||||
"ssdeep",
|
||||
"ssdeep",
|
||||
"Uses ssdeep / SpamSum 'context triggered piecewise hash'.");
|
||||
public static final AllowableValue allowableValueTLSH = new AllowableValue(
|
||||
"tlsh",
|
||||
"tlsh",
|
||||
"Uses TLSH (Trend 'Locality Sensitive Hash'). Note: FlowFile Content must be at least 512 characters long");
|
||||
|
||||
public static final PropertyDescriptor ATTRIBUTE_NAME = new PropertyDescriptor.Builder()
|
||||
.name("ATTRIBUTE_NAME")
|
||||
.displayName("Hash Attribute Name")
|
||||
.description("The name of the FlowFile Attribute into which the Hash Value should be written. " +
|
||||
"If the value already exists, it will be overwritten")
|
||||
.required(true)
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
.defaultValue("fuzzyhash.value")
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor HASH_ALGORITHM = new PropertyDescriptor.Builder()
|
||||
.name("HASH_ALGORITHM")
|
||||
|
@ -109,12 +85,12 @@ public class FuzzyHashContent extends AbstractProcessor {
|
|||
.build();
|
||||
|
||||
public static final Relationship REL_SUCCESS = new Relationship.Builder()
|
||||
.name("Success")
|
||||
.name("success")
|
||||
.description("Any FlowFile that is successfully hashed will be sent to this Relationship.")
|
||||
.build();
|
||||
|
||||
public static final Relationship REL_FAILURE = new Relationship.Builder()
|
||||
.name("Failure")
|
||||
.name("failure")
|
||||
.description("Any FlowFile that is successfully hashed will be sent to this Relationship.")
|
||||
.build();
|
||||
|
||||
|
@ -157,18 +133,17 @@ public class FuzzyHashContent extends AbstractProcessor {
|
|||
}
|
||||
|
||||
final ComponentLog logger = getLogger();
|
||||
String algorithm = context.getProperty(HASH_ALGORITHM).getValue();
|
||||
|
||||
// Check if content matches minimum length requirement
|
||||
if (context.getProperty(HASH_ALGORITHM).equals(allowableValueTLSH) && flowFile.getSize() < 512 ) {
|
||||
logger.info("The content of {} is smaller than the minimum required by TLSH, routing to failure", new Object[]{flowFile});
|
||||
|
||||
if (checkMinimumAlgorithmRequirements(algorithm, flowFile) == false) {
|
||||
logger.error("The content of '{}' is smaller than the minimum required by {}, routing to failure",
|
||||
new Object[]{flowFile, algorithm});
|
||||
session.transfer(flowFile, REL_FAILURE);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
final AtomicReference<String> hashValueHolder = new AtomicReference<>(null);
|
||||
|
||||
try {
|
||||
|
@ -178,13 +153,12 @@ public class FuzzyHashContent extends AbstractProcessor {
|
|||
try (ByteArrayOutputStream holder = new ByteArrayOutputStream()) {
|
||||
StreamUtils.copy(in,holder);
|
||||
|
||||
if (context.getProperty(HASH_ALGORITHM).getValue().equals(allowableValueSSDEEP.getValue())) {
|
||||
hashValueHolder.set(new SpamSum().HashString(holder.toString()));
|
||||
String hashValue = generateHash(algorithm, holder.toString());
|
||||
if (StringUtils.isBlank(hashValue) == false) {
|
||||
hashValueHolder.set(hashValue);
|
||||
}
|
||||
|
||||
if (context.getProperty(HASH_ALGORITHM).getValue().equals(allowableValueTLSH.getValue())) {
|
||||
hashValueHolder.set(new TLSH(holder.toString()).hash());
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
});
|
||||
|
@ -199,4 +173,5 @@ public class FuzzyHashContent extends AbstractProcessor {
|
|||
session.transfer(flowFile, REL_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,35 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi.processors.cybersecurity.matchers;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
|
||||
/**
 * Strategy interface abstracting an individual fuzzy-hashing algorithm
 * (e.g. ssdeep, TLSH) so processors can iterate a hash list file and score
 * candidate matches without knowing algorithm specifics.
 */
public interface FuzzyHashMatcher {

    /**
     * Opens a reader over the hash list source, positioned at the first hash entry
     * (implementations may skip algorithm-specific header lines).
     *
     * @param source path to the file containing the hash list
     * @return a reader over the hash entries
     * @throws IOException if the source cannot be opened or read
     */
    BufferedReader getReader(String source) throws IOException;

    /**
     * Returns true when the similarity score counts as a match for this algorithm.
     * Note the direction of the comparison is algorithm-specific.
     */
    boolean matchExceedsThreshold(double similarity, double matchThreshold);

    /** Computes the similarity score between the input hash and a line from the hash list. */
    double getSimilarity(String inputHash, String existingHash);

    /** Validates that the given string is a well-formed hash for this algorithm. */
    boolean isValidHash(String inputHash);

    /** Extracts the hash portion from a hash-list line. */
    String getHash(String line);

    /** Extracts the match identifier (e.g. the filename) from a hash-list line. */
    String getMatch(String line);
}
|
|
@ -0,0 +1,126 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi.processors.cybersecurity.matchers;
|
||||
|
||||
import info.debatty.java.spamsum.SpamSum;
|
||||
import org.apache.nifi.logging.ComponentLog;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.util.Scanner;
|
||||
|
||||
public class SSDeepHashMatcher implements FuzzyHashMatcher {
|
||||
|
||||
ComponentLog logger;
|
||||
|
||||
public SSDeepHashMatcher() {
|
||||
|
||||
}
|
||||
|
||||
public SSDeepHashMatcher(ComponentLog logger) {
|
||||
this.logger = logger;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BufferedReader getReader(String source) throws IOException {
|
||||
|
||||
File file = new File(source);
|
||||
|
||||
FileInputStream fileInputStream = new FileInputStream(file);
|
||||
BufferedReader reader = new BufferedReader(new InputStreamReader(fileInputStream));
|
||||
|
||||
// If SSdeep skip the first line (as the usual format used by other tools add a header line
|
||||
// to a file list
|
||||
reader.readLine();
|
||||
|
||||
return reader;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean matchExceedsThreshold(double similarity, double matchThreshold) {
|
||||
if (similarity >= matchThreshold) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public double getSimilarity(String inputHash, String existingHash) {
|
||||
String[] hashToCompare = existingHash.split(",", 2);
|
||||
if (hashToCompare.length > 0) {
|
||||
return new SpamSum().match(inputHash, hashToCompare[0]);
|
||||
} else {
|
||||
return Double.NaN;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isValidHash(String inputHash) {
|
||||
// format looks like
|
||||
// blocksize:hash:hash
|
||||
|
||||
String [] fields = inputHash.split(":", 3);
|
||||
|
||||
if (fields.length == 3) {
|
||||
Scanner sc = new Scanner(fields[0]);
|
||||
|
||||
boolean isNumber = sc.hasNextInt();
|
||||
if (isNumber == false && logger != null) {
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("Field should be numeric but got '{}'. Will tell processor to ignore.",
|
||||
new Object[] {fields[0]});
|
||||
}
|
||||
}
|
||||
|
||||
boolean hashOneIsNotEmpty = !fields[1].isEmpty();
|
||||
boolean hashTwoIsNotEmpty = !fields[2].isEmpty();
|
||||
|
||||
if (isNumber && hashOneIsNotEmpty && hashTwoIsNotEmpty) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getHash(String line) {
|
||||
if (isValidHash(line)) {
|
||||
return line.split(",", 2)[0];
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getMatch(String line) {
|
||||
if (isValidHash(line)) {
|
||||
String[] match = line.split(",", 2);
|
||||
// Because the file can be malformed and contain an unammed match,
|
||||
// if match has a filename...
|
||||
if (match.length == 2) {
|
||||
// Return it.
|
||||
return match[1];
|
||||
}
|
||||
}
|
||||
// Or return null
|
||||
return null;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,117 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi.processors.cybersecurity.matchers;
|
||||
|
||||
|
||||
import com.idealista.tlsh.digests.Digest;
|
||||
import com.idealista.tlsh.digests.DigestBuilder;
|
||||
import org.apache.nifi.logging.ComponentLog;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
|
||||
import static org.apache.nifi.processors.cybersecurity.CompareFuzzyHash.HASH_LIST_FILE;
|
||||
|
||||
public class TLSHHashMatcher implements FuzzyHashMatcher {
|
||||
|
||||
ComponentLog logger;
|
||||
|
||||
public TLSHHashMatcher(ComponentLog logger) {
|
||||
this.logger = logger;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BufferedReader getReader(String source) throws IOException {
|
||||
|
||||
File file = new File(source);
|
||||
|
||||
FileInputStream fileInputStream = new FileInputStream(file);
|
||||
BufferedReader reader = new BufferedReader(new InputStreamReader(fileInputStream));
|
||||
|
||||
return reader;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean matchExceedsThreshold(double similarity, double matchThreshold) {
|
||||
if (similarity <= matchThreshold) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public double getSimilarity(String inputHash, String existingHash) {
|
||||
String[] hashToCompare = existingHash.split("\t", 2);
|
||||
// This will return null in case it fails validation
|
||||
if (isValidHash(inputHash) && isValidHash(hashToCompare[0])) {
|
||||
Digest inputDigest = new DigestBuilder().withHash(inputHash).build();
|
||||
Digest existingHashDigest = new DigestBuilder().withHash(hashToCompare[0]).build();
|
||||
|
||||
return inputDigest.calculateDifference(existingHashDigest, true);
|
||||
} else {
|
||||
return Double.NaN;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isValidHash(String stringFromHashList) {
|
||||
String[] hashToCompare = stringFromHashList.split("\t", 2);
|
||||
// This will return null in case it fails validation
|
||||
if (hashToCompare.length > 0) {
|
||||
// Because DigestBuilder raises all sort of exceptions, so in order to keep the onTrigger loop a
|
||||
// bit cleaner, we capture them here and return NaN to the loop above, otherwise simply return the
|
||||
// similarity score.
|
||||
try {
|
||||
Digest digest = new DigestBuilder().withHash(hashToCompare[0]).build();
|
||||
return true;
|
||||
} catch (ArrayIndexOutOfBoundsException | StringIndexOutOfBoundsException | NumberFormatException e) {
|
||||
logger.error("Got {} while processing the string '{}'. This usually means the file " +
|
||||
"defined by '{}' property contains invalid entries.",
|
||||
new Object[]{e.getCause(), hashToCompare[0], HASH_LIST_FILE.getDisplayName()});
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getHash(String line) {
|
||||
if (isValidHash(line)) {
|
||||
return line.split("\t", 2)[0];
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getMatch(String line) {
|
||||
if (isValidHash(line)) {
|
||||
String[] match = line.split("\t", 2);
|
||||
// Because the file can be malformed and contain an unammed match,
|
||||
// if match has a filename...
|
||||
if (match.length == 2) {
|
||||
// Return it.
|
||||
return match[1];
|
||||
}
|
||||
}
|
||||
// Or return null
|
||||
return null;
|
||||
}
|
||||
}
|
|
@ -12,4 +12,5 @@
|
|||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
org.apache.nifi.processors.cybersecurity.FuzzyHashContent
|
||||
org.apache.nifi.processors.cybersecurity.FuzzyHashContent
|
||||
org.apache.nifi.processors.cybersecurity.CompareFuzzyHash
|
|
@ -0,0 +1,45 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<title>CompareFuzzyHash</title>
|
||||
<link rel="stylesheet" href="/nifi-docs/css/component-usage.css"
|
||||
type="text/css" />
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<!-- Processor Documentation ================================================== -->
|
||||
<h2>Description:</h2>
|
||||
<p>This Processor compares an attribute containing a Fuzzy Hash (TLSH or SSDeep) value against a list
|
||||
of hashes of the same family (i.e. TLSH is compared with a list of TLSH hashes), routing them to match or non-match
|
||||
depending on a user configured threshold for similarity.
|
||||
</p>
|
||||
|
||||
<p>It is important to note that:</p>
|
||||
|
||||
<ul>
|
||||
<li>TLSH similarity increases as product of its comparison function decreases (i.e. 0 indicates nearly identical files)</li>
|
||||
<li>SSDeep similarity directly relates to the product of its comparison function (e.g. 99 indicates nearly identical files)</li>
|
||||
</ul>
|
||||
<p>Based on the above, this processor when referring to "exceed the score" may be referring to:
|
||||
|
||||
<ul>
|
||||
<li>a value equal or lower than the configured threshold (in case of TLSH)</li>
|
||||
<li>a value equal or higher than the configured threshold (in case of SSDeep)</li>
|
||||
</ul>
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,380 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi.processors.cybersecurity;
|
||||
|
||||
|
||||
import org.apache.nifi.processors.cybersecurity.matchers.FuzzyHashMatcher;
|
||||
import org.apache.nifi.processors.cybersecurity.matchers.SSDeepHashMatcher;
|
||||
import org.apache.nifi.util.MockFlowFile;
|
||||
import org.apache.nifi.util.TestRunner;
|
||||
import org.apache.nifi.util.TestRunners;
|
||||
import org.junit.After;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
|
||||
public class TestCompareFuzzyHash {
|
||||
String ssdeepInput = "48:c1xs8Z/m6H0eRH31S8p8bHENANkPrNy4tkPytwPyh2jTytxPythPytNdPytDgYyF:OuO/mg3HFSRHEb44RNMi6uHU2hcq3";
|
||||
String tlshInput = "EB519EA4A8F95171A2A409C1DEEB9872AF55C137E00A5289F1CCD0CE4F6CCD784BB4B7";
|
||||
|
||||
final CompareFuzzyHash proc = new CompareFuzzyHash();
|
||||
final private TestRunner runner = TestRunners.newTestRunner(proc);
|
||||
|
||||
@After
|
||||
public void stop() {
|
||||
runner.shutdown();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSsdeepCompareFuzzyHash() {
|
||||
double matchingSimilarity = 80;
|
||||
runner.setProperty(CompareFuzzyHash.HASH_ALGORITHM, CompareFuzzyHash.allowableValueSSDEEP.getValue());
|
||||
runner.setProperty(CompareFuzzyHash.ATTRIBUTE_NAME, "fuzzyhash.value");
|
||||
runner.setProperty(CompareFuzzyHash.HASH_LIST_FILE, "src/test/resources/ssdeep.list");
|
||||
runner.setProperty(CompareFuzzyHash.MATCH_THRESHOLD, String.valueOf(matchingSimilarity));
|
||||
runner.setProperty(CompareFuzzyHash.MATCHING_MODE, CompareFuzzyHash.singleMatch.getValue());
|
||||
|
||||
Map<String, String> attributes = new HashMap<>();
|
||||
attributes.put("fuzzyhash.value", ssdeepInput);
|
||||
|
||||
runner.enqueue("bogus".getBytes(), attributes);
|
||||
runner.run();
|
||||
|
||||
runner.assertQueueEmpty();
|
||||
runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_FOUND, 1);
|
||||
|
||||
final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_FOUND).get(0);
|
||||
|
||||
|
||||
outFile.assertAttributeEquals(
|
||||
"fuzzyhash.value.0.match",
|
||||
"\"nifi/nifi-nar-bundles/nifi-beats-bundle/nifi-beats-processors/pom.xml\""
|
||||
);
|
||||
double similarity = Double.valueOf(outFile.getAttribute("fuzzyhash.value.0.similarity"));
|
||||
Assert.assertTrue(similarity >= matchingSimilarity);
|
||||
|
||||
outFile.assertAttributeNotExists("fuzzyhash.value.1.match");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSsdeepCompareFuzzyHashMultipleMatches() {
|
||||
double matchingSimilarity = 80;
|
||||
runner.setProperty(CompareFuzzyHash.HASH_ALGORITHM, CompareFuzzyHash.allowableValueSSDEEP.getValue());
|
||||
runner.setProperty(CompareFuzzyHash.ATTRIBUTE_NAME, "fuzzyhash.value");
|
||||
runner.setProperty(CompareFuzzyHash.HASH_LIST_FILE, "src/test/resources/ssdeep.list");
|
||||
runner.setProperty(CompareFuzzyHash.MATCH_THRESHOLD, String.valueOf(matchingSimilarity));
|
||||
runner.setProperty(CompareFuzzyHash.MATCHING_MODE, CompareFuzzyHash.multiMatch.getValue());
|
||||
|
||||
Map<String, String> attributes = new HashMap<>();
|
||||
attributes.put("fuzzyhash.value", ssdeepInput );
|
||||
|
||||
runner.enqueue("bogus".getBytes(), attributes);
|
||||
runner.run();
|
||||
|
||||
runner.assertQueueEmpty();
|
||||
runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_FOUND, 1);
|
||||
|
||||
final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_FOUND).get(0);
|
||||
|
||||
|
||||
outFile.assertAttributeEquals("fuzzyhash.value.0.match",
|
||||
"\"nifi/nifi-nar-bundles/nifi-lumberjack-bundle/nifi-lumberjack-processors/pom.xml\""
|
||||
);
|
||||
|
||||
double similarity = Double.valueOf(outFile.getAttribute("fuzzyhash.value.0.similarity"));
|
||||
Assert.assertTrue(similarity >= matchingSimilarity);
|
||||
|
||||
outFile.assertAttributeEquals("fuzzyhash.value.1.match",
|
||||
"\"nifi/nifi-nar-bundles/nifi-beats-bundle/nifi-beats-processors/pom.xml\""
|
||||
);
|
||||
similarity = Double.valueOf(outFile.getAttribute("fuzzyhash.value.1.similarity"));
|
||||
Assert.assertTrue(similarity >= matchingSimilarity);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSsdeepCompareFuzzyHashWithBlankHashList() {
|
||||
double matchingSimilarity = 80;
|
||||
runner.setProperty(CompareFuzzyHash.HASH_ALGORITHM, CompareFuzzyHash.allowableValueSSDEEP.getValue());
|
||||
runner.setProperty(CompareFuzzyHash.ATTRIBUTE_NAME, "fuzzyhash.value");
|
||||
runner.setProperty(CompareFuzzyHash.HASH_LIST_FILE, "src/test/resources/blank_ssdeep.list");
|
||||
runner.setProperty(CompareFuzzyHash.MATCH_THRESHOLD, String.valueOf(matchingSimilarity));
|
||||
|
||||
Map<String, String> attributes = new HashMap<>();
|
||||
attributes.put("fuzzyhash.value", "6:hERjIfhRrlB63J0FDw1NBQmEH68xwMSELN:hZrlB62IwMS");
|
||||
|
||||
runner.enqueue("bogus".getBytes(), attributes);
|
||||
runner.run();
|
||||
|
||||
runner.assertQueueEmpty();
|
||||
runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_NOT_FOUND, 1);
|
||||
|
||||
final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_NOT_FOUND).get(0);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSsdeepCompareFuzzyHashWithInvalidHashList() {
|
||||
// This is different from "BlankHashList series of tests in that the file lacks headers and as such is totally
|
||||
// invalid
|
||||
double matchingSimilarity = 80;
|
||||
runner.setProperty(CompareFuzzyHash.HASH_ALGORITHM, CompareFuzzyHash.allowableValueSSDEEP.getValue());
|
||||
runner.setProperty(CompareFuzzyHash.ATTRIBUTE_NAME, "fuzzyhash.value");
|
||||
runner.setProperty(CompareFuzzyHash.HASH_LIST_FILE, "src/test/resources/empty.list");
|
||||
runner.setProperty(CompareFuzzyHash.MATCH_THRESHOLD, String.valueOf(matchingSimilarity));
|
||||
|
||||
Map<String, String> attributes = new HashMap<>();
|
||||
attributes.put("fuzzyhash.value", "6:hERjIfhRrlB63J0FDw1NBQmEH68xwMSELN:hZrlB62IwMS");
|
||||
|
||||
runner.enqueue("bogus".getBytes(), attributes);
|
||||
runner.run();
|
||||
|
||||
runner.assertQueueEmpty();
|
||||
runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_NOT_FOUND, 1);
|
||||
|
||||
final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_NOT_FOUND).get(0);
|
||||
|
||||
outFile.assertAttributeNotExists("fuzzyhash.value.0.match");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSsdeepCompareFuzzyHashWithInvalidHash() {
|
||||
double matchingSimilarity = 80;
|
||||
runner.setProperty(CompareFuzzyHash.HASH_ALGORITHM, CompareFuzzyHash.allowableValueSSDEEP.getValue());
|
||||
runner.setProperty(CompareFuzzyHash.ATTRIBUTE_NAME, "fuzzyhash.value");
|
||||
runner.setProperty(CompareFuzzyHash.HASH_LIST_FILE, "src/test/resources/ssdeep.list");
|
||||
runner.setProperty(CompareFuzzyHash.MATCH_THRESHOLD, String.valueOf(matchingSimilarity));
|
||||
runner.setProperty(CompareFuzzyHash.MATCHING_MODE, CompareFuzzyHash.singleMatch.getValue());
|
||||
|
||||
Map<String, String> attributes = new HashMap<>();
|
||||
attributes.put("fuzzyhash.value", "Test test test chocolate!");
|
||||
|
||||
runner.enqueue("bogus".getBytes(), attributes);
|
||||
runner.run();
|
||||
|
||||
runner.assertQueueEmpty();
|
||||
runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_FAILURE, 1);
|
||||
|
||||
final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_FAILURE).get(0);
|
||||
|
||||
outFile.assertAttributeNotExists("fuzzyhash.value.0.match");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testTLSHCompareFuzzyHash() {
|
||||
double matchingSimilarity = 200;
|
||||
runner.setProperty(CompareFuzzyHash.HASH_ALGORITHM, CompareFuzzyHash.allowableValueTLSH.getValue());
|
||||
runner.setProperty(CompareFuzzyHash.ATTRIBUTE_NAME, "fuzzyhash.value");
|
||||
runner.setProperty(CompareFuzzyHash.HASH_LIST_FILE, "src/test/resources/tlsh.list");
|
||||
runner.setProperty(CompareFuzzyHash.MATCH_THRESHOLD, String.valueOf(matchingSimilarity));
|
||||
runner.setProperty(CompareFuzzyHash.MATCHING_MODE, CompareFuzzyHash.singleMatch.getValue());
|
||||
|
||||
Map<String, String> attributes = new HashMap<>();
|
||||
attributes.put("fuzzyhash.value", tlshInput);
|
||||
|
||||
runner.enqueue("bogus".getBytes(), attributes);
|
||||
runner.run();
|
||||
|
||||
runner.assertQueueEmpty();
|
||||
runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_FOUND, 1);
|
||||
|
||||
final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_FOUND).get(0);
|
||||
|
||||
outFile.assertAttributeEquals(
|
||||
"fuzzyhash.value.0.match",
|
||||
"nifi-nar-bundles/nifi-lumberjack-bundle/nifi-lumberjack-processors/pom.xml"
|
||||
);
|
||||
double similarity = Double.valueOf(outFile.getAttribute("fuzzyhash.value.0.similarity"));
|
||||
Assert.assertTrue(similarity <= matchingSimilarity);
|
||||
|
||||
outFile.assertAttributeNotExists("fuzzyhash.value.1.match");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTLSHCompareFuzzyHashMultipleMatches() {
|
||||
double matchingSimilarity = 200;
|
||||
runner.setProperty(CompareFuzzyHash.HASH_ALGORITHM, CompareFuzzyHash.allowableValueTLSH.getValue());
|
||||
runner.setProperty(CompareFuzzyHash.ATTRIBUTE_NAME, "fuzzyhash.value");
|
||||
runner.setProperty(CompareFuzzyHash.HASH_LIST_FILE, "src/test/resources/tlsh.list");
|
||||
runner.setProperty(CompareFuzzyHash.MATCH_THRESHOLD, String.valueOf(matchingSimilarity));
|
||||
runner.setProperty(CompareFuzzyHash.MATCHING_MODE, CompareFuzzyHash.multiMatch.getValue());
|
||||
|
||||
Map<String, String> attributes = new HashMap<>();
|
||||
attributes.put("fuzzyhash.value", tlshInput);
|
||||
|
||||
runner.enqueue("bogus".getBytes(), attributes);
|
||||
runner.run();
|
||||
|
||||
runner.assertQueueEmpty();
|
||||
runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_FOUND, 1);
|
||||
|
||||
final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_FOUND).get(0);
|
||||
|
||||
outFile.assertAttributeEquals(
|
||||
"fuzzyhash.value.0.match",
|
||||
"nifi-nar-bundles/nifi-lumberjack-bundle/nifi-lumberjack-processors/pom.xml"
|
||||
);
|
||||
double similarity = Double.valueOf(outFile.getAttribute("fuzzyhash.value.0.similarity"));
|
||||
Assert.assertTrue(similarity <= matchingSimilarity);
|
||||
|
||||
outFile.assertAttributeEquals(
|
||||
"fuzzyhash.value.1.match",
|
||||
"nifi-nar-bundles/nifi-beats-bundle/nifi-beats-processors/pom.xml"
|
||||
);
|
||||
similarity = Double.valueOf(outFile.getAttribute("fuzzyhash.value.1.similarity"));
|
||||
Assert.assertTrue(similarity <= matchingSimilarity);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testTLSHCompareFuzzyHashWithBlankFile() {
|
||||
// This is different from "BlankHashList series of tests in that the file lacks headers and as such is totally
|
||||
// invalid
|
||||
double matchingSimilarity = 200;
|
||||
runner.setProperty(CompareFuzzyHash.HASH_ALGORITHM, CompareFuzzyHash.allowableValueTLSH.getValue());
|
||||
runner.setProperty(CompareFuzzyHash.ATTRIBUTE_NAME, "fuzzyhash.value");
|
||||
runner.setProperty(CompareFuzzyHash.HASH_LIST_FILE, "src/test/resources/empty.list");
|
||||
runner.setProperty(CompareFuzzyHash.MATCH_THRESHOLD, String.valueOf(matchingSimilarity));
|
||||
|
||||
Map<String, String> attributes = new HashMap<>();
|
||||
attributes.put("fuzzyhash.value", "E2F0818B7AE7173906A72221570E30979B11C0FC47B518A1E89D257E2343CEC02381ED");
|
||||
|
||||
runner.enqueue("bogus".getBytes(), attributes);
|
||||
runner.run();
|
||||
|
||||
runner.assertQueueEmpty();
|
||||
runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_NOT_FOUND, 1);
|
||||
|
||||
final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_NOT_FOUND).get(0);
|
||||
|
||||
outFile.assertAttributeNotExists("fuzzyhash.value.0.match");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTLSHCompareFuzzyHashWithEmptyHashList() {
|
||||
double matchingSimilarity = 200;
|
||||
runner.setProperty(CompareFuzzyHash.HASH_ALGORITHM, CompareFuzzyHash.allowableValueTLSH.getValue());
|
||||
runner.setProperty(CompareFuzzyHash.ATTRIBUTE_NAME, "fuzzyhash.value");
|
||||
runner.setProperty(CompareFuzzyHash.HASH_LIST_FILE, "src/test/resources/empty.list");
|
||||
runner.setProperty(CompareFuzzyHash.MATCH_THRESHOLD, String.valueOf(matchingSimilarity));
|
||||
|
||||
Map<String, String> attributes = new HashMap<>();
|
||||
attributes.put("fuzzyhash.value", "E2F0818B7AE7173906A72221570E30979B11C0FC47B518A1E89D257E2343CEC02381ED");
|
||||
|
||||
runner.enqueue("bogus".getBytes(), attributes);
|
||||
runner.run();
|
||||
|
||||
runner.assertQueueEmpty();
|
||||
runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_NOT_FOUND, 1);
|
||||
|
||||
final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_NOT_FOUND).get(0);
|
||||
|
||||
outFile.assertAttributeNotExists("fuzzyhash.value.0.match");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTLSHCompareFuzzyHashWithInvalidHash() {
|
||||
double matchingSimilarity = 200;
|
||||
runner.setProperty(CompareFuzzyHash.HASH_ALGORITHM, CompareFuzzyHash.allowableValueTLSH.getValue());
|
||||
runner.setProperty(CompareFuzzyHash.ATTRIBUTE_NAME, "fuzzyhash.value");
|
||||
runner.setProperty(CompareFuzzyHash.HASH_LIST_FILE, "src/test/resources/empty.list");
|
||||
runner.setProperty(CompareFuzzyHash.MATCH_THRESHOLD, String.valueOf(matchingSimilarity));
|
||||
|
||||
Map<String, String> attributes = new HashMap<>();
|
||||
attributes.put("fuzzyhash.value", "Test test test chocolate");
|
||||
|
||||
runner.enqueue("bogus".getBytes(), attributes);
|
||||
runner.run();
|
||||
|
||||
runner.assertQueueEmpty();
|
||||
runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_FAILURE, 1);
|
||||
|
||||
final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_FAILURE).get(0);
|
||||
|
||||
outFile.assertAttributeNotExists("fuzzyhash.value.0.match");
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMissingAttribute() {
|
||||
double matchingSimilarity = 200;
|
||||
runner.setProperty(CompareFuzzyHash.HASH_ALGORITHM, CompareFuzzyHash.allowableValueTLSH.getValue());
|
||||
runner.setProperty(CompareFuzzyHash.ATTRIBUTE_NAME, "fuzzyhash.value");
|
||||
runner.setProperty(CompareFuzzyHash.HASH_LIST_FILE, "src/test/resources/empty.list");
|
||||
runner.setProperty(CompareFuzzyHash.MATCH_THRESHOLD, String.valueOf(matchingSimilarity));
|
||||
runner.setProperty(CompareFuzzyHash.MATCHING_MODE, CompareFuzzyHash.multiMatch.getValue());
|
||||
|
||||
runner.enqueue("bogus".getBytes());
|
||||
runner.run();
|
||||
|
||||
runner.assertQueueEmpty();
|
||||
runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_FAILURE, 1);
|
||||
|
||||
final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_FAILURE).get(0);
|
||||
|
||||
outFile.assertAttributeNotExists("fuzzyhash.value.0.match");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAttributeIsEmptyString() {
|
||||
double matchingSimilarity = 200;
|
||||
runner.setProperty(CompareFuzzyHash.HASH_ALGORITHM, CompareFuzzyHash.allowableValueTLSH.getValue());
|
||||
runner.setProperty(CompareFuzzyHash.ATTRIBUTE_NAME, "fuzzyhash.value");
|
||||
runner.setProperty(CompareFuzzyHash.HASH_LIST_FILE, "src/test/resources/empty.list");
|
||||
runner.setProperty(CompareFuzzyHash.MATCH_THRESHOLD, String.valueOf(matchingSimilarity));
|
||||
runner.setProperty(CompareFuzzyHash.MATCHING_MODE, CompareFuzzyHash.multiMatch.getValue());
|
||||
|
||||
Map<String, String> attributes = new HashMap<>();
|
||||
attributes.put("fuzzyhash.value", "");
|
||||
runner.enqueue("bogus".getBytes(), attributes);
|
||||
|
||||
runner.run();
|
||||
|
||||
runner.assertQueueEmpty();
|
||||
runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_FAILURE, 1);
|
||||
|
||||
final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_FAILURE).get(0);
|
||||
|
||||
outFile.assertAttributeNotExists("fuzzyhash.value.0.match");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testlooksLikeSpamSum() {
|
||||
FuzzyHashMatcher matcher = new SSDeepHashMatcher();
|
||||
|
||||
List<String> invalidPayloads = Arrays.asList(
|
||||
"4AD:c1xs8Z/m6H0eRH31S8p8bHENANkPrNy4tkPytwPyh2jTytxPythPytNdPytDgYyF:OuO/mg3HFSRHEb44RNMi6uHU2hcq3", // invalidFirstField
|
||||
":c1xs8Z/m6H0eRH31S8p8bHENANkPrNy4tkPytwPyh2jTytxPythPytNdPytDgYyF:OuO/mg3HFSRHEb44RNMi6uHU2hcq3", // emptyFirstField
|
||||
"48::OuO/mg3HFSRHEb44RNMi6uHU2hcq3", // emptySecondField
|
||||
"48:c1xs8Z/m6H0eRH31S8p8bHENANkPrNy4tkPytwPyh2jTytxPythPytNdPytDgYyF:", // emptyThirdField
|
||||
"48:c1xs8Z/m6H0eRH31S8p8bHENANkPrNy4tkPytwPyh2jTytxPythPytNdPytDgYyF", // withoutThirdField
|
||||
"c1xs8Z/m6H0eRH31S8p8bHENANkPrNy4tkPytwPyh2jTytxPythPytNdPytDgYyF" // Just a simple string
|
||||
);
|
||||
|
||||
for (String item : invalidPayloads) {
|
||||
Assert.assertTrue("item '" + item + "' should have failed validation", !matcher.isValidHash(item));
|
||||
}
|
||||
|
||||
// Now test with a valid string
|
||||
Assert.assertTrue(matcher.isValidHash(ssdeepInput));
|
||||
|
||||
}
|
||||
}
|
|
@ -0,0 +1 @@
|
|||
ssdeep,1.0--blocksize:hash:hash,filename
|
|
@ -0,0 +1,11 @@
|
|||
ssdeep,1.0--blocksize:hash:hash,filename
|
||||
96:KQhaGCVZGhr83h3bc0ok3892m12wzgnH5w2pw+sxNEI58:FIVkH4x73h39LH+2w+sxaD,"config.h"
|
||||
96:EQOJvOl4ab3hhiNFXc4wwcweomr0cNJDBoqXjmAHKX8dEt001nfEhVIuX0dDcs:3mzpAsZpprbshfu3oujjdENdp21,"doc\README"
|
||||
48:c1xs8Z/m6H0eRH31S8p8bHENRNkPSNy4tkPytwPytyYytxPythPytNdPytDgYyse:OuO/mg3HFSRHE+H4RNc6uHU2hqoMkh
|
||||
96,MD9fHjsEuddrg31904l8bgx5ROg2MQZHZqpAlycowOsexbHDbk:MJwz/l2PqGqqbr2yk6pVgrwPV,"Whatever.txt-INVALID-DUE-TO-COMMA-AFTER-96"
|
||||
48:c1xs8Z/m6H0eRH31S8p8bHENANkPrNy4tkPytwPyh2jTytxPythPytNdPytDgYyF:OuO/mg3HFSRHEb44RNMi6uHU2hcq3,"nifi/nifi-nar-bundles/nifi-beats-bundle/nifi-beats-processors/pom.xml"
|
||||
6:hERjIfhRrlB63J0FDw1NBQmEH68xwMSELN:hZrlB62IwMS,"c:\this_is_valid_but_should_not_match"
|
||||
96:MD9fHjsEuddrg31904l8bgx5ROg2MQZHZqpAlycowOsexbHDbk:MJwz/l2PqGqqbr2yk6pVgrwPV,"INSTALL"
|
||||
48:c1xs8Z/m6H0eRH31S8p8bHENRNkPSNy4tkPytwPytyYytxPythPytNdPytDgYyse:OuO/mg3HFSRHE+H4RNc6uHU2hqoMkh,"nifi/nifi-nar-bundles/nifi-lumberjack-bundle/nifi-lumberjack-processors/pom.xml"
|
||||
48:c1xs8Z/m6H0eRH31S8p8bHENRNkPSNy4tkPytwPytyYytxPythPytNdPytDgYyse:OuO/mg3HFSRHE+H4RNc6uHU2hqoMkh,
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
A4518DA4A8F9517162A409C1DEEA9872AF55C137E00A62C9F0CDD0CE4F6CCD784BB4B7 nifi-nar-bundles/nifi-lumberjack-bundle/nifi-lumberjack-processors/pom.xml
|
||||
THERE SEEMS TO BE SOMETHING MISSING
|
||||
6FF02BEF718027B0160B4391212923ED7F1A463D563B1549B86CF62973B197AD2731F Synthetic shorter-INVALID
|
||||
EB519EA4A8F95171A2A409C1DEEB9872AF55C137E00A5289F1CCD0CE4F6CCD784BB4B7
|
||||
E2F0818B7AE7173906A72221570E30979B11C0FC47B518A1E89D257E2343CEC02381ED /this/is/also/valid/but/should/not/match
|
||||
EB519EA4A8F95171A2A409C1DEEB9872AF55C137E00A5289F1CCD0CE4F6CCD784BB4B7 nifi-nar-bundles/nifi-beats-bundle/nifi-beats-processors/pom.xml
|
||||
EB519EA4A8F95171A2A409C1DEEB9872AF55C137E00A5289F1CCD0CE4F6CCD784BB4B7
|
||||
|
||||
6FF02BEF718027B0160B4391212923ED7F1A463D563B1549B86CF62973B197AD2731F8 /this/is/valid/but/should/not/match
|
6
pom.xml
6
pom.xml
|
@ -1293,6 +1293,12 @@
|
|||
<version>1.2.0-SNAPSHOT</version>
|
||||
<type>nar</type>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.nifi</groupId>
|
||||
<artifactId>nifi-cybersecurity-nar</artifactId>
|
||||
<version>1.2.0-SNAPSHOT</version>
|
||||
<type>nar</type>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.nifi</groupId>
|
||||
<artifactId>nifi-email-nar</artifactId>
|
||||
|
|
Loading…
Reference in New Issue