NIFI-3726 - Introduces CompareFuzzyHash processor

- Abstract FuzzyhashContent to reduce a bit of code
            duplication

NIFI-3726 - Attempt to address peer review comments

NIFI-3726 - Addresses additional PR comments

NIFI-3726 - Fix checkstyle violations

NIFI-3726 - Fix issues with POMs

NIFI-3726 - Fixes the logging level to error in case of failure due to message too short

NIFI-3726 - Capture ArrayIndexOutOfBoundsException when consuming a hash source

            that matches but lacks a separator AND filename or matches but lacks
            a filename(i.e. ends with trailing separator)

Signed-off-by: Matt Burgess <mattyb149@apache.org>

This closes #1692
This commit is contained in:
Andre F de Miranda 2017-04-24 09:46:40 +10:00 committed by Matt Burgess
parent 960ef9142d
commit 54d47c7f74
16 changed files with 1138 additions and 47 deletions

View File

@ -359,6 +359,11 @@
<artifactId>nifi-beats-nar</artifactId>
<type>nar</type>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-cybersecurity-nar</artifactId>
<type>nar</type>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-email-nar</artifactId>

View File

@ -44,12 +44,6 @@
<artifactId>tlsh</artifactId>
<version>1.0.0</version>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-standard-processors</artifactId>
<version>1.2.0-SNAPSHOT</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-mock</artifactId>
@ -65,5 +59,26 @@
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-properties</artifactId>
<scope>compile</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.rat</groupId>
<artifactId>apache-rat-plugin</artifactId>
<configuration>
<excludes combine.children="append">
<exclude>src/test/resources/blank_ssdeep.list</exclude>
<exclude>src/test/resources/empty.list</exclude>
<exclude>src/test/resources/ssdeep.list</exclude>
<exclude>src/test/resources/tlsh.list</exclude>
</excludes>
</configuration>
</plugin>
</plugins>
</build>
</project>

View File

@ -0,0 +1,88 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.cybersecurity;
import com.idealista.tlsh.TLSH;
import info.debatty.java.spamsum.SpamSum;
import org.apache.nifi.components.AllowableValue;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.util.StandardValidators;
import java.util.List;
import java.util.Set;
/**
 * Shared base for the fuzzy-hash processors: holds the algorithm identifiers, the
 * property descriptors common to both processors and the hashing helpers.
 */
abstract class AbstractFuzzyHashProcessor extends AbstractProcessor {

    // Algorithm identifiers; kept as compile-time constants because they are used as switch labels by subclasses.
    final protected static String ssdeep = "ssdeep";
    final protected static String tlsh = "tlsh";

    public static final AllowableValue allowableValueSSDEEP = new AllowableValue(
            ssdeep,
            ssdeep,
            "Uses ssdeep / SpamSum 'context triggered piecewise hash'.");

    public static final AllowableValue allowableValueTLSH = new AllowableValue(
            tlsh,
            tlsh,
            "Uses TLSH (Trend 'Locality Sensitive Hash'). Note: FlowFile Content must be at least 512 characters long");

    public static final PropertyDescriptor ATTRIBUTE_NAME = new PropertyDescriptor.Builder()
            .name("ATTRIBUTE_NAME")
            .displayName("Hash Attribute Name")
            .description("The name of the FlowFile Attribute that should hold the Fuzzy Hash Value")
            .required(true)
            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
            .defaultValue("fuzzyhash.value")
            .build();

    public static final PropertyDescriptor HASH_ALGORITHM = new PropertyDescriptor.Builder()
            .name("HASH_ALGORITHM")
            .displayName("Hashing Algorithm")
            .description("The hashing algorithm utilised")
            .allowableValues(allowableValueSSDEEP, allowableValueTLSH)
            .required(true)
            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
            .build();

    // Populated by the concrete processor.
    protected List<PropertyDescriptor> descriptors;

    protected Set<Relationship> relationships;

    /**
     * Tells whether the FlowFile satisfies the minimum requirements of the chosen algorithm.
     *
     * @param algorithm the configured algorithm identifier
     * @param flowFile  the FlowFile whose size is inspected
     * @return {@code false} only when the algorithm is TLSH and the FlowFile size is below 512;
     *         {@code true} in every other case
     */
    protected boolean checkMinimumAlgorithmRequirements(String algorithm, FlowFile flowFile) {
        final boolean belowTlshMinimum = algorithm.equals(tlsh) && flowFile.getSize() < 512;
        return !belowTlshMinimum;
    }

    /**
     * Computes the fuzzy hash of {@code content} with the requested algorithm.
     *
     * @param algorithm the algorithm identifier
     * @param content   the text to hash
     * @return the hash produced by the matching library, or {@code null} when the algorithm
     *         is neither TLSH nor ssdeep
     */
    protected String generateHash(String algorithm, String content) {
        if (algorithm.equals(tlsh)) {
            return new TLSH(content).hash();
        }
        if (algorithm.equals(ssdeep)) {
            return new SpamSum().HashString(content);
        }
        return null;
    }
}

View File

@ -0,0 +1,277 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.cybersecurity;
import org.apache.nifi.annotation.behavior.EventDriven;
import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.behavior.SideEffectFree;
import org.apache.nifi.annotation.behavior.SupportsBatching;
import org.apache.nifi.annotation.behavior.WritesAttribute;
import org.apache.nifi.annotation.behavior.WritesAttributes;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.SeeAlso;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.annotation.lifecycle.OnScheduled;
import org.apache.nifi.components.AllowableValue;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.ProcessorInitializationContext;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.util.StandardValidators;
import org.apache.nifi.processors.cybersecurity.matchers.FuzzyHashMatcher;
import org.apache.nifi.processors.cybersecurity.matchers.SSDeepHashMatcher;
import org.apache.nifi.processors.cybersecurity.matchers.TLSHHashMatcher;
import org.apache.nifi.util.StringUtils;
import java.io.BufferedReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
@EventDriven
@SideEffectFree
@SupportsBatching
@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
@SeeAlso({FuzzyHashContent.class})
@Tags({"hashing", "fuzzy-hashing", "cyber-security"})
@CapabilityDescription("Compares an attribute containing a Fuzzy Hash against a file containing a list of fuzzy hashes, " +
        "appending an attribute to the FlowFile in case of a successful match.")
@WritesAttributes({
        @WritesAttribute(attribute = "XXXX.N.match", description = "The match that resembles the attribute specified " +
                "by the <Hash Attribute Name> property. Note that: 'XXXX' gets replaced with the <Hash Attribute Name>"),
        @WritesAttribute(attribute = "XXXX.N.similarity", description = "The similarity score between this flowfile " +
                "and its match of the same number N. Note that: 'XXXX' gets replaced with the <Hash Attribute Name>")})
/**
 * Compares the fuzzy hash held in a FlowFile attribute against every entry of a hash list
 * file and routes the FlowFile to {@code found}, {@code not-found} or {@code failure}.
 */
public class CompareFuzzyHash extends AbstractFuzzyHashProcessor {

    public static final AllowableValue singleMatch = new AllowableValue(
            "single",
            "single",
            "Send FlowFile to matched after the first match above threshold");

    public static final AllowableValue multiMatch = new AllowableValue(
            "multi-match",
            "multi-match",
            "Iterate full list of hashes before deciding to send FlowFile to matched or unmatched");

    public static final PropertyDescriptor HASH_LIST_FILE = new PropertyDescriptor.Builder()
            .name("HASH_LIST_FILE")
            .displayName("Hash List source file")
            .description("Path to the file containing hashes to be validated against")
            .required(true)
            .addValidator(StandardValidators.FILE_EXISTS_VALIDATOR)
            .build();

    // Note we add a PropertyDescriptor HASH_ALGORITHM and ATTRIBUTE_NAME from parent class

    public static final PropertyDescriptor MATCH_THRESHOLD = new PropertyDescriptor.Builder()
            // Note that while both TLSH and SSDeep seem to return int, we treat them as double in code.
            // The rationale being the expectation that other algorithms that may return double values
            // may be added to the processor later on.
            .name("MATCH_THRESHOLD")
            .displayName("Match threshold")
            .description("The similarity score must exceed or be equal to in order for " +
                    "match to be considered true. Refer to Additional Information for differences between TLSH " +
                    "and SSDEEP scores and how they relate to this property.")
            .required(true)
            .addValidator(StandardValidators.NUMBER_VALIDATOR)
            .build();

    public static final PropertyDescriptor MATCHING_MODE = new PropertyDescriptor.Builder()
            .name("MATCHING_MODE")
            .displayName("Matching mode")
            .description("Defines if the Processor should try to match as many entries as possible (" + multiMatch.getDisplayName() +
                    ") or if it should stop after the first match (" + singleMatch.getDisplayName() + ")")
            .required(true)
            .allowableValues(singleMatch, multiMatch)
            .defaultValue(singleMatch.getValue())
            .build();

    public static final Relationship REL_FOUND = new Relationship.Builder()
            .name("found")
            .description("Any FlowFile that is successfully matched to an existing hash will be sent to this Relationship.")
            .build();

    public static final Relationship REL_NOT_FOUND = new Relationship.Builder()
            .name("not-found")
            .description("Any FlowFile that cannot be matched to an existing hash will be sent to this Relationship.")
            .build();

    public static final Relationship REL_FAILURE = new Relationship.Builder()
            .name("failure")
            .description("Any FlowFile that cannot be matched, e.g. (lacks the attribute) will be sent to this Relationship.")
            .build();

    @Override
    protected void init(final ProcessorInitializationContext context) {
        final List<PropertyDescriptor> descriptors = new ArrayList<>();
        descriptors.add(HASH_LIST_FILE);
        // As mentioned above, add the PropertyDescriptor HASH_ALGORITHM and ATTRIBUTE_NAME from parent class
        descriptors.add(HASH_ALGORITHM);
        descriptors.add(ATTRIBUTE_NAME);
        descriptors.add(MATCH_THRESHOLD);
        descriptors.add(MATCHING_MODE);
        this.descriptors = Collections.unmodifiableList(descriptors);

        final Set<Relationship> relationships = new HashSet<>();
        relationships.add(REL_FOUND);
        relationships.add(REL_NOT_FOUND);
        relationships.add(REL_FAILURE);
        this.relationships = Collections.unmodifiableSet(relationships);
    }

    @Override
    public Set<Relationship> getRelationships() {
        return this.relationships;
    }

    @Override
    public final List<PropertyDescriptor> getSupportedPropertyDescriptors() {
        return descriptors;
    }

    @OnScheduled
    public void onScheduled(final ProcessContext context) {
    }

    /**
     * Reads the hash stored in the configured attribute, compares it against every line of
     * the hash list file and routes the FlowFile accordingly.
     * <ul>
     * <li>missing attribute or invalid hash: {@code failure}</li>
     * <li>at least one entry satisfies the threshold: {@code found}, with
     * {@code <attribute>.N.match} / {@code <attribute>.N.similarity} attributes added</li>
     * <li>no entry satisfies the threshold: {@code not-found}</li>
     * <li>unsupported algorithm or unreadable hash list: the session is rolled back and the
     * processor yields</li>
     * </ul>
     *
     * @param context provides the processor configuration
     * @param session the session the FlowFile is taken from and routed through
     * @throws ProcessException declared by the overridden method
     */
    @Override
    public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
        FlowFile flowFile = session.get();
        if (flowFile == null) {
            return;
        }

        final ComponentLog logger = getLogger();
        final String algorithm = context.getProperty(HASH_ALGORITHM).getValue();
        final String attributeName = context.getProperty(ATTRIBUTE_NAME).getValue();
        final String inputHash = flowFile.getAttribute(attributeName);

        if (inputHash == null) {
            logger.info("FlowFile {} lacks the required '{}' attribute, routing to failure.",
                    new Object[]{flowFile, attributeName});
            session.transfer(flowFile, REL_FAILURE);
            return;
        }

        final FuzzyHashMatcher fuzzyHashMatcher = createMatcher(algorithm, logger);
        if (fuzzyHashMatcher == null) {
            logger.error("Seems like the processor is configured to use unsupported algorithm '{}' ? Yielding.",
                    new Object[]{algorithm});
            // Hand the FlowFile back to the queue: returning with a FlowFile that was neither
            // transferred nor rolled back makes the framework's commit fail.
            session.rollback();
            context.yield();
            return;
        }

        if (!fuzzyHashMatcher.isValidHash(inputHash)) {
            logger.error("Invalid hash provided. Sending to failure");
            session.transfer(flowFile, REL_FAILURE);
            session.commit();
            return;
        }

        final double matchThreshold = context.getProperty(MATCH_THRESHOLD).asDouble();
        // Compare by value: the configured String is not guaranteed to be the same instance
        // as the AllowableValue's String, so '==' would silently disable single-match mode.
        final boolean stopAtFirstMatch = singleMatch.getValue().equals(context.getProperty(MATCHING_MODE).getValue());
        final Map<String, Double> matched = new ConcurrentHashMap<>();

        // try-with-resources guarantees the hash list file is closed on every exit path.
        try (BufferedReader reader = fuzzyHashMatcher.getReader(context.getProperty(HASH_LIST_FILE).getValue())) {
            String line;
            while ((line = reader.readLine()) != null) {
                final double similarity = fuzzyHashMatcher.getSimilarity(inputHash, line);
                if (!fuzzyHashMatcher.matchExceedsThreshold(similarity, matchThreshold)) {
                    continue;
                }

                final String match = fuzzyHashMatcher.getMatch(line);
                // A malformed file may cause a match with no filename.
                // Because this would simply look odd, we ignore such entry and log.
                if (StringUtils.isEmpty(match)) {
                    logger.error("Found a match against a malformed entry '{}'. Please inspect the contents of " +
                                    "the {} file and ensure they are properly formatted",
                            new Object[]{line, HASH_LIST_FILE.getDisplayName()});
                    continue;
                }

                matched.put(match, similarity);
                if (stopAtFirstMatch) {
                    // Single-match mode: one hit is enough, skip the rest of the file.
                    break;
                }
            }
        } catch (IOException e) {
            logger.error("Error while reading the hash input source", e);
            // Keep the FlowFile queued so it is retried once the hash list is readable again.
            session.rollback();
            context.yield();
            return;
        }

        if (matched.isEmpty()) {
            session.transfer(flowFile, REL_NOT_FOUND);
            session.commit();
            return;
        }

        // Expose every match as a numbered pair of attributes.
        final Map<String, String> attributes = new ConcurrentHashMap<>();
        int index = 0;
        for (Map.Entry<String, Double> entry : matched.entrySet()) {
            attributes.put(attributeName + "." + index + ".match", entry.getKey());
            attributes.put(attributeName + "." + index + ".similarity", String.valueOf(entry.getValue()));
            index++;
        }
        flowFile = session.putAllAttributes(flowFile, attributes);
        session.transfer(flowFile, REL_FOUND);
        session.commit();
    }

    /** Returns the matcher for the given algorithm, or {@code null} when the algorithm is not supported. */
    private FuzzyHashMatcher createMatcher(final String algorithm, final ComponentLog logger) {
        switch (algorithm) {
            case tlsh:
                return new TLSHHashMatcher(logger);
            case ssdeep:
                return new SSDeepHashMatcher(logger);
            default:
                return null;
        }
    }
}

View File

@ -16,16 +16,12 @@
*/
package org.apache.nifi.processors.cybersecurity;
import com.idealista.tlsh.TLSH;
import com.idealista.tlsh.exceptions.InsufficientComplexityException;
import info.debatty.java.spamsum.SpamSum;
import org.apache.nifi.annotation.behavior.EventDriven;
import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.behavior.SideEffectFree;
import org.apache.nifi.annotation.behavior.SupportsBatching;
import org.apache.nifi.annotation.behavior.ReadsAttribute;
import org.apache.nifi.annotation.behavior.ReadsAttributes;
import org.apache.nifi.annotation.behavior.WritesAttribute;
import org.apache.nifi.annotation.behavior.WritesAttributes;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
@ -33,19 +29,17 @@ import org.apache.nifi.annotation.documentation.SeeAlso;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.annotation.lifecycle.OnScheduled;
import org.apache.nifi.components.AllowableValue;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.ProcessorInitializationContext;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.io.InputStreamCallback;
import org.apache.nifi.processor.util.StandardValidators;
import org.apache.nifi.processors.standard.HashContent;
import org.apache.nifi.util.StringUtils;
import org.apache.nifi.stream.io.StreamUtils;
@ -73,31 +67,13 @@ import java.util.concurrent.atomic.AtomicReference;
"evaluations in memory. Accordingly, it is important to consider the anticipated profile of content being " +
"evaluated by this processor and the hardware supporting it especially when working against large files.")
@SeeAlso({HashContent.class})
@ReadsAttributes({@ReadsAttribute(attribute="", description="")})
@SeeAlso(classNames = {"org.apache.nifi.processors.standard.HashContent"}, value = {CompareFuzzyHash.class})
@WritesAttributes({@WritesAttribute(attribute = "<Hash Attribute Name>", description = "This Processor adds an attribute whose value is the result of Hashing the "
+ "existing FlowFile content. The name of this attribute is specified by the <Hash Attribute Name> property")})
public class FuzzyHashContent extends AbstractProcessor {
public class FuzzyHashContent extends AbstractFuzzyHashProcessor {
public static final AllowableValue allowableValueSSDEEP = new AllowableValue(
"ssdeep",
"ssdeep",
"Uses ssdeep / SpamSum 'context triggered piecewise hash'.");
public static final AllowableValue allowableValueTLSH = new AllowableValue(
"tlsh",
"tlsh",
"Uses TLSH (Trend 'Locality Sensitive Hash'). Note: FlowFile Content must be at least 512 characters long");
public static final PropertyDescriptor ATTRIBUTE_NAME = new PropertyDescriptor.Builder()
.name("ATTRIBUTE_NAME")
.displayName("Hash Attribute Name")
.description("The name of the FlowFile Attribute into which the Hash Value should be written. " +
"If the value already exists, it will be overwritten")
.required(true)
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.defaultValue("fuzzyhash.value")
.build();
public static final PropertyDescriptor HASH_ALGORITHM = new PropertyDescriptor.Builder()
.name("HASH_ALGORITHM")
@ -109,12 +85,12 @@ public class FuzzyHashContent extends AbstractProcessor {
.build();
public static final Relationship REL_SUCCESS = new Relationship.Builder()
.name("Success")
.name("success")
.description("Any FlowFile that is successfully hashed will be sent to this Relationship.")
.build();
public static final Relationship REL_FAILURE = new Relationship.Builder()
.name("Failure")
.name("failure")
.description("Any FlowFile that is successfully hashed will be sent to this Relationship.")
.build();
@ -157,18 +133,17 @@ public class FuzzyHashContent extends AbstractProcessor {
}
final ComponentLog logger = getLogger();
String algorithm = context.getProperty(HASH_ALGORITHM).getValue();
// Check if content matches minimum length requirement
if (context.getProperty(HASH_ALGORITHM).equals(allowableValueTLSH) && flowFile.getSize() < 512 ) {
logger.info("The content of {} is smaller than the minimum required by TLSH, routing to failure", new Object[]{flowFile});
if (checkMinimumAlgorithmRequirements(algorithm, flowFile) == false) {
logger.error("The content of '{}' is smaller than the minimum required by {}, routing to failure",
new Object[]{flowFile, algorithm});
session.transfer(flowFile, REL_FAILURE);
return;
}
final AtomicReference<String> hashValueHolder = new AtomicReference<>(null);
try {
@ -178,13 +153,12 @@ public class FuzzyHashContent extends AbstractProcessor {
try (ByteArrayOutputStream holder = new ByteArrayOutputStream()) {
StreamUtils.copy(in,holder);
if (context.getProperty(HASH_ALGORITHM).getValue().equals(allowableValueSSDEEP.getValue())) {
hashValueHolder.set(new SpamSum().HashString(holder.toString()));
String hashValue = generateHash(algorithm, holder.toString());
if (StringUtils.isBlank(hashValue) == false) {
hashValueHolder.set(hashValue);
}
if (context.getProperty(HASH_ALGORITHM).getValue().equals(allowableValueTLSH.getValue())) {
hashValueHolder.set(new TLSH(holder.toString()).hash());
}
}
}
});
@ -199,4 +173,5 @@ public class FuzzyHashContent extends AbstractProcessor {
session.transfer(flowFile, REL_FAILURE);
}
}
}

View File

@ -0,0 +1,35 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.cybersecurity.matchers;
import java.io.BufferedReader;
import java.io.IOException;
/**
 * Strategy for comparing a fuzzy hash against the entries of a hash list file.
 * Each implementation covers one algorithm and knows the layout of that algorithm's list file.
 */
public interface FuzzyHashMatcher {
/**
 * Opens the hash list for line-by-line reading. The caller owns the returned reader.
 *
 * @param source path of the hash list file
 * @return a reader positioned at the first entry to compare against
 * @throws IOException if the file cannot be opened or read
 */
BufferedReader getReader(String source) throws IOException;
/**
 * Decides whether a similarity score counts as a match for this algorithm.
 * The direction of the comparison is algorithm specific: the ssdeep implementation
 * accepts scores greater than or equal to the threshold, the TLSH implementation
 * accepts scores lower than or equal to it.
 *
 * @param similarity score returned by {@link #getSimilarity(String, String)}
 * @param matchThreshold user configured threshold
 * @return {@code true} when the score qualifies as a match
 */
boolean matchExceedsThreshold(double similarity, double matchThreshold) ;
/**
 * Scores the input hash against one line of the hash list.
 *
 * @param inputHash hash taken from the FlowFile attribute
 * @param existingHash a raw line of the hash list (hash followed by an optional name)
 * @return the algorithm specific score; implementations may return {@code Double.NaN}
 *         when a hash cannot be processed
 */
double getSimilarity(String inputHash, String existingHash);
/**
 * @param inputHash the string to validate
 * @return {@code true} when the string is a well formed hash for this algorithm
 */
boolean isValidHash(String inputHash);
/**
 * @param line a raw line of the hash list
 * @return the hash portion of the line, or {@code null} when the line fails validation
 */
String getHash(String line);
/**
 * @param line a raw line of the hash list
 * @return the name stored after the hash, or {@code null} when the line fails validation
 *         or carries no name
 */
String getMatch(String line);
}

View File

@ -0,0 +1,126 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.cybersecurity.matchers;
import info.debatty.java.spamsum.SpamSum;
import org.apache.nifi.logging.ComponentLog;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Scanner;
/**
 * {@link FuzzyHashMatcher} for ssdeep / SpamSum hash lists, whose entries look like
 * {@code blocksize:hash:hash,name} and whose first line is a header.
 */
public class SSDeepHashMatcher implements FuzzyHashMatcher {

    // Optional; may be null when the no-arg constructor is used.
    ComponentLog logger;

    public SSDeepHashMatcher() {
    }

    public SSDeepHashMatcher(ComponentLog logger) {
        this.logger = logger;
    }

    /**
     * Opens the hash list and consumes its header line.
     *
     * @param source path of the hash list file
     * @return a reader positioned on the first hash entry
     * @throws IOException if the file cannot be opened or the header cannot be read
     */
    @Override
    public BufferedReader getReader(String source) throws IOException {
        File file = new File(source);
        FileInputStream fileInputStream = new FileInputStream(file);
        BufferedReader reader = new BufferedReader(new InputStreamReader(fileInputStream));
        try {
            // Skip the first line, as the usual format used by other tools adds a header line
            // to a file list.
            reader.readLine();
        } catch (IOException e) {
            // Do not leak the file handle when the header cannot be read.
            try {
                reader.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
        return reader;
    }

    /**
     * @return {@code true} when {@code similarity} is greater than or equal to {@code matchThreshold}
     */
    @Override
    public boolean matchExceedsThreshold(double similarity, double matchThreshold) {
        return similarity >= matchThreshold;
    }

    /**
     * Scores {@code inputHash} against the hash portion (text before the first comma) of
     * {@code existingHash}.
     *
     * @param inputHash    hash to look for
     * @param existingHash raw line of the hash list
     * @return the score computed by SpamSum
     */
    @Override
    public double getSimilarity(String inputHash, String existingHash) {
        // split() with a positive limit always yields at least one element, so the hash
        // portion is always available.
        return new SpamSum().match(inputHash, hashPortion(existingHash));
    }

    /**
     * Checks the {@code blocksize:hash:hash} layout: three colon separated fields, a numeric
     * first field and two non-empty hash fields.
     *
     * @param inputHash string to validate; {@code null} is reported as invalid
     * @return {@code true} when the layout is respected
     */
    @Override
    public boolean isValidHash(String inputHash) {
        if (inputHash == null) {
            return false;
        }
        // format looks like
        // blocksize:hash:hash
        String[] fields = inputHash.split(":", 3);
        if (fields.length != 3) {
            return false;
        }

        boolean isNumber;
        try (Scanner sc = new Scanner(fields[0])) {
            isNumber = sc.hasNextInt();
        }
        if (!isNumber && logger != null && logger.isDebugEnabled()) {
            logger.debug("Field should be numeric but got '{}'. Will tell processor to ignore.",
                    new Object[] {fields[0]});
        }

        boolean hashOneIsNotEmpty = !fields[1].isEmpty();
        boolean hashTwoIsNotEmpty = !fields[2].isEmpty();
        return isNumber && hashOneIsNotEmpty && hashTwoIsNotEmpty;
    }

    /**
     * @param line raw line of the hash list
     * @return the text before the first comma when it is a valid hash, otherwise {@code null}
     */
    @Override
    public String getHash(String line) {
        if (line == null) {
            return null;
        }
        // Validate the hash portion only; validating the whole line would let the name
        // suffix stand in for an empty second hash field.
        String hash = hashPortion(line);
        return isValidHash(hash) ? hash : null;
    }

    /**
     * @param line raw line of the hash list
     * @return the text after the first comma when the hash portion is valid and a separator
     *         is present, otherwise {@code null}
     */
    @Override
    public String getMatch(String line) {
        if (line == null) {
            return null;
        }
        String[] match = line.split(",", 2);
        // Because the file can be malformed and contain an unnamed match,
        // only return a name when the entry actually has one.
        if (match.length == 2 && isValidHash(match[0])) {
            return match[1];
        }
        return null;
    }

    /** Returns the text preceding the first comma of a hash list line. */
    private static String hashPortion(String line) {
        return line.split(",", 2)[0];
    }
}

View File

@ -0,0 +1,117 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.cybersecurity.matchers;
import com.idealista.tlsh.digests.Digest;
import com.idealista.tlsh.digests.DigestBuilder;
import org.apache.nifi.logging.ComponentLog;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import static org.apache.nifi.processors.cybersecurity.CompareFuzzyHash.HASH_LIST_FILE;
/**
 * {@link FuzzyHashMatcher} for TLSH hash lists, whose entries are a hash optionally followed
 * by a tab and a name.
 */
public class TLSHHashMatcher implements FuzzyHashMatcher {

    ComponentLog logger;

    public TLSHHashMatcher(ComponentLog logger) {
        this.logger = logger;
    }

    /**
     * @param source path of the hash list file
     * @return a reader positioned at the first line of the file
     * @throws IOException if the file cannot be opened
     */
    @Override
    public BufferedReader getReader(String source) throws IOException {
        File file = new File(source);
        FileInputStream fileInputStream = new FileInputStream(file);
        return new BufferedReader(new InputStreamReader(fileInputStream));
    }

    /**
     * @return {@code true} when {@code similarity} is lower than or equal to {@code matchThreshold}
     */
    @Override
    public boolean matchExceedsThreshold(double similarity, double matchThreshold) {
        return similarity <= matchThreshold;
    }

    /**
     * Computes the difference between {@code inputHash} and the hash portion (text before
     * the first tab) of {@code existingHash}.
     *
     * @param inputHash    hash to look for
     * @param existingHash raw line of the hash list
     * @return the difference reported by the TLSH digests, or {@code Double.NaN} when either
     *         hash fails validation
     */
    @Override
    public double getSimilarity(String inputHash, String existingHash) {
        String[] hashToCompare = existingHash.split("\t", 2);
        // Both sides are validated first so the digest construction below cannot throw
        if (isValidHash(inputHash) && isValidHash(hashToCompare[0])) {
            Digest inputDigest = new DigestBuilder().withHash(inputHash).build();
            Digest existingHashDigest = new DigestBuilder().withHash(hashToCompare[0]).build();
            return inputDigest.calculateDifference(existingHashDigest, true);
        } else {
            return Double.NaN;
        }
    }

    /**
     * Validates the text before the first tab by attempting to build a TLSH digest from it.
     *
     * @param stringFromHashList a hash, or a raw line of the hash list; {@code null} is
     *                           reported as invalid
     * @return {@code true} when a digest can be built, {@code false} otherwise
     */
    @Override
    public boolean isValidHash(String stringFromHashList) {
        if (stringFromHashList == null) {
            return false;
        }
        String[] hashToCompare = stringFromHashList.split("\t", 2);
        // DigestBuilder raises several kinds of exception on malformed input; they are
        // captured here so callers only have to deal with a boolean.
        try {
            new DigestBuilder().withHash(hashToCompare[0]).build();
            return true;
        } catch (ArrayIndexOutOfBoundsException | StringIndexOutOfBoundsException | NumberFormatException e) {
            if (logger != null) {
                // Report the exception itself: getCause() is normally null for these
                // exceptions and would be logged as "Got null".
                logger.error("Got {} while processing the string '{}'. This usually means the file " +
                                "defined by '{}' property contains invalid entries.",
                        new Object[]{e.toString(), hashToCompare[0], HASH_LIST_FILE.getDisplayName()});
            }
        }
        return false;
    }

    /**
     * @param line raw line of the hash list
     * @return the text before the first tab when the line passes validation, otherwise {@code null}
     */
    @Override
    public String getHash(String line) {
        if (isValidHash(line)) {
            return line.split("\t", 2)[0];
        } else {
            return null;
        }
    }

    /**
     * @param line raw line of the hash list
     * @return the text after the first tab when the line passes validation and a separator
     *         is present, otherwise {@code null}
     */
    @Override
    public String getMatch(String line) {
        if (isValidHash(line)) {
            String[] match = line.split("\t", 2);
            // Because the file can be malformed and contain an unnamed match,
            // only return a name when the entry actually has one.
            if (match.length == 2) {
                return match[1];
            }
        }
        return null;
    }
}

View File

@ -12,4 +12,5 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
org.apache.nifi.processors.cybersecurity.FuzzyHashContent
org.apache.nifi.processors.cybersecurity.FuzzyHashContent
org.apache.nifi.processors.cybersecurity.CompareFuzzyHash

View File

@ -0,0 +1,45 @@
<!DOCTYPE html>
<html lang="en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<head>
<meta charset="utf-8" />
<title>CompareFuzzyHash</title>
<link rel="stylesheet" href="/nifi-docs/css/component-usage.css"
type="text/css" />
</head>
<body>
<!-- Processor Documentation ================================================== -->
<h2>Description:</h2>
<p>This Processor takes an attribute containing a Fuzzy Hash (TLSH or SSDeep) value and compares it against a list
of hashes of the same family (i.e. TLSH is compared with a list of TLSH hashes), routing the FlowFile to found or
not-found depending on a user configured threshold for similarity.
</p>
<p>It is important to note that:</p>
<ul>
<li>TLSH similarity increases as the result of its comparison function decreases (i.e. 0 indicates nearly identical files)</li>
<li>SSDeep similarity directly relates to the result of its comparison function (e.g. 99 indicates nearly identical files)</li>
</ul>
<p>Based on the above, when this processor refers to "exceed the score" it may be referring to:</p>
<ul>
<li>a value equal or lower than the configured threshold (in case of TLSH)</li>
<li>a value equal or higher than the configured threshold (in case of SSDeep)</li>
</ul>
</body>
</html>

View File

@ -0,0 +1,380 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.cybersecurity;
import org.apache.nifi.processors.cybersecurity.matchers.FuzzyHashMatcher;
import org.apache.nifi.processors.cybersecurity.matchers.SSDeepHashMatcher;
import org.apache.nifi.util.MockFlowFile;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.After;
import org.junit.Assert;
import org.junit.Test;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Unit tests for {@link CompareFuzzyHash}.
 *
 * <p>Each test configures a {@link TestRunner} with a hash algorithm, a hash list file and a
 * match threshold, enqueues a single FlowFile and verifies the relationship it is routed to as
 * well as the {@code fuzzyhash.value.N.match} / {@code fuzzyhash.value.N.similarity} attributes
 * written on it.</p>
 *
 * <p>As exercised by the assertions below, an SSDeep result is a match when its similarity is
 * greater than or equal to the threshold, while a TLSH result is a match when its similarity is
 * lower than or equal to the threshold.</p>
 */
public class TestCompareFuzzyHash {
    /** Name of the FlowFile attribute holding the hash under test. */
    private static final String HASH_ATTRIBUTE = "fuzzyhash.value";

    private static final String SSDEEP_LIST = "src/test/resources/ssdeep.list";
    private static final String BLANK_SSDEEP_LIST = "src/test/resources/blank_ssdeep.list";
    private static final String TLSH_LIST = "src/test/resources/tlsh.list";
    private static final String EMPTY_LIST = "src/test/resources/empty.list";

    final String ssdeepInput = "48:c1xs8Z/m6H0eRH31S8p8bHENANkPrNy4tkPytwPyh2jTytxPythPytNdPytDgYyF:OuO/mg3HFSRHEb44RNMi6uHU2hcq3";
    final String tlshInput = "EB519EA4A8F95171A2A409C1DEEB9872AF55C137E00A5289F1CCD0CE4F6CCD784BB4B7";

    final CompareFuzzyHash proc = new CompareFuzzyHash();
    private final TestRunner runner = TestRunners.newTestRunner(proc);

    @After
    public void stop() {
        runner.shutdown();
    }

    /**
     * Sets the properties that every test needs. The matching mode is deliberately left out so
     * that tests which did not set it keep relying on the processor default.
     *
     * @param algorithm    value for {@code HASH_ALGORITHM}
     * @param hashListFile path for {@code HASH_LIST_FILE}
     * @param threshold    value for {@code MATCH_THRESHOLD}
     */
    private void configureRunner(final String algorithm, final String hashListFile, final double threshold) {
        runner.setProperty(CompareFuzzyHash.HASH_ALGORITHM, algorithm);
        runner.setProperty(CompareFuzzyHash.ATTRIBUTE_NAME, HASH_ATTRIBUTE);
        runner.setProperty(CompareFuzzyHash.HASH_LIST_FILE, hashListFile);
        runner.setProperty(CompareFuzzyHash.MATCH_THRESHOLD, String.valueOf(threshold));
    }

    /**
     * Enqueues a FlowFile with dummy content whose hash attribute is set to {@code hash}.
     *
     * @param hash value stored under {@link #HASH_ATTRIBUTE}
     */
    private void enqueueWithHash(final String hash) {
        final Map<String, String> attributes = new HashMap<>();
        attributes.put(HASH_ATTRIBUTE, hash);
        runner.enqueue("bogus".getBytes(), attributes);
    }

    /**
     * Reads the similarity recorded for the match at position {@code index}.
     *
     * @param flowFile FlowFile produced by the processor
     * @param index    zero-based position of the match
     * @return the parsed {@code fuzzyhash.value.<index>.similarity} attribute
     */
    private static double similarityAt(final MockFlowFile flowFile, final int index) {
        return Double.parseDouble(flowFile.getAttribute(HASH_ATTRIBUTE + "." + index + ".similarity"));
    }

    @Test
    public void testSsdeepCompareFuzzyHash() {
        final double matchingSimilarity = 80;
        configureRunner(CompareFuzzyHash.allowableValueSSDEEP.getValue(), SSDEEP_LIST, matchingSimilarity);
        runner.setProperty(CompareFuzzyHash.MATCHING_MODE, CompareFuzzyHash.singleMatch.getValue());

        enqueueWithHash(ssdeepInput);
        runner.run();

        runner.assertQueueEmpty();
        runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_FOUND, 1);

        final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_FOUND).get(0);
        outFile.assertAttributeEquals(
                "fuzzyhash.value.0.match",
                "\"nifi/nifi-nar-bundles/nifi-beats-bundle/nifi-beats-processors/pom.xml\""
        );
        Assert.assertTrue(similarityAt(outFile, 0) >= matchingSimilarity);

        // Single-match mode must stop after the first hit.
        outFile.assertAttributeNotExists("fuzzyhash.value.1.match");
    }

    @Test
    public void testSsdeepCompareFuzzyHashMultipleMatches() {
        final double matchingSimilarity = 80;
        configureRunner(CompareFuzzyHash.allowableValueSSDEEP.getValue(), SSDEEP_LIST, matchingSimilarity);
        runner.setProperty(CompareFuzzyHash.MATCHING_MODE, CompareFuzzyHash.multiMatch.getValue());

        enqueueWithHash(ssdeepInput);
        runner.run();

        runner.assertQueueEmpty();
        runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_FOUND, 1);

        final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_FOUND).get(0);
        outFile.assertAttributeEquals(
                "fuzzyhash.value.0.match",
                "\"nifi/nifi-nar-bundles/nifi-lumberjack-bundle/nifi-lumberjack-processors/pom.xml\""
        );
        Assert.assertTrue(similarityAt(outFile, 0) >= matchingSimilarity);

        outFile.assertAttributeEquals(
                "fuzzyhash.value.1.match",
                "\"nifi/nifi-nar-bundles/nifi-beats-bundle/nifi-beats-processors/pom.xml\""
        );
        Assert.assertTrue(similarityAt(outFile, 1) >= matchingSimilarity);
    }

    @Test
    public void testSsdeepCompareFuzzyHashWithBlankHashList() {
        final double matchingSimilarity = 80;
        configureRunner(CompareFuzzyHash.allowableValueSSDEEP.getValue(), BLANK_SSDEEP_LIST, matchingSimilarity);

        enqueueWithHash("6:hERjIfhRrlB63J0FDw1NBQmEH68xwMSELN:hZrlB62IwMS");
        runner.run();

        runner.assertQueueEmpty();
        runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_NOT_FOUND, 1);

        // Previously the FlowFile was fetched but never inspected; verify that nothing was
        // recorded as a match, in line with the other not-found tests.
        final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_NOT_FOUND).get(0);
        outFile.assertAttributeNotExists("fuzzyhash.value.0.match");
    }

    @Test
    public void testSsdeepCompareFuzzyHashWithInvalidHashList() {
        // This is different from the "BlankHashList" series of tests in that the file lacks
        // headers and as such is totally invalid.
        final double matchingSimilarity = 80;
        configureRunner(CompareFuzzyHash.allowableValueSSDEEP.getValue(), EMPTY_LIST, matchingSimilarity);

        enqueueWithHash("6:hERjIfhRrlB63J0FDw1NBQmEH68xwMSELN:hZrlB62IwMS");
        runner.run();

        runner.assertQueueEmpty();
        runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_NOT_FOUND, 1);

        final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_NOT_FOUND).get(0);
        outFile.assertAttributeNotExists("fuzzyhash.value.0.match");
    }

    @Test
    public void testSsdeepCompareFuzzyHashWithInvalidHash() {
        final double matchingSimilarity = 80;
        configureRunner(CompareFuzzyHash.allowableValueSSDEEP.getValue(), SSDEEP_LIST, matchingSimilarity);
        runner.setProperty(CompareFuzzyHash.MATCHING_MODE, CompareFuzzyHash.singleMatch.getValue());

        enqueueWithHash("Test test test chocolate!");
        runner.run();

        runner.assertQueueEmpty();
        runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_FAILURE, 1);

        final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_FAILURE).get(0);
        outFile.assertAttributeNotExists("fuzzyhash.value.0.match");
    }

    @Test
    public void testTLSHCompareFuzzyHash() {
        final double matchingSimilarity = 200;
        configureRunner(CompareFuzzyHash.allowableValueTLSH.getValue(), TLSH_LIST, matchingSimilarity);
        runner.setProperty(CompareFuzzyHash.MATCHING_MODE, CompareFuzzyHash.singleMatch.getValue());

        enqueueWithHash(tlshInput);
        runner.run();

        runner.assertQueueEmpty();
        runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_FOUND, 1);

        final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_FOUND).get(0);
        outFile.assertAttributeEquals(
                "fuzzyhash.value.0.match",
                "nifi-nar-bundles/nifi-lumberjack-bundle/nifi-lumberjack-processors/pom.xml"
        );
        // For TLSH a lower score means a closer match, hence "<=".
        Assert.assertTrue(similarityAt(outFile, 0) <= matchingSimilarity);

        // Single-match mode must stop after the first hit.
        outFile.assertAttributeNotExists("fuzzyhash.value.1.match");
    }

    @Test
    public void testTLSHCompareFuzzyHashMultipleMatches() {
        final double matchingSimilarity = 200;
        configureRunner(CompareFuzzyHash.allowableValueTLSH.getValue(), TLSH_LIST, matchingSimilarity);
        runner.setProperty(CompareFuzzyHash.MATCHING_MODE, CompareFuzzyHash.multiMatch.getValue());

        enqueueWithHash(tlshInput);
        runner.run();

        runner.assertQueueEmpty();
        runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_FOUND, 1);

        final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_FOUND).get(0);
        outFile.assertAttributeEquals(
                "fuzzyhash.value.0.match",
                "nifi-nar-bundles/nifi-lumberjack-bundle/nifi-lumberjack-processors/pom.xml"
        );
        Assert.assertTrue(similarityAt(outFile, 0) <= matchingSimilarity);

        outFile.assertAttributeEquals(
                "fuzzyhash.value.1.match",
                "nifi-nar-bundles/nifi-beats-bundle/nifi-beats-processors/pom.xml"
        );
        Assert.assertTrue(similarityAt(outFile, 1) <= matchingSimilarity);
    }

    @Test
    public void testTLSHCompareFuzzyHashWithBlankFile() {
        // NOTE(review): this test uses the same empty.list resource and the same input as
        // testTLSHCompareFuzzyHashWithEmptyHashList, so the two currently cover the same scenario.
        final double matchingSimilarity = 200;
        configureRunner(CompareFuzzyHash.allowableValueTLSH.getValue(), EMPTY_LIST, matchingSimilarity);

        enqueueWithHash("E2F0818B7AE7173906A72221570E30979B11C0FC47B518A1E89D257E2343CEC02381ED");
        runner.run();

        runner.assertQueueEmpty();
        runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_NOT_FOUND, 1);

        final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_NOT_FOUND).get(0);
        outFile.assertAttributeNotExists("fuzzyhash.value.0.match");
    }

    @Test
    public void testTLSHCompareFuzzyHashWithEmptyHashList() {
        final double matchingSimilarity = 200;
        configureRunner(CompareFuzzyHash.allowableValueTLSH.getValue(), EMPTY_LIST, matchingSimilarity);

        enqueueWithHash("E2F0818B7AE7173906A72221570E30979B11C0FC47B518A1E89D257E2343CEC02381ED");
        runner.run();

        runner.assertQueueEmpty();
        runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_NOT_FOUND, 1);

        final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_NOT_FOUND).get(0);
        outFile.assertAttributeNotExists("fuzzyhash.value.0.match");
    }

    @Test
    public void testTLSHCompareFuzzyHashWithInvalidHash() {
        final double matchingSimilarity = 200;
        configureRunner(CompareFuzzyHash.allowableValueTLSH.getValue(), EMPTY_LIST, matchingSimilarity);

        enqueueWithHash("Test test test chocolate");
        runner.run();

        runner.assertQueueEmpty();
        runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_FAILURE, 1);

        final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_FAILURE).get(0);
        outFile.assertAttributeNotExists("fuzzyhash.value.0.match");
    }

    @Test
    public void testMissingAttribute() {
        final double matchingSimilarity = 200;
        configureRunner(CompareFuzzyHash.allowableValueTLSH.getValue(), EMPTY_LIST, matchingSimilarity);
        runner.setProperty(CompareFuzzyHash.MATCHING_MODE, CompareFuzzyHash.multiMatch.getValue());

        // No attributes at all: the hash attribute is absent rather than empty.
        runner.enqueue("bogus".getBytes());
        runner.run();

        runner.assertQueueEmpty();
        runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_FAILURE, 1);

        final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_FAILURE).get(0);
        outFile.assertAttributeNotExists("fuzzyhash.value.0.match");
    }

    @Test
    public void testAttributeIsEmptyString() {
        final double matchingSimilarity = 200;
        configureRunner(CompareFuzzyHash.allowableValueTLSH.getValue(), EMPTY_LIST, matchingSimilarity);
        runner.setProperty(CompareFuzzyHash.MATCHING_MODE, CompareFuzzyHash.multiMatch.getValue());

        enqueueWithHash("");
        runner.run();

        runner.assertQueueEmpty();
        runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_FAILURE, 1);

        final MockFlowFile outFile = runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_FAILURE).get(0);
        outFile.assertAttributeNotExists("fuzzyhash.value.0.match");
    }

    @Test
    public void testlooksLikeSpamSum() {
        final FuzzyHashMatcher matcher = new SSDeepHashMatcher();

        final List<String> invalidPayloads = Arrays.asList(
                "4AD:c1xs8Z/m6H0eRH31S8p8bHENANkPrNy4tkPytwPyh2jTytxPythPytNdPytDgYyF:OuO/mg3HFSRHEb44RNMi6uHU2hcq3", // invalidFirstField
                ":c1xs8Z/m6H0eRH31S8p8bHENANkPrNy4tkPytwPyh2jTytxPythPytNdPytDgYyF:OuO/mg3HFSRHEb44RNMi6uHU2hcq3", // emptyFirstField
                "48::OuO/mg3HFSRHEb44RNMi6uHU2hcq3", // emptySecondField
                "48:c1xs8Z/m6H0eRH31S8p8bHENANkPrNy4tkPytwPyh2jTytxPythPytNdPytDgYyF:", // emptyThirdField
                "48:c1xs8Z/m6H0eRH31S8p8bHENANkPrNy4tkPytwPyh2jTytxPythPytNdPytDgYyF", // withoutThirdField
                "c1xs8Z/m6H0eRH31S8p8bHENANkPrNy4tkPytwPyh2jTytxPythPytNdPytDgYyF" // Just a simple string
        );

        for (final String item : invalidPayloads) {
            Assert.assertFalse("item '" + item + "' should have failed validation", matcher.isValidHash(item));
        }

        // Now test with a valid string
        Assert.assertTrue(matcher.isValidHash(ssdeepInput));
    }
}

View File

@ -0,0 +1 @@
ssdeep,1.0--blocksize:hash:hash,filename

View File

@ -0,0 +1,11 @@
ssdeep,1.0--blocksize:hash:hash,filename
96:KQhaGCVZGhr83h3bc0ok3892m12wzgnH5w2pw+sxNEI58:FIVkH4x73h39LH+2w+sxaD,"config.h"
96:EQOJvOl4ab3hhiNFXc4wwcweomr0cNJDBoqXjmAHKX8dEt001nfEhVIuX0dDcs:3mzpAsZpprbshfu3oujjdENdp21,"doc\README"
48:c1xs8Z/m6H0eRH31S8p8bHENRNkPSNy4tkPytwPytyYytxPythPytNdPytDgYyse:OuO/mg3HFSRHE+H4RNc6uHU2hqoMkh
96,MD9fHjsEuddrg31904l8bgx5ROg2MQZHZqpAlycowOsexbHDbk:MJwz/l2PqGqqbr2yk6pVgrwPV,"Whatever.txt-INVALID-DUE-TO-COMMA-AFTER-96"
48:c1xs8Z/m6H0eRH31S8p8bHENANkPrNy4tkPytwPyh2jTytxPythPytNdPytDgYyF:OuO/mg3HFSRHEb44RNMi6uHU2hcq3,"nifi/nifi-nar-bundles/nifi-beats-bundle/nifi-beats-processors/pom.xml"
6:hERjIfhRrlB63J0FDw1NBQmEH68xwMSELN:hZrlB62IwMS,"c:\this_is_valid_but_should_not_match"
96:MD9fHjsEuddrg31904l8bgx5ROg2MQZHZqpAlycowOsexbHDbk:MJwz/l2PqGqqbr2yk6pVgrwPV,"INSTALL"
48:c1xs8Z/m6H0eRH31S8p8bHENRNkPSNy4tkPytwPytyYytxPythPytNdPytDgYyse:OuO/mg3HFSRHE+H4RNc6uHU2hqoMkh,"nifi/nifi-nar-bundles/nifi-lumberjack-bundle/nifi-lumberjack-processors/pom.xml"
48:c1xs8Z/m6H0eRH31S8p8bHENRNkPSNy4tkPytwPytyYytxPythPytNdPytDgYyse:OuO/mg3HFSRHE+H4RNc6uHU2hqoMkh,

View File

@ -0,0 +1,9 @@
A4518DA4A8F9517162A409C1DEEA9872AF55C137E00A62C9F0CDD0CE4F6CCD784BB4B7 nifi-nar-bundles/nifi-lumberjack-bundle/nifi-lumberjack-processors/pom.xml
THERE SEEMS TO BE SOMETHING MISSING
6FF02BEF718027B0160B4391212923ED7F1A463D563B1549B86CF62973B197AD2731F Synthetic shorter-INVALID
EB519EA4A8F95171A2A409C1DEEB9872AF55C137E00A5289F1CCD0CE4F6CCD784BB4B7
E2F0818B7AE7173906A72221570E30979B11C0FC47B518A1E89D257E2343CEC02381ED /this/is/also/valid/but/should/not/match
EB519EA4A8F95171A2A409C1DEEB9872AF55C137E00A5289F1CCD0CE4F6CCD784BB4B7 nifi-nar-bundles/nifi-beats-bundle/nifi-beats-processors/pom.xml
EB519EA4A8F95171A2A409C1DEEB9872AF55C137E00A5289F1CCD0CE4F6CCD784BB4B7
6FF02BEF718027B0160B4391212923ED7F1A463D563B1549B86CF62973B197AD2731F8 /this/is/valid/but/should/not/match

View File

@ -1293,6 +1293,12 @@
<version>1.2.0-SNAPSHOT</version>
<type>nar</type>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-cybersecurity-nar</artifactId>
<version>1.2.0-SNAPSHOT</version>
<type>nar</type>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-email-nar</artifactId>