diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/pom.xml b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/pom.xml
index 67cf93dec6..568b4d50fc 100644
--- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/pom.xml
+++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/pom.xml
@@ -526,6 +526,8 @@
src/test/resources/TestIdentifyMimeType/1.txt.gz
src/test/resources/TestIdentifyMimeType/1.zip
src/test/resources/TestIdentifyMimeType/flowfilev1.tar
+ src/test/resources/TestIdentifyMimeType/2.custom
+ src/test/resources/TestIdentifyMimeType/.customConfig.xml
src/test/resources/TestUnpackContent/data.tar
src/test/resources/TestUnpackContent/data.zip
src/test/resources/TestUnpackContent/invalid_data.zip
diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/IdentifyMimeType.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/IdentifyMimeType.java
index c259e881d5..d6ebd39e50 100644
--- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/IdentifyMimeType.java
+++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/IdentifyMimeType.java
@@ -17,6 +17,9 @@
package org.apache.nifi.processors.standard;
import java.io.BufferedInputStream;
+import java.io.ByteArrayInputStream;
+import java.util.Collection;
+import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
@@ -32,9 +35,15 @@ import org.apache.nifi.annotation.behavior.InputRequirement.Requirement;
import org.apache.nifi.annotation.behavior.SideEffectFree;
import org.apache.nifi.annotation.behavior.SupportsBatching;
import org.apache.nifi.annotation.behavior.WritesAttribute;
+import org.apache.nifi.annotation.behavior.WritesAttributes;
+import org.apache.nifi.annotation.lifecycle.OnScheduled;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.components.Validator;
+import org.apache.nifi.components.ValidationContext;
+import org.apache.nifi.components.ValidationResult;
+import org.apache.nifi.expression.ExpressionLanguageScope;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.flowfile.attributes.CoreAttributes;
import org.apache.nifi.logging.ComponentLog;
@@ -44,6 +53,8 @@ import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.ProcessorInitializationContext;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.io.InputStreamCallback;
+import org.apache.nifi.processor.exception.ProcessException;
+import org.apache.nifi.processor.util.StandardValidators;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.detect.Detector;
import org.apache.tika.io.TikaInputStream;
@@ -51,8 +62,11 @@ import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaMetadataKeys;
import org.apache.tika.mime.MediaType;
import org.apache.tika.mime.MimeType;
+import org.apache.tika.mime.MimeTypes;
+import org.apache.tika.mime.MimeTypesFactory;
import org.apache.tika.mime.MimeTypeException;
+
/**
*
* Attempts to detect the MIME Type of a FlowFile by examining its contents. If the MIME Type is determined, it is added
@@ -76,9 +90,16 @@ import org.apache.tika.mime.MimeTypeException;
@CapabilityDescription("Attempts to identify the MIME Type used for a FlowFile. If the MIME Type can be identified, "
+ "an attribute with the name 'mime.type' is added with the value being the MIME Type. If the MIME Type cannot be determined, "
+ "the value will be set to 'application/octet-stream'. In addition, the attribute mime.extension will be set if a common file "
- + "extension for the MIME Type is known.")
+ + "extension for the MIME Type is known. If both Config File and Config Body are not set, the default NiFi MIME Types will "
+ + "be used.")
+@WritesAttributes({
@WritesAttribute(attribute = "mime.type", description = "This Processor sets the FlowFile's mime.type attribute to the detected MIME Type. "
- + "If unable to detect the MIME Type, the attribute's value will be set to application/octet-stream")
+ + "If unable to detect the MIME Type, the attribute's value will be set to application/octet-stream"),
+@WritesAttribute(attribute = "mime.extension", description = "This Processor sets the FlowFile's mime.extension attribute to the file "
+ + "extension associated with the detected MIME Type. "
+ + "If there is no correlated extension, the attribute's value will be empty")
+}
+)
public class IdentifyMimeType extends AbstractProcessor {
public static final PropertyDescriptor USE_FILENAME_IN_DETECTION = new PropertyDescriptor.Builder()
@@ -90,6 +111,24 @@ public class IdentifyMimeType extends AbstractProcessor {
.defaultValue("true")
.build();
+    /**
+     * Optional path to a MIME type configuration file. The processor's own validation
+     * rejects configurations that also set Config Body.
+     */
+    public static final PropertyDescriptor MIME_CONFIG_FILE = new PropertyDescriptor.Builder()
+            .name("config-file")
+            .displayName("Config File")
+            .description("Path to MIME type config file. Only one of Config File or Config Body may be used.")
+            .required(false)
+            .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
+            .addValidator(new StandardValidators.FileExistsValidator(true))
+            .build();
+
+    /**
+     * Optional inline body of a MIME type configuration. The processor's own validation
+     * rejects configurations that also set Config File. Expression language is not supported.
+     */
+    public static final PropertyDescriptor MIME_CONFIG_BODY = new PropertyDescriptor.Builder()
+            .name("config-body")
+            .displayName("Config Body")
+            .description("Body of MIME type config file. Only one of Config File or Config Body may be used.")
+            .required(false)
+            .expressionLanguageSupported(ExpressionLanguageScope.NONE)
+            .addValidator(Validator.VALID)
+            .build();
+
public static final Relationship REL_SUCCESS = new Relationship.Builder()
.name("success")
.description("All FlowFiles are routed to success")
@@ -99,12 +138,11 @@ public class IdentifyMimeType extends AbstractProcessor {
private List properties;
private final TikaConfig config;
- private final Detector detector;
+ private Detector detector;
+ private MimeTypes mimeTypes;
public IdentifyMimeType() {
- // Setup Tika
this.config = TikaConfig.getDefaultConfig();
- this.detector = config.getDetector();
}
@Override
@@ -112,6 +150,8 @@ public class IdentifyMimeType extends AbstractProcessor {
final List properties = new ArrayList<>();
properties.add(USE_FILENAME_IN_DETECTION);
+ properties.add(MIME_CONFIG_BODY);
+ properties.add(MIME_CONFIG_FILE);
this.properties = Collections.unmodifiableList(properties);
final Set rels = new HashSet<>();
@@ -119,6 +159,35 @@ public class IdentifyMimeType extends AbstractProcessor {
this.relationships = Collections.unmodifiableSet(rels);
}
+    /**
+     * Selects the detector and MIME repository used by this processor each time it is scheduled.
+     * <p>
+     * When neither Config Body nor Config File is set, the default Tika configuration is used.
+     * Otherwise a custom {@link MimeTypes} instance is built from the configured body (preferred
+     * when present) or from the configured file, and serves as both detector and repository.
+     *
+     * @param context the process context providing the property values
+     * @throws ProcessException if the custom configuration cannot be read or parsed
+     */
+    @OnScheduled
+    public void setup(final ProcessContext context) {
+        final String configBody = context.getProperty(MIME_CONFIG_BODY).getValue();
+        final String configFile = context.getProperty(MIME_CONFIG_FILE).evaluateAttributeExpressions().getValue();
+
+        if (configBody == null && configFile == null) {
+            // No custom configuration supplied: fall back to the default Tika configuration.
+            this.detector = config.getDetector();
+            this.mimeTypes = config.getMimeRepository();
+        } else if (configBody != null) {
+            // Encode with an explicit charset so the result does not depend on the platform default.
+            final byte[] bodyBytes = configBody.getBytes(java.nio.charset.StandardCharsets.UTF_8);
+            try (final InputStream bodyStream = new ByteArrayInputStream(bodyBytes)) {
+                final MimeTypes customTypes = MimeTypesFactory.create(bodyStream);
+                this.detector = customTypes;
+                this.mimeTypes = customTypes;
+            } catch (Exception e) {
+                context.yield();
+                throw new ProcessException("Failed to load config body", e);
+            }
+        } else {
+            // MimeTypesFactory.create(InputStream...) is not documented to close the stream it is
+            // given, so close the file handle here instead of leaking it on every schedule.
+            try (final InputStream fileStream = new FileInputStream(configFile)) {
+                final MimeTypes customTypes = MimeTypesFactory.create(fileStream);
+                this.detector = customTypes;
+                this.mimeTypes = customTypes;
+            } catch (Exception e) {
+                context.yield();
+                throw new ProcessException("Failed to load config file", e);
+            }
+        }
+    }
+
+
@Override
public Set getRelationships() {
return relationships;
@@ -161,7 +230,7 @@ public class IdentifyMimeType extends AbstractProcessor {
String extension = "";
try {
MimeType mimetype;
- mimetype = config.getMimeRepository().forName(mimeType);
+ mimetype = mimeTypes.forName(mimeType);
extension = mimetype.getExtension();
} catch (MimeTypeException ex) {
logger.warn("MIME type extension lookup failed: {}", new Object[]{ex});
@@ -185,4 +254,21 @@ public class IdentifyMimeType extends AbstractProcessor {
session.getProvenanceReporter().modifyAttributes(flowFile);
session.transfer(flowFile, REL_SUCCESS);
}
+
+ @Override
+ protected Collection customValidate(ValidationContext validationContext) {
+ Set results = new HashSet<>();
+ String body = validationContext.getProperty(MIME_CONFIG_BODY).getValue();
+ String file = validationContext.getProperty(MIME_CONFIG_FILE).getValue();
+ if(body != null && file != null) {
+ results.add(new ValidationResult.Builder()
+ .input(MIME_CONFIG_FILE.getName())
+ .subject(file)
+ .valid(false)
+ .explanation("Can only specify Config Body or Config File. Not both.")
+ .build());
+ }
+ return results;
+ }
+
}
diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/resources/docs/org.apache.nifi.processors.standard.IdentifyMimeType/additionalDetails.html b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/resources/docs/org.apache.nifi.processors.standard.IdentifyMimeType/additionalDetails.html
index bc331f6f09..015d6a0bdc 100644
--- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/resources/docs/org.apache.nifi.processors.standard.IdentifyMimeType/additionalDetails.html
+++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/resources/docs/org.apache.nifi.processors.standard.IdentifyMimeType/additionalDetails.html
@@ -22,7 +22,7 @@
- The following is a non-exhaustive list of MIME Types detected:
+
The following is a non-exhaustive list of MIME Types detected by default in NiFi:
- application/gzip
@@ -55,9 +55,26 @@
- application/zip
- application/x-lzh
- For a complete list, please refer to
-
- Apache Tika's source code
+
+
An example value for the Config Body property that will identify a file whose contents start with "abcd" as MIME Type "custom/abcd"
+ and with extension ".abcd" would look like the following:
+
+
+ For a more complete list of Tika's default types (and additional details regarding customization of
+ the value for the Config Body property), please refer to
+
+
+ Apache Tika's documentation
diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestIdentifyMimeType.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestIdentifyMimeType.java
index dc611135b9..71ede0b2db 100644
--- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestIdentifyMimeType.java
+++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestIdentifyMimeType.java
@@ -74,6 +74,7 @@ public class TestIdentifyMimeType {
expectedMimeTypes.put("flowfilev3", "application/flowfile-v3");
expectedMimeTypes.put("flowfilev1.tar", "application/flowfile-v1");
expectedMimeTypes.put("fake.csv", "text/csv");
+ expectedMimeTypes.put("2.custom", "text/plain");
final Map expectedExtensions = new HashMap<>();
expectedExtensions.put("1.7z", ".7z");
@@ -94,6 +95,7 @@ public class TestIdentifyMimeType {
expectedExtensions.put("flowfilev3", "");
expectedExtensions.put("flowfilev1.tar", "");
expectedExtensions.put("fake.csv", ".csv");
+ expectedExtensions.put("2.custom", ".txt");
final List filesOut = runner.getFlowFilesForRelationship(IdentifyMimeType.REL_SUCCESS);
for (final MockFlowFile file : filesOut) {
@@ -122,4 +124,202 @@ public class TestIdentifyMimeType {
flowFile.assertAttributeEquals("mime.extension", ".txt");
flowFile.assertAttributeEquals("mime.type", "text/plain");
}
+
+ /**
+ * Runs the processor over every non-hidden regular file in the test resource directory with a
+ * custom MIME configuration supplied through the Config Body property, then checks the
+ * mime.type and mime.extension attributes of each output FlowFile against the expected maps.
+ */
+ @Test
+ public void testConfigBody() throws IOException {
+ final TestRunner runner = TestRunners.newTestRunner(new IdentifyMimeType());
+
+
+ final File dir = new File("src/test/resources/TestIdentifyMimeType");
+ // Names starting with "." are filtered out, so the hidden custom config resource is not enqueued.
+ final File[] files = dir.listFiles((ldir,name)-> name != null && !name.startsWith("."));
+ int fileCount = 0;
+ for (final File file : files) {
+ if (file.isDirectory()) {
+ continue;
+ }
+
+ runner.enqueue(file.toPath());
+ fileCount++;
+ }
+
+
+ // NOTE(review): inline MIME configuration passed as the Config Body value; the XML markup of
+ // this literal is not visible in this view of the patch - confirm against the real file.
+ String configBody = "\n" +
+ "\n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " PNG\n" +
+ " <_comment>Portable Network Graphics\n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ "";
+ runner.setProperty(IdentifyMimeType.MIME_CONFIG_BODY, configBody);
+
+ runner.setThreadCount(1);
+ runner.run(fileCount);
+
+
+ runner.assertAllFlowFilesTransferred(IdentifyMimeType.REL_SUCCESS, fileCount);
+
+ // Expected mime.type per input filename under the custom configuration.
+ final Map expectedMimeTypes = new HashMap<>();
+ expectedMimeTypes.put("1.7z", "application/octet-stream");
+ expectedMimeTypes.put("1.mdb", "application/octet-stream");
+ expectedMimeTypes.put("1.txt", "text/plain");
+ expectedMimeTypes.put("1.csv", "text/plain");
+ expectedMimeTypes.put("1.txt.bz2", "application/octet-stream");
+ expectedMimeTypes.put("1.txt.gz", "application/octet-stream");
+ expectedMimeTypes.put("1.zip", "application/octet-stream");
+ expectedMimeTypes.put("bgBannerFoot.png", "image/png");
+ expectedMimeTypes.put("blueBtnBg.jpg", "application/octet-stream");
+ expectedMimeTypes.put("1.pdf", "application/octet-stream");
+ expectedMimeTypes.put("grid.gif", "application/octet-stream");
+ expectedMimeTypes.put("1.tar", "application/octet-stream");
+ expectedMimeTypes.put("1.tar.gz", "application/octet-stream");
+ expectedMimeTypes.put("1.jar", "application/octet-stream");
+ expectedMimeTypes.put("1.xml", "text/plain");
+ expectedMimeTypes.put("flowfilev3", "application/octet-stream");
+ expectedMimeTypes.put("flowfilev1.tar", "application/octet-stream");
+ expectedMimeTypes.put("fake.csv", "text/plain");
+ expectedMimeTypes.put("2.custom", "custom/abcd");
+
+ // Expected mime.extension per input filename; empty where no extension is expected.
+ final Map expectedExtensions = new HashMap<>();
+ expectedExtensions.put("1.7z", "");
+ expectedExtensions.put("1.mdb", "");
+ expectedExtensions.put("1.txt", "");
+ expectedExtensions.put("1.csv", "");
+ expectedExtensions.put("1.txt.bz2", "");
+ expectedExtensions.put("1.txt.gz", "");
+ expectedExtensions.put("1.zip", "");
+ expectedExtensions.put("bgBannerFoot.png", ".customPng");
+ expectedExtensions.put("blueBtnBg.jpg", "");
+ expectedExtensions.put("1.pdf", "");
+ expectedExtensions.put("grid.gif", "");
+ expectedExtensions.put("1.tar", "");
+ expectedExtensions.put("1.tar.gz", "");
+ expectedExtensions.put("1.jar", "");
+ expectedExtensions.put("1.xml", "");
+ expectedExtensions.put("flowfilev3", "");
+ expectedExtensions.put("flowfilev1.tar", "");
+ expectedExtensions.put("fake.csv", "");
+ expectedExtensions.put("2.custom", ".abcd");
+
+ // Compare each output FlowFile's attributes with the expectations keyed by its filename.
+ final List filesOut = runner.getFlowFilesForRelationship(IdentifyMimeType.REL_SUCCESS);
+ for (final MockFlowFile file : filesOut) {
+ final String filename = file.getAttribute(CoreAttributes.FILENAME.key());
+ final String mimeType = file.getAttribute(CoreAttributes.MIME_TYPE.key());
+ final String expected = expectedMimeTypes.get(filename);
+
+ final String extension = file.getAttribute("mime.extension");
+ final String expectedExtension = expectedExtensions.get(filename);
+
+ assertEquals("Expected " + file + " to have MIME Type " + expected + ", but it was " + mimeType, expected, mimeType);
+ assertEquals("Expected " + file + " to have extension " + expectedExtension + ", but it was " + extension, expectedExtension, extension);
+ }
+ }
+
+ /**
+ * Runs the processor over every non-hidden regular file in the test resource directory with a
+ * custom MIME configuration loaded through the Config File property, then checks the
+ * mime.type and mime.extension attributes of each output FlowFile against the expected maps.
+ */
+ @Test
+ public void testConfigFile() throws IOException {
+ final TestRunner runner = TestRunners.newTestRunner(new IdentifyMimeType());
+
+
+ final File dir = new File("src/test/resources/TestIdentifyMimeType");
+ // Names starting with "." are filtered out, so the config file itself is not enqueued as input.
+ final File[] files = dir.listFiles((ldir,name)-> name != null && !name.startsWith("."));
+ int fileCount = 0;
+ for (final File file : files) {
+ if (file.isDirectory()) {
+ continue;
+ }
+
+ runner.enqueue(file.toPath());
+ fileCount++;
+ }
+
+
+ // Path is relative, so the test presumably relies on the module directory being the working directory.
+ String configFile = "src/test/resources/TestIdentifyMimeType/.customConfig.xml";
+ runner.setProperty(IdentifyMimeType.MIME_CONFIG_FILE, configFile);
+
+ runner.setThreadCount(1);
+ runner.run(fileCount);
+
+
+ runner.assertAllFlowFilesTransferred(IdentifyMimeType.REL_SUCCESS, fileCount);
+
+ // Expected mime.type per input filename under the custom configuration file.
+ final Map expectedMimeTypes = new HashMap<>();
+ expectedMimeTypes.put("1.7z", "application/octet-stream");
+ expectedMimeTypes.put("1.mdb", "application/octet-stream");
+ expectedMimeTypes.put("1.txt", "text/plain");
+ expectedMimeTypes.put("1.csv", "text/plain");
+ expectedMimeTypes.put("1.txt.bz2", "application/octet-stream");
+ expectedMimeTypes.put("1.txt.gz", "application/octet-stream");
+ expectedMimeTypes.put("1.zip", "application/octet-stream");
+ expectedMimeTypes.put("bgBannerFoot.png", "my/png");
+ expectedMimeTypes.put("blueBtnBg.jpg", "my/jpeg");
+ expectedMimeTypes.put("1.pdf", "application/octet-stream");
+ expectedMimeTypes.put("grid.gif", "my/gif");
+ expectedMimeTypes.put("1.tar", "application/octet-stream");
+ expectedMimeTypes.put("1.tar.gz", "application/octet-stream");
+ expectedMimeTypes.put("1.jar", "application/octet-stream");
+ expectedMimeTypes.put("1.xml", "text/plain");
+ expectedMimeTypes.put("flowfilev3", "application/octet-stream");
+ expectedMimeTypes.put("flowfilev1.tar", "application/octet-stream");
+ expectedMimeTypes.put("fake.csv", "text/plain");
+ expectedMimeTypes.put("2.custom", "text/plain");
+
+ // Expected mime.extension per input filename; empty where no extension is expected.
+ final Map expectedExtensions = new HashMap<>();
+ expectedExtensions.put("1.7z", "");
+ expectedExtensions.put("1.mdb", "");
+ expectedExtensions.put("1.txt", "");
+ expectedExtensions.put("1.csv", "");
+ expectedExtensions.put("1.txt.bz2", "");
+ expectedExtensions.put("1.txt.gz", "");
+ expectedExtensions.put("1.zip", "");
+ expectedExtensions.put("bgBannerFoot.png", ".mypng");
+ expectedExtensions.put("blueBtnBg.jpg", ".myjpg");
+ expectedExtensions.put("1.pdf", "");
+ expectedExtensions.put("grid.gif", ".mygif");
+ expectedExtensions.put("1.tar", "");
+ expectedExtensions.put("1.tar.gz", "");
+ expectedExtensions.put("1.jar", "");
+ expectedExtensions.put("1.xml", "");
+ expectedExtensions.put("flowfilev3", "");
+ expectedExtensions.put("flowfilev1.tar", "");
+ expectedExtensions.put("fake.csv", "");
+ expectedExtensions.put("2.custom", "");
+
+ // Compare each output FlowFile's attributes with the expectations keyed by its filename.
+ final List filesOut = runner.getFlowFilesForRelationship(IdentifyMimeType.REL_SUCCESS);
+ for (final MockFlowFile file : filesOut) {
+ final String filename = file.getAttribute(CoreAttributes.FILENAME.key());
+ final String mimeType = file.getAttribute(CoreAttributes.MIME_TYPE.key());
+ final String expected = expectedMimeTypes.get(filename);
+
+ final String extension = file.getAttribute("mime.extension");
+ final String expectedExtension = expectedExtensions.get(filename);
+
+ assertEquals("Expected " + file + " to have MIME Type " + expected + ", but it was " + mimeType, expected, mimeType);
+ assertEquals("Expected " + file + " to have extension " + expectedExtension + ", but it was " + extension, expectedExtension, extension);
+ }
+ }
+
+    /**
+     * Verifies that the processor is reported as invalid when both Config File and Config Body
+     * are set at the same time.
+     */
+    @Test
+    public void testOnlyOneCustomMimeConfigSpecified() {
+        final TestRunner runner = TestRunners.newTestRunner(new IdentifyMimeType());
+
+        final String configFile = "src/test/resources/TestIdentifyMimeType/.customConfig.xml";
+        runner.setProperty(IdentifyMimeType.MIME_CONFIG_FILE, configFile);
+
+        final String configBody = "foo";
+        runner.setProperty(IdentifyMimeType.MIME_CONFIG_BODY, configBody);
+
+        // Assert on validity directly: expecting a generic AssertionError from run() would also
+        // pass on unrelated assertion failures and hide what is actually being checked.
+        runner.assertNotValid();
+    }
+
}
diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/resources/TestIdentifyMimeType/.customConfig.xml b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/resources/TestIdentifyMimeType/.customConfig.xml
new file mode 100644
index 0000000000..fec5cb89b0
--- /dev/null
+++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/resources/TestIdentifyMimeType/.customConfig.xml
@@ -0,0 +1,34 @@
+
+
+
+ GIF
+ <_comment>Graphics Interchange Format
+ http://en.wikipedia.org/wiki/Gif
+ com.compuserve.gif
+
+
+
+
+
+
+
+ PNG
+ <_comment>Portable Network Graphics
+
+
+
+
+
+
+ JPEG
+ <_comment>Joint Photographic Experts Group
+ http://en.wikipedia.org/wiki/Jpeg
+ public.jpeg
+
+
+
+
+
+
+
+
diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/resources/TestIdentifyMimeType/2.custom b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/resources/TestIdentifyMimeType/2.custom
new file mode 100644
index 0000000000..acbe86c7c8
--- /dev/null
+++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/resources/TestIdentifyMimeType/2.custom
@@ -0,0 +1 @@
+abcd