diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/IdentifyMimeType.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/IdentifyMimeType.java index 8c2bdf9bde..c259e881d5 100644 --- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/IdentifyMimeType.java +++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/IdentifyMimeType.java @@ -19,8 +19,10 @@ package org.apache.nifi.processors.standard; import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; +import java.util.ArrayList; import java.util.Collections; import java.util.HashSet; +import java.util.List; import java.util.Set; import java.util.concurrent.atomic.AtomicReference; @@ -32,6 +34,7 @@ import org.apache.nifi.annotation.behavior.SupportsBatching; import org.apache.nifi.annotation.behavior.WritesAttribute; import org.apache.nifi.annotation.documentation.CapabilityDescription; import org.apache.nifi.annotation.documentation.Tags; +import org.apache.nifi.components.PropertyDescriptor; import org.apache.nifi.flowfile.FlowFile; import org.apache.nifi.flowfile.attributes.CoreAttributes; import org.apache.nifi.logging.ComponentLog; @@ -78,12 +81,22 @@ import org.apache.tika.mime.MimeTypeException; + "If unable to detect the MIME Type, the attribute's value will be set to application/octet-stream") public class IdentifyMimeType extends AbstractProcessor { + public static final PropertyDescriptor USE_FILENAME_IN_DETECTION = new PropertyDescriptor.Builder() + .displayName("Use Filename In Detection") + .name("use-filename-in-detection") + .description("If true will pass the filename to Tika to aid in detection.") + .required(true) + .allowableValues("true", "false") + .defaultValue("true") + .build(); + public static final Relationship REL_SUCCESS = new Relationship.Builder() .name("success") .description("All FlowFiles are routed to success") .build(); private Set relationships; + private List properties; private final TikaConfig config; private final Detector detector; @@ -96,6 +109,11 @@ public class IdentifyMimeType extends AbstractProcessor { @Override protected void init(final ProcessorInitializationContext context) { + + final List properties = new ArrayList<>(); + properties.add(USE_FILENAME_IN_DETECTION); + this.properties = Collections.unmodifiableList(properties); + final Set rels = new HashSet<>(); rels.add(REL_SUCCESS); this.relationships = Collections.unmodifiableSet(rels); @@ -106,6 +124,11 @@ public class IdentifyMimeType extends AbstractProcessor { return relationships; } + @Override + protected List getSupportedPropertyDescriptors() { + return properties; + } + @Override public void onTrigger(final ProcessContext context, final ProcessSession session) { FlowFile flowFile = session.get(); @@ -123,8 +146,8 @@ public class IdentifyMimeType extends AbstractProcessor { try (final InputStream in = new BufferedInputStream(stream)) { TikaInputStream tikaStream = TikaInputStream.get(in); Metadata metadata = new Metadata(); - // Add filename if it exists - if (filename != null) { + + if (filename != null && context.getProperty(USE_FILENAME_IN_DETECTION).asBoolean()) { metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, filename); } // Get mime type diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestIdentifyMimeType.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestIdentifyMimeType.java index 3465b891ac..dc611135b9 100644 --- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestIdentifyMimeType.java +++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestIdentifyMimeType.java @@ -20,6 +20,7 @@ import static org.junit.Assert.assertEquals; import java.io.File; import java.io.IOException; +import java.nio.file.Paths; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -72,6 +73,7 @@ public class TestIdentifyMimeType { expectedMimeTypes.put("1.xml", "application/xml"); expectedMimeTypes.put("flowfilev3", "application/flowfile-v3"); expectedMimeTypes.put("flowfilev1.tar", "application/flowfile-v1"); + expectedMimeTypes.put("fake.csv", "text/csv"); final Map expectedExtensions = new HashMap<>(); expectedExtensions.put("1.7z", ".7z"); @@ -91,6 +93,7 @@ public class TestIdentifyMimeType { expectedExtensions.put("1.xml", ".xml"); expectedExtensions.put("flowfilev3", ""); expectedExtensions.put("flowfilev1.tar", ""); + expectedExtensions.put("fake.csv", ".csv"); final List filesOut = runner.getFlowFilesForRelationship(IdentifyMimeType.REL_SUCCESS); for (final MockFlowFile file : filesOut) { @@ -105,4 +108,18 @@ public class TestIdentifyMimeType { assertEquals("Expected " + file + " to have extension " + expectedExtension + ", but it was " + extension, expectedExtension, extension); } } + + @Test + public void testIgnoreFileName() throws Exception { + final TestRunner runner = TestRunners.newTestRunner(new IdentifyMimeType()); + runner.setProperty(IdentifyMimeType.USE_FILENAME_IN_DETECTION, "false"); + + runner.enqueue(Paths.get("src/test/resources/TestIdentifyMimeType/fake.csv")); + runner.run(); + + runner.assertAllFlowFilesTransferred(IdentifyMimeType.REL_SUCCESS, 1); + MockFlowFile flowFile = runner.getFlowFilesForRelationship(IdentifyMimeType.REL_SUCCESS).get(0); + flowFile.assertAttributeEquals("mime.extension", ".txt"); + flowFile.assertAttributeEquals("mime.type", "text/plain"); + } } diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/resources/TestIdentifyMimeType/fake.csv b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/resources/TestIdentifyMimeType/fake.csv new file mode 100644 index 0000000000..f8ba006b2e --- /dev/null +++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/resources/TestIdentifyMimeType/fake.csv @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +this is not a valid CSV file but +is intended to verify that the updated +IdentifyMIMEType works as expected.