diff --git a/nifi-extension-bundles/nifi-email-bundle/nifi-email-processors/pom.xml b/nifi-extension-bundles/nifi-email-bundle/nifi-email-processors/pom.xml index f44d1f15cc..a8257aeb8d 100644 --- a/nifi-extension-bundles/nifi-email-bundle/nifi-email-processors/pom.xml +++ b/nifi-extension-bundles/nifi-email-bundle/nifi-email-processors/pom.xml @@ -26,11 +26,9 @@ jar 6.3.1 - 5.3.0 - org.apache.nifi nifi-utils @@ -103,26 +101,6 @@ - - - org.apache.poi - poi-scratchpad - ${poi.version} - - - org.apache.commons - commons-math3 - - - org.apache.commons - commons-collections4 - - - com.zaxxer - SparseBitSet - - - org.apache.nifi nifi-security-utils diff --git a/nifi-extension-bundles/nifi-email-bundle/nifi-email-processors/src/main/java/org/apache/nifi/processors/email/ExtractTNEFAttachments.java b/nifi-extension-bundles/nifi-email-bundle/nifi-email-processors/src/main/java/org/apache/nifi/processors/email/ExtractTNEFAttachments.java deleted file mode 100644 index 3a24acf33d..0000000000 --- a/nifi-extension-bundles/nifi-email-bundle/nifi-email-processors/src/main/java/org/apache/nifi/processors/email/ExtractTNEFAttachments.java +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nifi.processors.email; - -import java.io.BufferedInputStream; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; -import org.apache.nifi.annotation.behavior.InputRequirement; -import org.apache.nifi.annotation.behavior.InputRequirement.Requirement; -import org.apache.nifi.annotation.behavior.SideEffectFree; -import org.apache.nifi.annotation.behavior.SupportsBatching; -import org.apache.nifi.annotation.behavior.WritesAttribute; -import org.apache.nifi.annotation.behavior.WritesAttributes; -import org.apache.nifi.annotation.documentation.CapabilityDescription; -import org.apache.nifi.annotation.documentation.Tags; -import org.apache.nifi.flowfile.FlowFile; -import org.apache.nifi.flowfile.attributes.CoreAttributes; -import org.apache.nifi.logging.ComponentLog; -import org.apache.nifi.processor.AbstractProcessor; -import org.apache.nifi.processor.ProcessContext; -import org.apache.nifi.processor.ProcessSession; -import org.apache.nifi.processor.Relationship; -import org.apache.nifi.processor.exception.FlowFileHandlingException; -import org.apache.poi.hmef.Attachment; -import org.apache.poi.hmef.HMEFMessage; - -@SupportsBatching -@SideEffectFree -@Tags({"split", "email"}) -@InputRequirement(Requirement.INPUT_REQUIRED) -@CapabilityDescription("Extract attachments from a mime formatted email file, splitting them into individual flowfiles.") -@WritesAttributes({ - @WritesAttribute(attribute = "filename ", description = "The filename of the attachment"), - @WritesAttribute(attribute = "email.tnef.attachment.parent.filename ", description = "The filename of the parent FlowFile"), - @WritesAttribute(attribute = "email.tnef.attachment.parent.uuid", description = "The UUID of the original FlowFile.")}) -public class ExtractTNEFAttachments extends AbstractProcessor { - public static final String ATTACHMENT_ORIGINAL_FILENAME = "email.tnef.attachment.parent.filename"; - public static final String ATTACHMENT_ORIGINAL_UUID = "email.tnef.attachment.parent.uuid"; - - public static final Relationship REL_ATTACHMENTS = new Relationship.Builder() - .name("attachments") - .description("Each individual attachment will be routed to the attachments relationship") - .build(); - public static final Relationship REL_ORIGINAL = new Relationship.Builder() - .name("original") - .description("Each original flowfile (i.e. before extraction) will be routed to the original relationship") - .build(); - public static final Relationship REL_FAILURE = new Relationship.Builder() - .name("failure") - .description("Each individual flowfile that could not be parsed will be routed to the failure relationship") - .build(); - - private final static Set RELATIONSHIPS = Set.of(REL_ATTACHMENTS, REL_ORIGINAL, REL_FAILURE); - - @Override - public void onTrigger(final ProcessContext context, final ProcessSession session) { - final ComponentLog logger = getLogger(); - final FlowFile originalFlowFile = session.get(); - if (originalFlowFile == null) { - return; - } - final List attachmentsList = new ArrayList<>(); - final List invalidFlowFilesList = new ArrayList<>(); - final List originalFlowFilesList = new ArrayList<>(); - - session.read(originalFlowFile, rawIn -> { - try (final InputStream in = new BufferedInputStream(rawIn)) { - // This will trigger an exception in case content is not a TNEF. - final HMEFMessage hmefMessage = new HMEFMessage(in); - - // Add original FlowFile (may revert later on in case of errors) // - originalFlowFilesList.add(originalFlowFile); - - if (!hmefMessage.getAttachments().isEmpty()) { - final String originalFlowFileName = originalFlowFile.getAttribute(CoreAttributes.FILENAME.key()); - try { - for (final Attachment attachment : hmefMessage.getAttachments()) { - FlowFile split = session.create(originalFlowFile); - final Map attributes = new HashMap<>(); - final String attachmentFilename = attachment.getFilename(); - if (attachmentFilename != null && !attachmentFilename.isBlank()) { - attributes.put(CoreAttributes.FILENAME.key(), attachmentFilename); - } - - String parentUuid = originalFlowFile.getAttribute(CoreAttributes.UUID.key()); - attributes.put(ATTACHMENT_ORIGINAL_UUID, parentUuid); - attributes.put(ATTACHMENT_ORIGINAL_FILENAME, originalFlowFileName); - - split = session.append(split, out -> out.write(attachment.getContents())); - split = session.putAllAttributes(split, attributes); - attachmentsList.add(split); - } - } catch (FlowFileHandlingException e) { - // Something went wrong - // Removing splits that may have been created - session.remove(attachmentsList); - // Removing the original flow from its list - originalFlowFilesList.remove(originalFlowFile); - logger.error("Flowfile {} triggered error {} while processing message removing generated FlowFiles from sessions", originalFlowFile, e); - invalidFlowFilesList.add(originalFlowFile); - } - } - } catch (Exception e) { - // Another error hit... - // Removing the original flow from its list - originalFlowFilesList.remove(originalFlowFile); - logger.error("Could not parse {} as an email, treating as failure", originalFlowFile, e); - // Message is invalid or triggered an error during parsing - invalidFlowFilesList.add(originalFlowFile); - } - }); - - session.transfer(attachmentsList, REL_ATTACHMENTS); - - // As per above code, originalFlowfile may be routed to invalid or - // original depending on RFC2822 compliance. - session.transfer(invalidFlowFilesList, REL_FAILURE); - session.transfer(originalFlowFilesList, REL_ORIGINAL); - - // check if attachments have been extracted - if (!attachmentsList.isEmpty()) { - if (attachmentsList.size() > 10) { - // If more than 10, summarise log - logger.info("Split {} into {} files", originalFlowFile, attachmentsList.size()); - } else { - // Otherwise be more verbose and list each individual split - logger.info("Split {} into {} files: {}", originalFlowFile, attachmentsList.size(), attachmentsList); - } - } - } - - @Override - public Set getRelationships() { - return RELATIONSHIPS; - } -} - diff --git a/nifi-extension-bundles/nifi-email-bundle/nifi-email-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor b/nifi-extension-bundles/nifi-email-bundle/nifi-email-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor index 42187852c2..c6bf6d8ab6 100644 --- a/nifi-extension-bundles/nifi-email-bundle/nifi-email-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor +++ b/nifi-extension-bundles/nifi-email-bundle/nifi-email-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor @@ -12,7 +12,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -org.apache.nifi.processors.email.ExtractTNEFAttachments org.apache.nifi.processors.email.ExtractEmailAttachments org.apache.nifi.processors.email.ExtractEmailHeaders org.apache.nifi.processors.email.ListenSMTP diff --git a/nifi-extension-bundles/nifi-email-bundle/nifi-email-processors/src/test/java/org/apache/nifi/processors/email/TestExtractTNEFAttachments.java b/nifi-extension-bundles/nifi-email-bundle/nifi-email-processors/src/test/java/org/apache/nifi/processors/email/TestExtractTNEFAttachments.java deleted file mode 100644 index 5683fd3941..0000000000 --- a/nifi-extension-bundles/nifi-email-bundle/nifi-email-processors/src/test/java/org/apache/nifi/processors/email/TestExtractTNEFAttachments.java +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.nifi.processors.email; - -import org.apache.nifi.util.MockFlowFile; -import org.apache.nifi.util.TestRunner; -import org.apache.nifi.util.TestRunners; -import org.junit.jupiter.api.Test; - -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -public class TestExtractTNEFAttachments { - - @Test - public void testValidTNEFWithoutAttachment() throws Exception { - final TestRunner runner = TestRunners.newTestRunner(new ExtractTNEFAttachments()); - - runner.enqueue(Paths.get("src/test/resources/winmail-simple.dat")); - runner.run(); - - runner.assertTransferCount(ExtractEmailAttachments.REL_ORIGINAL, 1); - runner.assertTransferCount(ExtractEmailAttachments.REL_FAILURE, 0); - runner.assertTransferCount(ExtractEmailAttachments.REL_ATTACHMENTS, 0); - // Have a look at the attachments... - final List splits = runner.getFlowFilesForRelationship(ExtractEmailAttachments.REL_ATTACHMENTS); - assertEquals(0, splits.size()); - } - - @Test - public void testValidTNEFWithMultipleAttachments() throws Exception { - final TestRunner runner = TestRunners.newTestRunner(new ExtractTNEFAttachments()); - - runner.enqueue(Paths.get("src/test/resources/winmail-with-attachments.dat")); - runner.run(); - - runner.assertTransferCount(ExtractEmailAttachments.REL_ORIGINAL, 1); - runner.assertTransferCount(ExtractEmailAttachments.REL_FAILURE, 0); - runner.assertTransferCount(ExtractEmailAttachments.REL_ATTACHMENTS, 2); - // Have a look at the attachments... - final List splits = runner.getFlowFilesForRelationship(ExtractTNEFAttachments.REL_ATTACHMENTS); - - List filenames = new ArrayList<>(); - for (final MockFlowFile flowFile : splits) { - filenames.add(flowFile.getAttribute("filename")); - } - - assertTrue(filenames.containsAll(Arrays.asList("nifiDrop.svg", "MINIFI~1.PNG"))); - } - - @Test - public void testValidTNEFWithAttachment() throws Exception { - final TestRunner runner = TestRunners.newTestRunner(new ExtractTNEFAttachments()); - - runner.enqueue(Paths.get("src/test/resources/winmail-with-attachment.dat")); - runner.run(); - - runner.assertTransferCount(ExtractEmailAttachments.REL_ORIGINAL, 1); - runner.assertTransferCount(ExtractEmailAttachments.REL_FAILURE, 0); - runner.assertTransferCount(ExtractEmailAttachments.REL_ATTACHMENTS, 1); - // Have a look at the attachments... - final List splits = runner.getFlowFilesForRelationship(ExtractTNEFAttachments.REL_ATTACHMENTS); - - List filenames = new ArrayList<>(); - for (final MockFlowFile flowFile : splits) { - filenames.add(flowFile.getAttribute("filename")); - } - - assertTrue(filenames.contains("nifiDrop.svg")); - } - - @Test - public void testInvalidTNEF() { - final TestRunner runner = TestRunners.newTestRunner(new ExtractTNEFAttachments()); - runner.enqueue("test test test chocolate".getBytes()); - runner.run(); - - runner.assertTransferCount(ExtractEmailAttachments.REL_ORIGINAL, 0); - runner.assertTransferCount(ExtractEmailAttachments.REL_FAILURE, 1); - runner.assertTransferCount(ExtractEmailAttachments.REL_ATTACHMENTS, 0); - } -} \ No newline at end of file diff --git a/nifi-extension-bundles/nifi-email-bundle/nifi-email-processors/src/test/resources/winmail-simple.dat b/nifi-extension-bundles/nifi-email-bundle/nifi-email-processors/src/test/resources/winmail-simple.dat deleted file mode 100644 index 7f61beeaa7..0000000000 Binary files a/nifi-extension-bundles/nifi-email-bundle/nifi-email-processors/src/test/resources/winmail-simple.dat and /dev/null differ diff --git a/nifi-extension-bundles/nifi-email-bundle/nifi-email-processors/src/test/resources/winmail-with-attachment.dat b/nifi-extension-bundles/nifi-email-bundle/nifi-email-processors/src/test/resources/winmail-with-attachment.dat deleted file mode 100644 index 42cbec334b..0000000000 Binary files a/nifi-extension-bundles/nifi-email-bundle/nifi-email-processors/src/test/resources/winmail-with-attachment.dat and /dev/null differ diff --git a/nifi-extension-bundles/nifi-email-bundle/nifi-email-processors/src/test/resources/winmail-with-attachments.dat b/nifi-extension-bundles/nifi-email-bundle/nifi-email-processors/src/test/resources/winmail-with-attachments.dat deleted file mode 100644 index ec547211fe..0000000000 Binary files a/nifi-extension-bundles/nifi-email-bundle/nifi-email-processors/src/test/resources/winmail-with-attachments.dat and /dev/null differ