NIFI-13503 Removed ExtractTNEFAttachments from nifi-email-processors

This closes #9042

Signed-off-by: David Handermann <exceptionfactory@apache.org>
This commit is contained in:
Joseph Witt 2024-07-04 15:26:07 -07:00 committed by exceptionfactory
parent db6bac21bf
commit ad7375a3be
No known key found for this signature in database
7 changed files with 0 additions and 280 deletions

View File

@ -26,11 +26,9 @@
<packaging>jar</packaging>
<properties>
<spring.integration.version>6.3.1</spring.integration.version>
<poi.version>5.3.0</poi.version>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-utils</artifactId>
@ -103,26 +101,6 @@
</exclusion>
</exclusions>
</dependency>
<!-- poi-scratchpad required for TNEF parsing -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>${poi.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.commons</groupId>
<artifactId>commons-math3</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.commons</groupId>
<artifactId>commons-collections4</artifactId>
</exclusion>
<exclusion>
<groupId>com.zaxxer</groupId>
<artifactId>SparseBitSet</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-security-utils</artifactId>

View File

@ -1,155 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.email;
import java.io.BufferedInputStream;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.behavior.InputRequirement.Requirement;
import org.apache.nifi.annotation.behavior.SideEffectFree;
import org.apache.nifi.annotation.behavior.SupportsBatching;
import org.apache.nifi.annotation.behavior.WritesAttribute;
import org.apache.nifi.annotation.behavior.WritesAttributes;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.flowfile.attributes.CoreAttributes;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.FlowFileHandlingException;
import org.apache.poi.hmef.Attachment;
import org.apache.poi.hmef.HMEFMessage;
@SupportsBatching
@SideEffectFree
@Tags({"split", "email"})
@InputRequirement(Requirement.INPUT_REQUIRED)
@CapabilityDescription("Extract attachments from a mime formatted email file, splitting them into individual flowfiles.")
@WritesAttributes({
@WritesAttribute(attribute = "filename ", description = "The filename of the attachment"),
@WritesAttribute(attribute = "email.tnef.attachment.parent.filename ", description = "The filename of the parent FlowFile"),
@WritesAttribute(attribute = "email.tnef.attachment.parent.uuid", description = "The UUID of the original FlowFile.")})
public class ExtractTNEFAttachments extends AbstractProcessor {
public static final String ATTACHMENT_ORIGINAL_FILENAME = "email.tnef.attachment.parent.filename";
public static final String ATTACHMENT_ORIGINAL_UUID = "email.tnef.attachment.parent.uuid";
public static final Relationship REL_ATTACHMENTS = new Relationship.Builder()
.name("attachments")
.description("Each individual attachment will be routed to the attachments relationship")
.build();
public static final Relationship REL_ORIGINAL = new Relationship.Builder()
.name("original")
.description("Each original flowfile (i.e. before extraction) will be routed to the original relationship")
.build();
public static final Relationship REL_FAILURE = new Relationship.Builder()
.name("failure")
.description("Each individual flowfile that could not be parsed will be routed to the failure relationship")
.build();
private final static Set<Relationship> RELATIONSHIPS = Set.of(REL_ATTACHMENTS, REL_ORIGINAL, REL_FAILURE);
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
final ComponentLog logger = getLogger();
final FlowFile originalFlowFile = session.get();
if (originalFlowFile == null) {
return;
}
final List<FlowFile> attachmentsList = new ArrayList<>();
final List<FlowFile> invalidFlowFilesList = new ArrayList<>();
final List<FlowFile> originalFlowFilesList = new ArrayList<>();
session.read(originalFlowFile, rawIn -> {
try (final InputStream in = new BufferedInputStream(rawIn)) {
// This will trigger an exception in case content is not a TNEF.
final HMEFMessage hmefMessage = new HMEFMessage(in);
// Add original FlowFile (may revert later on in case of errors) //
originalFlowFilesList.add(originalFlowFile);
if (!hmefMessage.getAttachments().isEmpty()) {
final String originalFlowFileName = originalFlowFile.getAttribute(CoreAttributes.FILENAME.key());
try {
for (final Attachment attachment : hmefMessage.getAttachments()) {
FlowFile split = session.create(originalFlowFile);
final Map<String, String> attributes = new HashMap<>();
final String attachmentFilename = attachment.getFilename();
if (attachmentFilename != null && !attachmentFilename.isBlank()) {
attributes.put(CoreAttributes.FILENAME.key(), attachmentFilename);
}
String parentUuid = originalFlowFile.getAttribute(CoreAttributes.UUID.key());
attributes.put(ATTACHMENT_ORIGINAL_UUID, parentUuid);
attributes.put(ATTACHMENT_ORIGINAL_FILENAME, originalFlowFileName);
split = session.append(split, out -> out.write(attachment.getContents()));
split = session.putAllAttributes(split, attributes);
attachmentsList.add(split);
}
} catch (FlowFileHandlingException e) {
// Something went wrong
// Removing splits that may have been created
session.remove(attachmentsList);
// Removing the original flow from its list
originalFlowFilesList.remove(originalFlowFile);
logger.error("Flowfile {} triggered error {} while processing message removing generated FlowFiles from sessions", originalFlowFile, e);
invalidFlowFilesList.add(originalFlowFile);
}
}
} catch (Exception e) {
// Another error hit...
// Removing the original flow from its list
originalFlowFilesList.remove(originalFlowFile);
logger.error("Could not parse {} as an email, treating as failure", originalFlowFile, e);
// Message is invalid or triggered an error during parsing
invalidFlowFilesList.add(originalFlowFile);
}
});
session.transfer(attachmentsList, REL_ATTACHMENTS);
// As per above code, originalFlowfile may be routed to invalid or
// original depending on RFC2822 compliance.
session.transfer(invalidFlowFilesList, REL_FAILURE);
session.transfer(originalFlowFilesList, REL_ORIGINAL);
// check if attachments have been extracted
if (!attachmentsList.isEmpty()) {
if (attachmentsList.size() > 10) {
// If more than 10, summarise log
logger.info("Split {} into {} files", originalFlowFile, attachmentsList.size());
} else {
// Otherwise be more verbose and list each individual split
logger.info("Split {} into {} files: {}", originalFlowFile, attachmentsList.size(), attachmentsList);
}
}
}
@Override
public Set<Relationship> getRelationships() {
return RELATIONSHIPS;
}
}

View File

@ -12,7 +12,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
org.apache.nifi.processors.email.ExtractTNEFAttachments
org.apache.nifi.processors.email.ExtractEmailAttachments
org.apache.nifi.processors.email.ExtractEmailHeaders
org.apache.nifi.processors.email.ListenSMTP

View File

@ -1,102 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.email;
import org.apache.nifi.util.MockFlowFile;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.jupiter.api.Test;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
/**
 * Tests for {@link ExtractTNEFAttachments}. All relationship assertions use the
 * relationships declared by the processor under test.
 */
public class TestExtractTNEFAttachments {

    /** A TNEF file carrying no attachments is routed to "original" and yields no splits. */
    @Test
    public void testValidTNEFWithoutAttachment() throws Exception {
        final TestRunner runner = TestRunners.newTestRunner(new ExtractTNEFAttachments());

        runner.enqueue(Paths.get("src/test/resources/winmail-simple.dat"));
        runner.run();

        runner.assertTransferCount(ExtractTNEFAttachments.REL_ORIGINAL, 1);
        runner.assertTransferCount(ExtractTNEFAttachments.REL_FAILURE, 0);
        runner.assertTransferCount(ExtractTNEFAttachments.REL_ATTACHMENTS, 0);

        // Have a look at the attachments...
        final List<MockFlowFile> splits = runner.getFlowFilesForRelationship(ExtractTNEFAttachments.REL_ATTACHMENTS);
        assertEquals(0, splits.size());
    }

    /** A TNEF file with two attachments yields two splits carrying the attachment filenames. */
    @Test
    public void testValidTNEFWithMultipleAttachments() throws Exception {
        final TestRunner runner = TestRunners.newTestRunner(new ExtractTNEFAttachments());

        runner.enqueue(Paths.get("src/test/resources/winmail-with-attachments.dat"));
        runner.run();

        runner.assertTransferCount(ExtractTNEFAttachments.REL_ORIGINAL, 1);
        runner.assertTransferCount(ExtractTNEFAttachments.REL_FAILURE, 0);
        runner.assertTransferCount(ExtractTNEFAttachments.REL_ATTACHMENTS, 2);

        // Have a look at the attachments...
        final List<String> filenames = attachmentFilenames(runner);
        assertTrue(filenames.containsAll(Arrays.asList("nifiDrop.svg", "MINIFI~1.PNG")));
    }

    /** A TNEF file with a single attachment yields one split carrying the attachment filename. */
    @Test
    public void testValidTNEFWithAttachment() throws Exception {
        final TestRunner runner = TestRunners.newTestRunner(new ExtractTNEFAttachments());

        runner.enqueue(Paths.get("src/test/resources/winmail-with-attachment.dat"));
        runner.run();

        runner.assertTransferCount(ExtractTNEFAttachments.REL_ORIGINAL, 1);
        runner.assertTransferCount(ExtractTNEFAttachments.REL_FAILURE, 0);
        runner.assertTransferCount(ExtractTNEFAttachments.REL_ATTACHMENTS, 1);

        // Have a look at the attachments...
        final List<String> filenames = attachmentFilenames(runner);
        assertTrue(filenames.contains("nifiDrop.svg"));
    }

    /** Content that is not TNEF is routed to "failure" only. */
    @Test
    public void testInvalidTNEF() {
        final TestRunner runner = TestRunners.newTestRunner(new ExtractTNEFAttachments());

        runner.enqueue("test test test chocolate".getBytes());
        runner.run();

        runner.assertTransferCount(ExtractTNEFAttachments.REL_ORIGINAL, 0);
        runner.assertTransferCount(ExtractTNEFAttachments.REL_FAILURE, 1);
        runner.assertTransferCount(ExtractTNEFAttachments.REL_ATTACHMENTS, 0);
    }

    /** Collects the "filename" attribute of every FlowFile routed to the attachments relationship. */
    private static List<String> attachmentFilenames(final TestRunner runner) {
        final List<String> filenames = new ArrayList<>();
        for (final MockFlowFile flowFile : runner.getFlowFilesForRelationship(ExtractTNEFAttachments.REL_ATTACHMENTS)) {
            filenames.add(flowFile.getAttribute("filename"));
        }
        return filenames;
    }
}