mirror of https://github.com/apache/nifi.git
NIFI-13503 Removed ExtractTNEFAttachments from nifi-email-processors
This closes #9042 Signed-off-by: David Handermann <exceptionfactory@apache.org>
This commit is contained in:
parent
db6bac21bf
commit
ad7375a3be
|
@ -26,11 +26,9 @@
|
|||
<packaging>jar</packaging>
|
||||
<properties>
|
||||
<spring.integration.version>6.3.1</spring.integration.version>
|
||||
<poi.version>5.3.0</poi.version>
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.nifi</groupId>
|
||||
<artifactId>nifi-utils</artifactId>
|
||||
|
@ -103,26 +101,6 @@
|
|||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<!-- poi-scratchpad required for TNEF parsing -->
|
||||
<dependency>
|
||||
<groupId>org.apache.poi</groupId>
|
||||
<artifactId>poi-scratchpad</artifactId>
|
||||
<version>${poi.version}</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
<artifactId>commons-math3</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
<artifactId>commons-collections4</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>com.zaxxer</groupId>
|
||||
<artifactId>SparseBitSet</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.nifi</groupId>
|
||||
<artifactId>nifi-security-utils</artifactId>
|
||||
|
|
|
@ -1,155 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi.processors.email;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import org.apache.nifi.annotation.behavior.InputRequirement;
|
||||
import org.apache.nifi.annotation.behavior.InputRequirement.Requirement;
|
||||
import org.apache.nifi.annotation.behavior.SideEffectFree;
|
||||
import org.apache.nifi.annotation.behavior.SupportsBatching;
|
||||
import org.apache.nifi.annotation.behavior.WritesAttribute;
|
||||
import org.apache.nifi.annotation.behavior.WritesAttributes;
|
||||
import org.apache.nifi.annotation.documentation.CapabilityDescription;
|
||||
import org.apache.nifi.annotation.documentation.Tags;
|
||||
import org.apache.nifi.flowfile.FlowFile;
|
||||
import org.apache.nifi.flowfile.attributes.CoreAttributes;
|
||||
import org.apache.nifi.logging.ComponentLog;
|
||||
import org.apache.nifi.processor.AbstractProcessor;
|
||||
import org.apache.nifi.processor.ProcessContext;
|
||||
import org.apache.nifi.processor.ProcessSession;
|
||||
import org.apache.nifi.processor.Relationship;
|
||||
import org.apache.nifi.processor.exception.FlowFileHandlingException;
|
||||
import org.apache.poi.hmef.Attachment;
|
||||
import org.apache.poi.hmef.HMEFMessage;
|
||||
|
||||
@SupportsBatching
|
||||
@SideEffectFree
|
||||
@Tags({"split", "email"})
|
||||
@InputRequirement(Requirement.INPUT_REQUIRED)
|
||||
@CapabilityDescription("Extract attachments from a mime formatted email file, splitting them into individual flowfiles.")
|
||||
@WritesAttributes({
|
||||
@WritesAttribute(attribute = "filename ", description = "The filename of the attachment"),
|
||||
@WritesAttribute(attribute = "email.tnef.attachment.parent.filename ", description = "The filename of the parent FlowFile"),
|
||||
@WritesAttribute(attribute = "email.tnef.attachment.parent.uuid", description = "The UUID of the original FlowFile.")})
|
||||
public class ExtractTNEFAttachments extends AbstractProcessor {
|
||||
public static final String ATTACHMENT_ORIGINAL_FILENAME = "email.tnef.attachment.parent.filename";
|
||||
public static final String ATTACHMENT_ORIGINAL_UUID = "email.tnef.attachment.parent.uuid";
|
||||
|
||||
public static final Relationship REL_ATTACHMENTS = new Relationship.Builder()
|
||||
.name("attachments")
|
||||
.description("Each individual attachment will be routed to the attachments relationship")
|
||||
.build();
|
||||
public static final Relationship REL_ORIGINAL = new Relationship.Builder()
|
||||
.name("original")
|
||||
.description("Each original flowfile (i.e. before extraction) will be routed to the original relationship")
|
||||
.build();
|
||||
public static final Relationship REL_FAILURE = new Relationship.Builder()
|
||||
.name("failure")
|
||||
.description("Each individual flowfile that could not be parsed will be routed to the failure relationship")
|
||||
.build();
|
||||
|
||||
private final static Set<Relationship> RELATIONSHIPS = Set.of(REL_ATTACHMENTS, REL_ORIGINAL, REL_FAILURE);
|
||||
|
||||
@Override
|
||||
public void onTrigger(final ProcessContext context, final ProcessSession session) {
|
||||
final ComponentLog logger = getLogger();
|
||||
final FlowFile originalFlowFile = session.get();
|
||||
if (originalFlowFile == null) {
|
||||
return;
|
||||
}
|
||||
final List<FlowFile> attachmentsList = new ArrayList<>();
|
||||
final List<FlowFile> invalidFlowFilesList = new ArrayList<>();
|
||||
final List<FlowFile> originalFlowFilesList = new ArrayList<>();
|
||||
|
||||
session.read(originalFlowFile, rawIn -> {
|
||||
try (final InputStream in = new BufferedInputStream(rawIn)) {
|
||||
// This will trigger an exception in case content is not a TNEF.
|
||||
final HMEFMessage hmefMessage = new HMEFMessage(in);
|
||||
|
||||
// Add original FlowFile (may revert later on in case of errors) //
|
||||
originalFlowFilesList.add(originalFlowFile);
|
||||
|
||||
if (!hmefMessage.getAttachments().isEmpty()) {
|
||||
final String originalFlowFileName = originalFlowFile.getAttribute(CoreAttributes.FILENAME.key());
|
||||
try {
|
||||
for (final Attachment attachment : hmefMessage.getAttachments()) {
|
||||
FlowFile split = session.create(originalFlowFile);
|
||||
final Map<String, String> attributes = new HashMap<>();
|
||||
final String attachmentFilename = attachment.getFilename();
|
||||
if (attachmentFilename != null && !attachmentFilename.isBlank()) {
|
||||
attributes.put(CoreAttributes.FILENAME.key(), attachmentFilename);
|
||||
}
|
||||
|
||||
String parentUuid = originalFlowFile.getAttribute(CoreAttributes.UUID.key());
|
||||
attributes.put(ATTACHMENT_ORIGINAL_UUID, parentUuid);
|
||||
attributes.put(ATTACHMENT_ORIGINAL_FILENAME, originalFlowFileName);
|
||||
|
||||
split = session.append(split, out -> out.write(attachment.getContents()));
|
||||
split = session.putAllAttributes(split, attributes);
|
||||
attachmentsList.add(split);
|
||||
}
|
||||
} catch (FlowFileHandlingException e) {
|
||||
// Something went wrong
|
||||
// Removing splits that may have been created
|
||||
session.remove(attachmentsList);
|
||||
// Removing the original flow from its list
|
||||
originalFlowFilesList.remove(originalFlowFile);
|
||||
logger.error("Flowfile {} triggered error {} while processing message removing generated FlowFiles from sessions", originalFlowFile, e);
|
||||
invalidFlowFilesList.add(originalFlowFile);
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
// Another error hit...
|
||||
// Removing the original flow from its list
|
||||
originalFlowFilesList.remove(originalFlowFile);
|
||||
logger.error("Could not parse {} as an email, treating as failure", originalFlowFile, e);
|
||||
// Message is invalid or triggered an error during parsing
|
||||
invalidFlowFilesList.add(originalFlowFile);
|
||||
}
|
||||
});
|
||||
|
||||
session.transfer(attachmentsList, REL_ATTACHMENTS);
|
||||
|
||||
// As per above code, originalFlowfile may be routed to invalid or
|
||||
// original depending on RFC2822 compliance.
|
||||
session.transfer(invalidFlowFilesList, REL_FAILURE);
|
||||
session.transfer(originalFlowFilesList, REL_ORIGINAL);
|
||||
|
||||
// check if attachments have been extracted
|
||||
if (!attachmentsList.isEmpty()) {
|
||||
if (attachmentsList.size() > 10) {
|
||||
// If more than 10, summarise log
|
||||
logger.info("Split {} into {} files", originalFlowFile, attachmentsList.size());
|
||||
} else {
|
||||
// Otherwise be more verbose and list each individual split
|
||||
logger.info("Split {} into {} files: {}", originalFlowFile, attachmentsList.size(), attachmentsList);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<Relationship> getRelationships() {
|
||||
return RELATIONSHIPS;
|
||||
}
|
||||
}
|
||||
|
|
@ -12,7 +12,6 @@
|
|||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
org.apache.nifi.processors.email.ExtractTNEFAttachments
|
||||
org.apache.nifi.processors.email.ExtractEmailAttachments
|
||||
org.apache.nifi.processors.email.ExtractEmailHeaders
|
||||
org.apache.nifi.processors.email.ListenSMTP
|
||||
|
|
|
@ -1,102 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.nifi.processors.email;
|
||||
|
||||
import org.apache.nifi.util.MockFlowFile;
|
||||
import org.apache.nifi.util.TestRunner;
|
||||
import org.apache.nifi.util.TestRunners;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.nio.file.Paths;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
public class TestExtractTNEFAttachments {
|
||||
|
||||
@Test
|
||||
public void testValidTNEFWithoutAttachment() throws Exception {
|
||||
final TestRunner runner = TestRunners.newTestRunner(new ExtractTNEFAttachments());
|
||||
|
||||
runner.enqueue(Paths.get("src/test/resources/winmail-simple.dat"));
|
||||
runner.run();
|
||||
|
||||
runner.assertTransferCount(ExtractEmailAttachments.REL_ORIGINAL, 1);
|
||||
runner.assertTransferCount(ExtractEmailAttachments.REL_FAILURE, 0);
|
||||
runner.assertTransferCount(ExtractEmailAttachments.REL_ATTACHMENTS, 0);
|
||||
// Have a look at the attachments...
|
||||
final List<MockFlowFile> splits = runner.getFlowFilesForRelationship(ExtractEmailAttachments.REL_ATTACHMENTS);
|
||||
assertEquals(0, splits.size());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testValidTNEFWithMultipleAttachments() throws Exception {
|
||||
final TestRunner runner = TestRunners.newTestRunner(new ExtractTNEFAttachments());
|
||||
|
||||
runner.enqueue(Paths.get("src/test/resources/winmail-with-attachments.dat"));
|
||||
runner.run();
|
||||
|
||||
runner.assertTransferCount(ExtractEmailAttachments.REL_ORIGINAL, 1);
|
||||
runner.assertTransferCount(ExtractEmailAttachments.REL_FAILURE, 0);
|
||||
runner.assertTransferCount(ExtractEmailAttachments.REL_ATTACHMENTS, 2);
|
||||
// Have a look at the attachments...
|
||||
final List<MockFlowFile> splits = runner.getFlowFilesForRelationship(ExtractTNEFAttachments.REL_ATTACHMENTS);
|
||||
|
||||
List<String> filenames = new ArrayList<>();
|
||||
for (final MockFlowFile flowFile : splits) {
|
||||
filenames.add(flowFile.getAttribute("filename"));
|
||||
}
|
||||
|
||||
assertTrue(filenames.containsAll(Arrays.asList("nifiDrop.svg", "MINIFI~1.PNG")));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testValidTNEFWithAttachment() throws Exception {
|
||||
final TestRunner runner = TestRunners.newTestRunner(new ExtractTNEFAttachments());
|
||||
|
||||
runner.enqueue(Paths.get("src/test/resources/winmail-with-attachment.dat"));
|
||||
runner.run();
|
||||
|
||||
runner.assertTransferCount(ExtractEmailAttachments.REL_ORIGINAL, 1);
|
||||
runner.assertTransferCount(ExtractEmailAttachments.REL_FAILURE, 0);
|
||||
runner.assertTransferCount(ExtractEmailAttachments.REL_ATTACHMENTS, 1);
|
||||
// Have a look at the attachments...
|
||||
final List<MockFlowFile> splits = runner.getFlowFilesForRelationship(ExtractTNEFAttachments.REL_ATTACHMENTS);
|
||||
|
||||
List<String> filenames = new ArrayList<>();
|
||||
for (final MockFlowFile flowFile : splits) {
|
||||
filenames.add(flowFile.getAttribute("filename"));
|
||||
}
|
||||
|
||||
assertTrue(filenames.contains("nifiDrop.svg"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testInvalidTNEF() {
|
||||
final TestRunner runner = TestRunners.newTestRunner(new ExtractTNEFAttachments());
|
||||
runner.enqueue("test test test chocolate".getBytes());
|
||||
runner.run();
|
||||
|
||||
runner.assertTransferCount(ExtractEmailAttachments.REL_ORIGINAL, 0);
|
||||
runner.assertTransferCount(ExtractEmailAttachments.REL_FAILURE, 1);
|
||||
runner.assertTransferCount(ExtractEmailAttachments.REL_ATTACHMENTS, 0);
|
||||
}
|
||||
}
|
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue