From 90ed08ec338d014d6ed5ae6ab2b14f230ccd724a Mon Sep 17 00:00:00 2001 From: btwood <4839861+btwood@users.noreply.github.com> Date: Fri, 25 Aug 2017 16:37:50 -0400 Subject: [PATCH] NIFI-4326 Fix NullPointerException and strict addressing This sets the "mail.mime.address.strict" session property, which getFrom() and getRecipients() honor, in order to avoid strict addressing when it is not wanted. It also checks for null to solve a null pointer exception. By contract, this processor should grab information "if available", which means it should not fail if the info is unavailable. Signed-off-by: Pierre Villard This closes #2111. --- .../email/ExtractEmailAttachments.java | 13 +- .../processors/email/ExtractEmailHeaders.java | 67 +++++++---- .../processors/email/GenerateAttachment.java | 27 +++-- .../email/TestExtractEmailHeaders.java | 111 +++++++++++++++++- 4 files changed, 181 insertions(+), 37 deletions(-) diff --git a/nifi-nar-bundles/nifi-email-bundle/nifi-email-processors/src/main/java/org/apache/nifi/processors/email/ExtractEmailAttachments.java b/nifi-nar-bundles/nifi-email-bundle/nifi-email-processors/src/main/java/org/apache/nifi/processors/email/ExtractEmailAttachments.java index 18c74e960e..e92889cfc6 100644 --- a/nifi-nar-bundles/nifi-email-bundle/nifi-email-processors/src/main/java/org/apache/nifi/processors/email/ExtractEmailAttachments.java +++ b/nifi-nar-bundles/nifi-email-bundle/nifi-email-processors/src/main/java/org/apache/nifi/processors/email/ExtractEmailAttachments.java @@ -121,21 +121,27 @@ public class ExtractEmailAttachments extends AbstractProcessor { final List invalidFlowFilesList = new ArrayList<>(); final List originalFlowFilesList = new ArrayList<>(); + final String requireStrictAddresses = "false"; + session.read(originalFlowFile, new InputStreamCallback() { @Override public void process(final InputStream rawIn) throws IOException { try (final InputStream in = new BufferedInputStream(rawIn)) { Properties props = new Properties(); - Session mailSession = Session.getDefaultInstance(props, null); 
+ props.put("mail.mime.address.strict", requireStrictAddresses); + Session mailSession = Session.getInstance(props); MimeMessage originalMessage = new MimeMessage(mailSession, in); MimeMessageParser parser = new MimeMessageParser(originalMessage).parse(); // RFC-2822 determines that a message must have a "From:" header // if a message lacks the field, it is flagged as invalid Address[] from = originalMessage.getFrom(); + if (from == null) { + throw new MessagingException("Message failed RFC-2822 validation: No Sender"); + } Date sentDate = originalMessage.getSentDate(); - if (from == null || sentDate == null) { + if (sentDate == null) { // Throws MessageException due to lack of minimum required headers - throw new MessagingException("Message failed RFC2822 validation"); + throw new MessagingException("Message failed RFC2822 validation: No Sent Date"); } originalFlowFilesList.add(originalFlowFile); if (parser.hasAttachments()) { @@ -209,4 +215,3 @@ public class ExtractEmailAttachments extends AbstractProcessor { } - diff --git a/nifi-nar-bundles/nifi-email-bundle/nifi-email-processors/src/main/java/org/apache/nifi/processors/email/ExtractEmailHeaders.java b/nifi-nar-bundles/nifi-email-bundle/nifi-email-processors/src/main/java/org/apache/nifi/processors/email/ExtractEmailHeaders.java index 2018349d59..22936fdcd3 100644 --- a/nifi-nar-bundles/nifi-email-bundle/nifi-email-processors/src/main/java/org/apache/nifi/processors/email/ExtractEmailHeaders.java +++ b/nifi-nar-bundles/nifi-email-bundle/nifi-email-processors/src/main/java/org/apache/nifi/processors/email/ExtractEmailHeaders.java @@ -30,6 +30,7 @@ import org.apache.nifi.annotation.behavior.WritesAttributes; import org.apache.nifi.annotation.documentation.CapabilityDescription; import org.apache.nifi.annotation.documentation.Tags; import org.apache.nifi.components.PropertyDescriptor; +import org.apache.nifi.components.AllowableValue; import org.apache.nifi.flowfile.FlowFile; import 
org.apache.nifi.logging.ComponentLog; import org.apache.nifi.processor.AbstractProcessor; @@ -49,7 +50,6 @@ import javax.mail.Session; import javax.mail.internet.MimeMessage; import java.io.IOException; import java.io.InputStream; -import java.lang.reflect.Array; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -67,7 +67,7 @@ import java.util.Set; @SideEffectFree @Tags({"split", "email"}) @InputRequirement(Requirement.INPUT_REQUIRED) -@CapabilityDescription("Using the flowfile content as source of data, extract header from an RFC compliant email file adding the relevant attributes to the flowfile. " + +@CapabilityDescription("Using the flowfile content as source of data, extract header from an RFC compliant email file adding the relevant attributes to the flowfile. " + "This processor does not perform extensive RFC validation but still requires a bare minimum compliance with RFC 2822") @WritesAttributes({ @WritesAttribute(attribute = "email.headers.bcc.*", description = "Each individual BCC recipient (if available)"), @@ -103,6 +103,24 @@ public class ExtractEmailHeaders extends AbstractProcessor { .defaultValue("x-mailer") .build(); + private static final AllowableValue STRICT_ADDRESSING = new AllowableValue("true", "Strict Address Parsing", + "Strict email address format will be enforced. 
FlowFiles will be transfered to the failure relationship if the email address is invalid."); + private static final AllowableValue NONSTRICT_ADDRESSING = new AllowableValue("false", "Non-Strict Address Parsing", + "Accept emails, even if the address is poorly formed and doesn't strictly comply with RFC Validation."); + public static final PropertyDescriptor STRICT_PARSING = new PropertyDescriptor.Builder() + .name("STRICT_ADDRESS_PARSING") + .displayName("Email Address Parsing") + .description("If \"strict\", strict address format parsing rules are applied to mailbox and mailbox list fields, " + + "such as \"to\" and \"from\" headers, and FlowFiles with poorly formed addresses will be routed " + + "to the failure relationship, similar to messages that fail RFC compliant format validation. " + + "If \"non-strict\", the processor will extract the contents of mailbox list headers as comma-separated " + + "values without attempting to parse each value as well-formed Internet mailbox addresses. 
" + + "This is optional and defaults to " + STRICT_ADDRESSING.getDisplayName()) + .required(false) + .defaultValue(STRICT_ADDRESSING.getValue()) + .allowableValues(STRICT_ADDRESSING, NONSTRICT_ADDRESSING) + .build(); + public static final Relationship REL_SUCCESS = new Relationship.Builder() .name("success") .description("Extraction was successful") @@ -125,6 +143,7 @@ public class ExtractEmailHeaders extends AbstractProcessor { final List descriptors = new ArrayList<>(); descriptors.add(CAPTURED_HEADERS); + descriptors.add(STRICT_PARSING); this.descriptors = Collections.unmodifiableList(descriptors); } @@ -140,6 +159,7 @@ public class ExtractEmailHeaders extends AbstractProcessor { return; } + final String requireStrictAddresses = context.getProperty(STRICT_PARSING).getValue(); final List capturedHeadersList = Arrays.asList(context.getProperty(CAPTURED_HEADERS).getValue().toLowerCase().split(":")); final Map attributes = new HashMap<>(); @@ -148,16 +168,20 @@ public class ExtractEmailHeaders extends AbstractProcessor { public void process(final InputStream rawIn) throws IOException { try (final InputStream in = new BufferedInputStream(rawIn)) { Properties props = new Properties(); - Session mailSession = Session.getDefaultInstance(props, null); + props.put("mail.mime.address.strict", requireStrictAddresses); + Session mailSession = Session.getInstance(props); MimeMessage originalMessage = new MimeMessage(mailSession, in); MimeMessageParser parser = new MimeMessageParser(originalMessage).parse(); // RFC-2822 determines that a message must have a "From:" header // if a message lacks the field, it is flagged as invalid Address[] from = originalMessage.getFrom(); + if (from == null) { + throw new MessagingException("Message failed RFC-2822 validation: No Sender"); + } Date sentDate = originalMessage.getSentDate(); - if (from == null || sentDate == null ) { + if (sentDate == null ) { // Throws MessageException due to lack of minimum required headers - throw new 
MessagingException("Message failed RFC2822 validation"); + throw new MessagingException("Message failed RFC-2822 validation: No Sent Date"); } else if (capturedHeadersList.size() > 0){ Enumeration headers = originalMessage.getAllHeaders(); while (headers.hasMoreElements()) { @@ -168,21 +192,12 @@ public class ExtractEmailHeaders extends AbstractProcessor { } } } - if (Array.getLength(originalMessage.getAllRecipients()) > 0) { - for (int toCount = 0; toCount < ArrayUtils.getLength(originalMessage.getRecipients(Message.RecipientType.TO)); toCount++) { - attributes.put(EMAIL_HEADER_TO + "." + toCount, originalMessage.getRecipients(Message.RecipientType.TO)[toCount].toString()); - } - for (int toCount = 0; toCount < ArrayUtils.getLength(originalMessage.getRecipients(Message.RecipientType.BCC)); toCount++) { - attributes.put(EMAIL_HEADER_BCC + "." + toCount, originalMessage.getRecipients(Message.RecipientType.BCC)[toCount].toString()); - } - for (int toCount = 0; toCount < ArrayUtils.getLength(originalMessage.getRecipients(Message.RecipientType.CC)); toCount++) { - attributes.put(EMAIL_HEADER_CC + "." + toCount, originalMessage.getRecipients(Message.RecipientType.CC)[toCount].toString()); - } - } - // Incredibly enough RFC-2822 specified From as a "mailbox-list" so an array I returned by getFrom - for (int toCount = 0; toCount < ArrayUtils.getLength(originalMessage.getFrom()); toCount++) { - attributes.put(EMAIL_HEADER_FROM + "." 
+ toCount, originalMessage.getFrom()[toCount].toString()); - } + + putAddressListInAttributes(attributes, EMAIL_HEADER_TO, originalMessage.getRecipients(Message.RecipientType.TO)); + putAddressListInAttributes(attributes, EMAIL_HEADER_CC, originalMessage.getRecipients(Message.RecipientType.CC)); + putAddressListInAttributes(attributes, EMAIL_HEADER_BCC, originalMessage.getRecipients(Message.RecipientType.BCC)); + putAddressListInAttributes(attributes, EMAIL_HEADER_FROM, originalMessage.getFrom()); // RFC-2822 specifies "From" as mailbox-list + if (StringUtils.isNotEmpty(originalMessage.getMessageID())) { attributes.put(EMAIL_HEADER_MESSAGE_ID, originalMessage.getMessageID()); } @@ -231,5 +246,15 @@ public class ExtractEmailHeaders extends AbstractProcessor { public final List getSupportedPropertyDescriptors() { return descriptors; } -} + private static void putAddressListInAttributes( + Map attributes, + final String attributePrefix, + Address[] addresses) { + if (addresses != null) { + for (int count = 0; count < ArrayUtils.getLength(addresses); count++) { + attributes.put(attributePrefix + "." 
+ count, addresses[count].toString()); + } + } + } +} diff --git a/nifi-nar-bundles/nifi-email-bundle/nifi-email-processors/src/test/java/org/apache/nifi/processors/email/GenerateAttachment.java b/nifi-nar-bundles/nifi-email-bundle/nifi-email-processors/src/test/java/org/apache/nifi/processors/email/GenerateAttachment.java index ef100b2cc3..621597fa96 100644 --- a/nifi-nar-bundles/nifi-email-bundle/nifi-email-processors/src/test/java/org/apache/nifi/processors/email/GenerateAttachment.java +++ b/nifi-nar-bundles/nifi-email-bundle/nifi-email-processors/src/test/java/org/apache/nifi/processors/email/GenerateAttachment.java @@ -44,6 +44,20 @@ public class GenerateAttachment { } public byte[] SimpleEmail() { + MimeMessage mimeMessage = SimpleEmailMimeMessage(); + ByteArrayOutputStream output = new ByteArrayOutputStream(); + try { + mimeMessage.writeTo(output); + } catch (IOException e) { + e.printStackTrace(); + } catch (MessagingException e) { + e.printStackTrace(); + } + + return output.toByteArray(); + } + + public MimeMessage SimpleEmailMimeMessage() { Email email = new SimpleEmail(); try { email.setFrom(from); @@ -56,19 +70,10 @@ public class GenerateAttachment { e.printStackTrace(); } - ByteArrayOutputStream output = new ByteArrayOutputStream(); - MimeMessage mimeMessage = email.getMimeMessage(); - try { - mimeMessage.writeTo(output); - } catch (IOException e) { - e.printStackTrace(); - } catch (MessagingException e) { - e.printStackTrace(); - } - - return output.toByteArray(); + return email.getMimeMessage(); } + public byte[] WithAttachments(int amount) { MultiPartEmail email = new MultiPartEmail(); try { diff --git a/nifi-nar-bundles/nifi-email-bundle/nifi-email-processors/src/test/java/org/apache/nifi/processors/email/TestExtractEmailHeaders.java b/nifi-nar-bundles/nifi-email-bundle/nifi-email-processors/src/test/java/org/apache/nifi/processors/email/TestExtractEmailHeaders.java index aed2292c25..4cb0009feb 100644 --- 
a/nifi-nar-bundles/nifi-email-bundle/nifi-email-processors/src/test/java/org/apache/nifi/processors/email/TestExtractEmailHeaders.java +++ b/nifi-nar-bundles/nifi-email-bundle/nifi-email-processors/src/test/java/org/apache/nifi/processors/email/TestExtractEmailHeaders.java @@ -17,11 +17,15 @@ package org.apache.nifi.processors.email; +import org.apache.nifi.stream.io.ByteArrayOutputStream; import org.apache.nifi.util.MockFlowFile; import org.apache.nifi.util.TestRunner; import org.apache.nifi.util.TestRunners; import org.junit.Test; +import javax.mail.MessagingException; +import javax.mail.internet.MimeMessage; +import java.io.IOException; import java.util.List; public class TestExtractEmailHeaders { @@ -79,6 +83,111 @@ public class TestExtractEmailHeaders { splits.get(0).assertAttributeExists("email.headers.mime-version"); } + /** + * Test case added for NIFI-4326 for a potential NPE bug + * if the email message contains no recipient header fields, ie, + * TO, CC, BCC. + */ + @Test + public void testValidEmailWithNoRecipients() throws Exception { + final TestRunner runner = TestRunners.newTestRunner(new ExtractEmailHeaders()); + runner.setProperty(ExtractEmailHeaders.CAPTURED_HEADERS, "MIME-Version"); + + MimeMessage simpleEmailMimeMessage = attachmentGenerator.SimpleEmailMimeMessage(); + + simpleEmailMimeMessage.removeHeader("To"); + simpleEmailMimeMessage.removeHeader("Cc"); + simpleEmailMimeMessage.removeHeader("Bcc"); + + ByteArrayOutputStream messageBytes = new ByteArrayOutputStream(); + try { + simpleEmailMimeMessage.writeTo(messageBytes); + } catch (IOException | MessagingException e) { + e.printStackTrace(); + } + + runner.enqueue(messageBytes.toByteArray()); + runner.run(); + + runner.assertTransferCount(ExtractEmailHeaders.REL_SUCCESS, 1); + runner.assertTransferCount(ExtractEmailHeaders.REL_FAILURE, 0); + + runner.assertQueueEmpty(); + final List splits = runner.getFlowFilesForRelationship(ExtractEmailHeaders.REL_SUCCESS); + 
splits.get(0).assertAttributeEquals("email.headers.from.0", from); + splits.get(0).assertAttributeExists("email.headers.mime-version"); + splits.get(0).assertAttributeNotExists("email.headers.to"); + splits.get(0).assertAttributeNotExists("email.headers.cc"); + splits.get(0).assertAttributeNotExists("email.headers.bcc"); + } + + /** + * NIFI-4326 adds a new feature to disable strict address parsing for + * mailbox list header fields. This is a test case that asserts that + * lax address parsing passes (when set to "strict=false") for malformed + * addresses. + */ + @Test + public void testNonStrictParsingPassesForInvalidAddresses() throws Exception { + final TestRunner runner = TestRunners.newTestRunner(new ExtractEmailHeaders()); + runner.setProperty(ExtractEmailHeaders.STRICT_PARSING, "false"); + + MimeMessage simpleEmailMimeMessage = attachmentGenerator.SimpleEmailMimeMessage(); + + simpleEmailMimeMessage.setHeader("From", ""); + simpleEmailMimeMessage.setHeader("To", "<>, Joe, \"\" <>"); + + ByteArrayOutputStream messageBytes = new ByteArrayOutputStream(); + try { + simpleEmailMimeMessage.writeTo(messageBytes); + } catch (IOException | MessagingException e) { + e.printStackTrace(); + } + + runner.enqueue(messageBytes.toByteArray()); + runner.run(); + + runner.assertTransferCount(ExtractEmailHeaders.REL_SUCCESS, 1); + runner.assertTransferCount(ExtractEmailHeaders.REL_FAILURE, 0); + + + runner.assertQueueEmpty(); + final List splits = runner.getFlowFilesForRelationship(ExtractEmailHeaders.REL_SUCCESS); + splits.get(0).assertAttributeEquals("email.headers.from.0", "bad_email"); + splits.get(0).assertAttributeEquals("email.headers.to.0", ""); + splits.get(0).assertAttributeEquals("email.headers.to.1", "Joe"); + splits.get(0).assertAttributeEquals("email.headers.to.2", ""); + } + + /** + * NIFI-4326 adds a new feature to disable strict address parsing for + * mailbox list header fields. 
This is a test case that asserts that + * strict address parsing fails (when set to "strict=true") for malformed + * addresses. + */ + @Test + public void testStrictParsingFailsForInvalidAddresses() throws Exception { + final TestRunner runner = TestRunners.newTestRunner(new ExtractEmailHeaders()); + runner.setProperty(ExtractEmailHeaders.STRICT_PARSING, "true"); + + MimeMessage simpleEmailMimeMessage = attachmentGenerator.SimpleEmailMimeMessage(); + + simpleEmailMimeMessage.setHeader("From", ""); + simpleEmailMimeMessage.setHeader("To", "<>, Joe, "); + + ByteArrayOutputStream messageBytes = new ByteArrayOutputStream(); + try { + simpleEmailMimeMessage.writeTo(messageBytes); + } catch (IOException | MessagingException e) { + e.printStackTrace(); + } + + runner.enqueue(messageBytes.toByteArray()); + runner.run(); + + runner.assertTransferCount(ExtractEmailHeaders.REL_SUCCESS, 0); + runner.assertTransferCount(ExtractEmailHeaders.REL_FAILURE, 1); + } @Test public void testInvalidEmail() throws Exception { @@ -90,4 +199,4 @@ public class TestExtractEmailHeaders { runner.assertTransferCount(ExtractEmailHeaders.REL_FAILURE, 1); } -} \ No newline at end of file +}